import express from "express";
import { Readability, isProbablyReaderable } from "@mozilla/readability";
import got from "got";
import path from "path";
import { fileURLToPath } from "url";
import "dotenv/config";
import { parseHTML, parseJSON } from "linkedom";
// @ts-ignore
import XHR2 from "xhr2";
const XMLHttpRequest = XHR2.XMLHttpRequest;
import { minify } from "html-minifier";
import {
  blazeFunctionality,
  blazeUrl,
  highlightBlazedLinks,
  injectBlazeToPageLinks,
} from "./utils.js";
import etag from "etag";
import compression from "compression";
import fs from "fs";

const app = express();
const port = 8888;

// Options handed to html-minifier for every page this server generates.
const minifierOptions = {
  collapseWhitespace: true,
  removeComments: true,
  removeOptionalTags: true,
  removeRedundantAttributes: true,
  removeScriptTypeAttributes: true,
  removeTagWhitespace: true,
  useShortDoctype: true,
  minifyCSS: true,
};

// @ts-ignore
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * The subset of a search result that this file reads.
 * NOTE(review): derived only from the fields used below, not from the
 * search API's full schema — confirm against the provider documentation.
 */
interface SearchResult {
  title: string;
  description: string;
  meta_url: { hostname: string };
}

/** Shape of the search response as far as this file consumes it. */
interface SearchResponse {
  web?: { results?: SearchResult[] };
}

/**
 * Escapes the HTML-significant characters of `text` so it can be placed in
 * generated markup without being interpreted as tags or attributes.
 *
 * @param text - Untrusted text (user input, upstream status text, ...).
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Type guard: true only for a string that parses as an absolute URL with an
 * `http:` or `https:` scheme.
 *
 * @param value - Raw query-string value (may be undefined, an array, ...).
 */
function isHttpUrl(value: unknown): value is string {
  if (typeof value !== "string") {
    return false;
  }
  try {
    const { protocol } = new URL(value);
    return protocol === "http:" || protocol === "https:";
  } catch {
    return false;
  }
}

// Middlewares
app.use(compression());
app.use((req, res, next) => {
  res.set("Cache-Control", "public, max-age=60000");
  res.set("Service-Worker-Allowed", "/");
  next();
});

// Routes

/**
 * GET / — serves the landing page when no `q` parameter is given, otherwise
 * queries the search API and responds with a minified results page.
 * Every failure path sends a response so the client is never left hanging.
 */
app.get("/", (req, res) => {
  const searchEngine = "https://api.search.brave.com/res/v1/web/search";
  const notBlazedPage = path.join(__dirname, "/not_blazed.html");
  const query = req.query.q;

  // A missing, empty or non-string (e.g. repeated) `q` falls back to the
  // landing page.
  if (typeof query !== "string" || !query) {
    res.sendFile(path.join(__dirname, "/index.html"));
    return;
  }

  const key = process.env.CYCLIC_BRAVE_KEY;
  if (!key) {
    // Respond instead of throwing: nothing catches an exception raised here,
    // so a throw would leave the request without a response.
    console.error("No brave key found");
    res.status(500).sendFile(notBlazedPage);
    return;
  }

  try {
    const xhr = new XMLHttpRequest();
    const formattedQuery = encodeURIComponent(query);
    xhr.open(
      "GET",
      `${searchEngine}?q=${formattedQuery}&safesearch=moderate`,
      true
    );
    xhr.setRequestHeader("Accept", "*/*");
    xhr.setRequestHeader("X-Subscription-Token", key);

    xhr.onreadystatechange = () => {
      // 4 === DONE; earlier states carry no complete response.
      if (xhr.readyState !== 4) {
        return;
      }

      if (xhr.status !== 200) {
        console.error("XHR request failed:", xhr.status, xhr.statusText);
        res.status(502).sendFile(notBlazedPage);
        return;
      }

      let html: string;
      try {
        const data = JSON.parse(xhr.responseText) as SearchResponse;
        // A response without web results renders an empty list.
        const results = (data?.web?.results ?? []).map(
          (result) => `

${result.title}

${result.meta_url.hostname}

${result.description}


`
        );
        // The query is user input, so it is escaped before being embedded.
        html = ` Blaze - ${escapeHtml(query)}

${results.join("")} `;
      } catch (e) {
        console.error("Could not build the results page:", e);
        res.status(502).sendFile(notBlazedPage);
        return;
      }

      try {
        const minifiedSerp = minify(html, minifierOptions);
        res.set("X-Blaze-Etag", etag(minifiedSerp));
        res.send(minifiedSerp);
      } catch (e) {
        console.log("Error during html minifier:", e);
        res.sendFile(notBlazedPage);
      }
    };

    xhr.send();
  } catch (err) {
    console.error(err);
    if (!res.headersSent) {
      res.status(500).sendFile(notBlazedPage);
    }
  }
});

/**
 * GET /blazed?url=... — fetches the requested page and responds with a
 * lightweight version: the Readability article when the page looks
 * readerable, otherwise the original markup with links, styles, scripts,
 * images and iframes stripped and a disclaimer prepended.
 */
app.get("/blazed", (req, res) => {
  const pageToBlaze = req.query.url;

  // Only absolute http(s) URLs are fetched; anything else is a client error.
  if (!isHttpUrl(pageToBlaze)) {
    res.status(400).send("Invalid or missing url parameter");
    return;
  }

  try {
    const xhr = new XMLHttpRequest();
    xhr.open("GET", pageToBlaze, true);
    xhr.setRequestHeader("Accept", "text/html");

    xhr.onreadystatechange = () => {
      // 4 === DONE; earlier states carry no complete response.
      if (xhr.readyState !== 4) {
        return;
      }

      // Parsing, Readability and minification can all throw on unusual
      // markup; catching here guarantees the client still gets an answer.
      try {
        if (xhr.status === 404) {
          res.sendFile(path.join(__dirname, "/404.html"));
          return;
        }

        if (xhr.status !== 200) {
          console.error("XHR request failed:", xhr.status, xhr.statusText);
          // statusText comes from the remote server, so it is escaped.
          res.send(
            minify(
              ` Blaze - error

Blaze could not load the page :(

Reason: ${xhr.status} ${escapeHtml(String(xhr.statusText))}



If you want (it would be great!) you can report this problem, writing the requested URL and the reason, at support.blaze@dannyspina.com


Go back
`,
              minifierOptions
            )
          );
          return;
        }

        const response = xhr.responseText;
        const { document } = parseHTML(response);

        if (!isProbablyReaderable(document)) {
          // TODO: still a lot of bugs, must be refined to handle some cases, like
          // cookie banners, etc.
          document
            .querySelectorAll("link, style, script, img, iframe")
            .forEach((element) => {
              element.remove();
            });

          const blazeDisclaimer = document.createElement("div");
          blazeDisclaimer.style.width = "100dvw";
          blazeDisclaimer.style.border = "1px solid red";
          blazeDisclaimer.style.padding = "1rem";
          blazeDisclaimer.style.textAlign = "center";
          blazeDisclaimer.innerHTML = `

BLAZE INFO

The page you are seeing could not be correctly blazed due to these webpage characteristics. Blaze served anyway a lightweight version of the page. Keep in mind that this kind of pages can be hard or even impossible to use, read or understand.

`;

          // Place the disclaimer before any existing body content.
          const referenceElement = document.body.firstChild;
          document.body.insertBefore(blazeDisclaimer, referenceElement);

          const blazedPage = minify(document.toString(), minifierOptions);
          res.send(blazedPage);
          return;
        }

        //TODO: find if there are more performant ways to remove images or evaluate if is the case to remove images
        document.querySelectorAll("img").forEach((img) => img.remove());

        const reader = new Readability(document);
        const article = reader.parse();

        if (!article) {
          res.send("Something went wrong");
          return;
        }

        const blazedPage = ` ${article.content} `;
        const minifiedBlazedPage = minify(blazedPage, minifierOptions);
        res.send(minifiedBlazedPage);
      } catch (err) {
        console.error(err);
        if (!res.headersSent) {
          res.status(500).send("Something went wrong");
        }
      }
    };

    xhr.send();
  } catch (err) {
    console.log(err);
    if (!res.headersSent) {
      res.status(500).send("Something went wrong");
    }
  }
});

/**
 * GET /info — serves info.html with an `X-Blaze-Etag` header computed from
 * the file's contents.
 */
app.get("/info", (_, res) => {
  const infoPage = path.join(__dirname, "info.html");
  fs.readFile(infoPage, "utf8", (err, data) => {
    if (err) {
      console.error(err);
      // Answer the request instead of leaving it open.
      res.status(500).send("Something went wrong");
      return;
    }
    res.set("X-Blaze-Etag", etag(data));
    res.sendFile(infoPage);
  });
});

// Plain file routes: each URL path maps to a file relative to this module.
const staticFiles: ReadonlyArray<readonly [string, string]> = [
  ["/ooops", "info_not_blazed.html"],
  ["/favicon.svg", "favicon.svg"],
  ["/service-worker.js", "service-worker.js"],
  ["/styles/serp.css", "styles/serp.css"],
];

for (const [route, file] of staticFiles) {
  app.get(route, (_, res) => {
    res.sendFile(path.join(__dirname, file));
  });
}

app.listen(port, () => {
  // This callback runs once when the server starts, not per request.
  console.log(`Blaze listening on port ${port}`);
});