Improve parsing performance

This commit is contained in:
Danny Spina 2023-06-20 11:48:11 +02:00
parent 49b9c10ae6
commit fd829e4cf9
3 changed files with 103 additions and 760 deletions

View File

@ -1,11 +1,11 @@
import express from "express"; import express from "express";
import { Readability, isProbablyReaderable } from "@mozilla/readability"; import { Readability, isProbablyReaderable } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import got from "got"; import got from "got";
import path from "path"; import path from "path";
import { fileURLToPath } from "url"; import { fileURLToPath } from "url";
import fetch from "node-fetch"; import fetch from "node-fetch";
import "dotenv/config"; import "dotenv/config";
import { parseHTML } from "linkedom";
const app = express(); const app = express();
const port = 8888; const port = 8888;
@ -78,31 +78,31 @@ app.get("/", async (req, res) => {
}); });
}); });
app.get("/blazed", (req, res) => { app.get("/blazed", async (req, res) => {
const pageToBlaze = req.query.url as string; const pageToBlaze = req.query.url as string;
console.time("blaze");
got(pageToBlaze) try {
.then((response) => { const response = await got(pageToBlaze);
const dom = new JSDOM(response.body); const { document } = parseHTML(response.body);
if (!isProbablyReaderable(dom.window.document)) { if (!isProbablyReaderable(document)) {
res.sendFile(path.join(__dirname + "/dist/not_blazed.html")); return res.sendFile(path.join(__dirname, "/dist/not_blazed.html"));
return;
} }
let reader = new Readability(dom.window.document); const reader = new Readability(document);
let article = reader.parse(); const article = reader.parse();
if (!article) { if (!article) {
res.send("Something went wrong"); return res.send("Something went wrong");
return;
} }
res.send(article.content); res.send(article.content);
}) } catch (err) {
.catch((err) => {
console.log(err); console.log(err);
}); } finally {
console.timeEnd("blaze");
}
}); });
app.get("/info", (_, res) => { app.get("/info", (_, res) => {

821
package-lock.json generated

File diff suppressed because it is too large. (Load Diff)

View File

@ -19,7 +19,7 @@
"fetch": "^1.1.0", "fetch": "^1.1.0",
"got": "^13.0.0", "got": "^13.0.0",
"html-minifier": "^4.0.0", "html-minifier": "^4.0.0",
"jsdom": "^22.1.0", "linkedom": "^0.14.26",
"node-fetch": "^3.3.1", "node-fetch": "^3.3.1",
"node-html-parser": "^6.1.5" "node-html-parser": "^6.1.5"
}, },