Improve parsing performance

This commit is contained in:
Danny Spina 2023-06-20 11:48:11 +02:00
parent 49b9c10ae6
commit fd829e4cf9
3 changed files with 103 additions and 760 deletions

View File

@ -1,11 +1,11 @@
import express from "express";
import { Readability, isProbablyReaderable } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import got from "got";
import path from "path";
import { fileURLToPath } from "url";
import fetch from "node-fetch";
import "dotenv/config";
import { parseHTML } from "linkedom";
const app = express();
const port = 8888;
@ -78,31 +78,31 @@ app.get("/", async (req, res) => {
});
});
// NOTE(review): this span is a rendered diff hunk for the GET "/blazed" route.
// Removed and added lines appear interleaved without +/- markers, so the text
// below is not runnable as shown; read it as two variants of one handler.
//
// Both variants read the `url` query parameter, fetch that page with got(),
// and check the parsed document with isProbablyReaderable(). Depending on the
// outcome they send dist/not_blazed.html, the string "Something went wrong",
// or the `content` of the article returned by Readability#parse().
//
// One variant builds the document with `new JSDOM(...)` inside a .then()/.catch()
// chain; the other uses linkedom's `parseHTML(...)` inside try/catch/finally.
app.get("/blazed", (req, res) => {
app.get("/blazed", async (req, res) => {
// NOTE(review): the query value is cast to string and handed to got() with no
// validation in the lines shown — confirm whether a missing or non-string
// `url` (or an internal address) is handled elsewhere.
const pageToBlaze = req.query.url as string;
// Starts the "blaze" console timer; the matching timeEnd call is further down.
console.time("blaze");
got(pageToBlaze)
.then((response) => {
const dom = new JSDOM(response.body);
try {
const response = await got(pageToBlaze);
const { document } = parseHTML(response.body);
// Pages that fail the readability heuristic get the static fallback page.
if (!isProbablyReaderable(dom.window.document)) {
res.sendFile(path.join(__dirname + "/dist/not_blazed.html"));
return;
if (!isProbablyReaderable(document)) {
return res.sendFile(path.join(__dirname, "/dist/not_blazed.html"));
}
let reader = new Readability(dom.window.document);
let article = reader.parse();
const reader = new Readability(document);
const article = reader.parse();
// A falsy parse() result is reported to the client as a plain-text message.
if (!article) {
res.send("Something went wrong");
return;
return res.send("Something went wrong");
}
res.send(article.content);
})
.catch((err) => {
} catch (err) {
// NOTE(review): the error is only logged here; no response is sent on this
// path in the lines shown — confirm whether the request is left pending when
// the fetch or the parse fails.
console.log(err);
});
} finally {
// Stops the "blaze" timer started above.
console.timeEnd("blaze");
}
});
app.get("/info", (_, res) => {

821
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -19,7 +19,7 @@
"fetch": "^1.1.0",
"got": "^13.0.0",
"html-minifier": "^4.0.0",
"jsdom": "^22.1.0",
"linkedom": "^0.14.26",
"node-fetch": "^3.3.1",
"node-html-parser": "^6.1.5"
},