Remove images before parsing

This commit is contained in:
Danny Spina 2023-06-20 12:36:47 +02:00
parent 575f529f66
commit 2d87c057e6

View File

@@ -98,11 +98,13 @@ app.get("/blazed", async (req, res) => {
try { try {
const response = await got(pageToBlaze); const response = await got(pageToBlaze);
const { document } = parseHTML(response.body); const { document } = parseHTML(response.body);
if (!isProbablyReaderable(document)) { if (!isProbablyReaderable(document)) {
return res.sendFile(path.join(__dirname, "/dist/not_blazed.html")); return res.sendFile(path.join(__dirname, "/dist/not_blazed.html"));
} }
// TODO: find a more performant way to remove images, or evaluate whether images should be removed at all
document.querySelectorAll("img").forEach((img) => img.remove());
const reader = new Readability(document); const reader = new Readability(document);
const article = reader.parse(); const article = reader.parse();