From 2d87c057e67e8b73afe56b467f3eb4a2dfe733c4 Mon Sep 17 00:00:00 2001 From: Danny Spina Date: Tue, 20 Jun 2023 12:36:47 +0200 Subject: [PATCH] Remove images before parsing --- index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/index.ts b/index.ts index 5fd07e1..07d2428 100644 --- a/index.ts +++ b/index.ts @@ -98,11 +98,13 @@ app.get("/blazed", async (req, res) => { try { const response = await got(pageToBlaze); const { document } = parseHTML(response.body); - if (!isProbablyReaderable(document)) { return res.sendFile(path.join(__dirname, "/dist/not_blazed.html")); } + // TODO: find out whether there are more performant ways to remove images, or evaluate whether images should be removed at all + document.querySelectorAll("img").forEach((img) => img.remove()); + const reader = new Readability(document); const article = reader.parse();