I am attempting to scrape data from a specific website, but Puppeteer is returning an empty array even after executing my scraping code. Below is an overview of my implementation, where I navigate through multiple pages to collect names while removing certain elements that contain ads:
const puppeteer = require("puppeteer");
const express = require("express");
const cors = require("cors");
const server = express();
server.use(cors());
let scrapedData = [];
(async () => {
const browserInstance = await puppeteer.launch({
headless: false,
defaultViewport: null,
});
const newPage = await browserInstance.newPage();
for (let pageIndex = 1; pageIndex < 42; pageIndex++) {
await newPage.goto(`https://naamhinaam.com/baby-girl-names-a?page=${pageIndex}`);
await newPage.waitForTimeout(3000);
await newPage.click("#promotionalPopup > div > div > div > button > span");
await newPage.$eval(
"div.name-suggestion.mt-1 > div > div:nth-child(22)",
(element) => element.remove()
);
await newPage.$eval(
"div.name-suggestion.mt-1 > div > div:nth-child(43)",
(element) => element.remove()
);
for (let itemIndex = 3; itemIndex < 54; itemIndex++) {
let name = "Not Found";
if (await newPage.$("div.name-suggestion.mt-1 > div > div:nth-child(22)")) {
continue;
}
await newPage.waitForSelector(
`div.name-suggestion.mt-1 > div > div:nth-child(${itemIndex}) > div.nsg__name_meaning > a`
);
let nameElement = await newPage.$(
`div.name-suggestion.mt-1 > div > div:nth-child(${itemIndex}) > div.nsg__name_meaning > a`
);
name = await newPage.evaluate((el) => el.textContent, nameElement);
scrapedData.push({ name });
}
console.log(scrapedData);
}
await browserInstance.close();
})();
server.get("/", (request, response) => {
response.status(200).json(scrapedData);
});
server.listen(3000, () => {
console.log("Server is live...");
});
I am specifically removing certain elements to avoid ads. However, in the end, I receive an empty array. Could anyone offer guidance on what might be causing this issue?