My Problem:
I switched from regular Puppeteer to Puppeteer-Core combined with @sparticuz/chromium because of memory issues. I need to verify if a script loads dynamically and if a specific UI component appears on web pages.
My Current Approach:
- Block image loading to speed things up
- Get all script URLs from the page to verify script installation
- Look for a specific UI component (.my-widget-element)
- Changed from ‘load’ to ‘networkidle2’ wait condition for better dynamic content detection
The Problem:
• Script detection works fine but UI element detection is unreliable
• UI elements seem to need more time to load even with 30 second timeout
• Right now I navigate to the same page twice (first with ‘load’ then ‘networkidle2’) which feels wrong
My Code:
const puppeteer = require("puppeteer-core");
const chromium = require("@sparticuz/chromium");
const WIDGET_SCRIPT_URL = "https://cdn.example.com/widget/js/"; // Sample URL
class DynamicContentDetector {
async disableImageLoading(page) {
await page.setRequestInterception(true);
page.on("request", (req) => {
if (req.resourceType() === "image") {
req.abort();
} else {
req.continue();
}
});
}
checkForWidgetScript(scriptList, widgetCode) {
let widgetScriptUrl = WIDGET_SCRIPT_URL + widgetCode;
return scriptList.some((scriptSrc) => scriptSrc && scriptSrc.includes(widgetScriptUrl));
}
async extractScriptSources(page) {
return await page.evaluate(() => {
return Array.from(document.querySelectorAll("script"))
.map((script) => script.getAttribute("src"))
.filter(Boolean);
});
}
async checkWidgetInstallation(siteUrl, widgetCode) {
let scriptFound = false;
let elementFound = false;
let browser;
try {
browser = await puppeteer.launch({
args: [...chromium.args, "--no-sandbox", "--disable-setuid-sandbox"],
executablePath: await chromium.executablePath(),
headless: true,
ignoreHTTPSErrors: true,
});
console.log("Browser started");
const page = await browser.newPage();
await this.disableImageLoading(page);
await page.goto(siteUrl, { waitUntil: "load" });
console.log("Page loaded");
const scriptSources = await this.extractScriptSources(page);
scriptFound = this.checkForWidgetScript(scriptSources, widgetCode);
console.log("Script found:", scriptFound);
await page.goto(siteUrl, { waitUntil: "networkidle2" });
console.log("Switched to networkidle2 for element detection");
try {
await page.waitForSelector(".my-widget-element", { timeout: 30000 });
elementFound = true;
console.log("Widget element found:", elementFound);
} catch (error) {
console.log("Widget element not detected within timeout");
}
await browser.close();
} catch (error) {
console.error("Detection error:", error);
if (browser) await browser.close();
}
return { scriptFound, elementFound };
}
}
Is there a better way to handle dynamic content detection without navigating twice?