I am utilizing Puppeteer with a clustered Node.js setup to capture screenshots from multiple website URLs. In the code snippet below, I launch a browser instance for every worker to navigate to the specified domain and capture a screenshot. However, I’m frequently encountering the following errors with some legitimate domains:
Error: Protocol error (Page.navigate): Target closed.
Error: Protocol error (Runtime.callFunctionOn): Session closed. Most likely the page has been closed.
I’ve read that these errors can occur if the page redirects and modifies the URL, possibly adding ‘www’. Can someone help me identify if I’m overlooking something important in my code or setup?
Here’s my example implementation:
const cluster = require('cluster');
const express = require('express');
const bodyParser = require('body-parser');
const puppeteer = require('puppeteer');
async function captureScreenshot(site) {
let image;
const browserInstance = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'] });
const newPage = await browserInstance.newPage();
try {
await newPage.goto('http://' + site + '/', { timeout: 60000, waitUntil: 'networkidle2' });
} catch (err) {
try {
await newPage.goto('http://' + site + '/', { timeout: 120000, waitUntil: 'networkidle2' });
image = await newPage.screenshot({ type: 'png', encoding: 'base64' });
} catch (err) {
console.error('Error connecting to: ' + site + ' due to: ' + err);
}
}
await newPage.close();
await browserInstance.close();
return image;
}
if (cluster.isMaster) {
const workerCount = require('os').cpus().length;
for (let i = 0; i < workerCount; i++) {
cluster.fork();
}
cluster.on('exit', (worker, code, signal) => {
console.debug(`Worker ${worker.process.pid} exited with code: ${code}, and signal: ${signal}`);
cluster.fork();
});
cluster.on('message', (worker, msg) => {
console.debug(`Worker: ${worker.process.pid} completed work on ${msg.site}. Terminating...`);
if (cluster.workers[worker.id]) {
cluster.workers[worker.id].kill('SIGTERM');
}
});
} else {
const app = express();
app.use(bodyParser.json());
app.listen(80, () => {
console.debug('Worker ' + process.pid + ' is ready to receive requests.');
});
app.post('/capture', (req, res) => {
const site = req.body.site;
captureScreenshot(site)
.then((image) => {
try {
process.send({ site: site });
} catch (err) {
console.error('Error while terminating worker ' + process.pid + ' due to: ' + err);
}
res.status(200).json({ image: image });
})
.catch((error) => {
try {
process.send({ site: site });
} catch (err) {
console.error('Error while terminating worker ' + process.pid + ' due to: ' + err);
}
res.status(500).json({ error: error });
});
});
}
Each worker processes requests individually and terminates after completing its task, while new browser instances are created with each request. I’m looking for assistance in diagnosing this problem. Thank you!