I’m working on a web scraping project using Puppeteer in Node.js with worker processes to capture website screenshots. The setup works most of the time, but I keep running into protocol errors with certain websites that seem to load fine in regular browsers.
const cluster = require('cluster');
const express = require('express');
const bodyParser = require('body-parser');
const puppeteer = require('puppeteer');
async function captureWebsiteImage(url) {
let imageData;
const browserInstance = await puppeteer.launch({
args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
});
const newPage = await browserInstance.newPage();
try {
await newPage.goto('http://' + url + '/', { timeout: 60000, waitUntil: 'networkidle2' });
} catch (err) {
try {
await newPage.goto('http://' + url + '/', { timeout: 120000, waitUntil: 'networkidle2' });
imageData = await newPage.screenshot({ type: 'png', encoding: 'base64' });
} catch (err) {
console.error('Failed to load: ' + url + ' error: ' + err);
}
}
await newPage.close();
await browserInstance.close();
return imageData;
}
if (cluster.isMaster) {
const workerCount = require('os').cpus().length;
for (let i = 0; i < workerCount; i++) {
cluster.fork();
}
cluster.on('exit', function (worker, code, signal) {
console.log('Worker ' + worker.process.pid + ' exited with code: ' + code);
cluster.fork();
});
} else {
const server = express();
server.use(bodyParser.json());
server.listen(80);
server.post('/capture', (req, res) => {
const websiteUrl = req.body.url;
captureWebsiteImage(websiteUrl)
.then((image) => {
res.status(200).json({ image: image });
})
.catch((err) => {
res.status(500).json({ error: err });
});
});
}
The error messages I keep seeing are:
Error: Protocol error (Page.navigate): Target closed.
Error: Protocol error (Runtime.callFunctionOn): Session closed. Most likely the page has been closed.
I’m not sure what causes this. Could it be related to redirects or some other page behavior? Has anyone encountered similar issues with Puppeteer navigation?