Issue Description
I’m having trouble getting the final page content after redirects when using Puppeteer. The script either times out or redirects to the wrong URL, preventing me from accessing the final content.
My setup:
- Puppeteer version: 1.0.0
- Platform: Linux / CentOS 7.0
- Node.js version: v9.4.0
Code example:
const puppeteer = require('puppeteer');
// Usage: node <script> <url>
//
// Opens <url> in headless Chromium, follows any redirects, and prints the
// final URL together with the final page's HTML. Every request/response is
// logged along the way so the redirect chain can be traced.

// `slice` (not `splice`) so process.argv is left untouched, and a name other
// than `arguments`, which shadows the built-in and is illegal in strict mode.
const cliArgs = process.argv.slice(2);
const targetUrl = cliArgs[0];
const userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0';

if (!targetUrl) {
  console.error('Usage: node <script> <url>');
  process.exit(1);
}

puppeteer.launch({
  ignoreHTTPSErrors: true,
  // Time allowed for Chromium itself to start. The previous 1000 ms was
  // shorter than a typical cold start and could fail the launch outright.
  timeout: 30000,
  args: ['--no-sandbox', '--disable-setuid-sandbox']
}).then(async (browser) => {
  try {
    const newPage = await browser.newPage();
    await newPage.setExtraHTTPHeaders({
      'upgrade-insecure-requests': '1'
    });
    // setUserAgent is asynchronous; await it so the header is in place
    // before the first navigation request is sent.
    await newPage.setUserAgent(userAgent);
    newPage.setDefaultNavigationTimeout(25000);
    await newPage.setRequestInterception(true);

    // Skip heavy resources that are irrelevant to the HTML content.
    newPage.on('request', (req) => {
      const resourceType = req.resourceType();
      if (resourceType === 'image' || resourceType === 'media') {
        req.abort();
      } else {
        console.log(`requesting: ${req.url()}`);
        req.continue();
      }
    });

    newPage.on('response', (res) => {
      console.log(`responded: ${res.url()}`);
      // The resource type belongs to the originating request; the variable of
      // the same name in the 'request' handler is not in scope here.
      const isDocument = res.request().resourceType() === 'document';
      // Redirect responses (3xx) carry no body, so text() would reject.
      const status = res.status();
      const isRedirect = status >= 300 && status < 400;
      if (isDocument && !isRedirect) {
        res.text()
          .then((content) => {
            console.log(content);
          })
          .catch((error) => {
            console.error(`could not read body of ${res.url()}: ${error}`);
          });
      }
    });

    await newPage.goto(targetUrl, {
      waitUntil: 'networkidle2',
    });

    // After navigation settles, the page reflects the end of the redirect
    // chain: report where we landed and what was rendered there.
    console.log(`final url: ${newPage.url()}`);
    console.log(await newPage.content());
  } catch (error) {
    // `includes` instead of a bare `indexOf`, whose -1 ("not found") is
    // truthy and whose 0 ("found at start") is falsy.
    if (String(error).includes('Timeout')) {
      console.log("Timeout occurred!");
    } else {
      // Report unexpected failures instead of silently dropping them.
      console.error(error);
    }
  } finally {
    // Always shut Chromium down, and wait for it, so no browser process is
    // left behind on either the success or the failure path.
    await browser.close();
  }
}).catch((error) => {
  // Reached when the browser fails to launch or to close.
  console.error(error);
});
Expected behavior: Should capture the correct final page content after redirects
Actual behavior: The script either times out (increasing timeout doesn’t help) or gets redirected to incorrect URLs, making it impossible to retrieve the final content. Interestingly, PhantomJS works fine with the same scenario.