I’m working with web scraping on a .NET application using Puppeteer. My workflow involves a repetitive process where I need to:
- Fill out a form and submit it
- Navigate to the results page
- Parse data from a table on that page
- Go back to the original form and repeat with new parameters
The tricky part is that the results page uses the same URL each time, so I need to make sure each cycle completes fully before starting the next one. I can handle opening new pages with browser.on('targetcreated') and extracting the data, but I’m struggling with the synchronization part.
How can I make my code wait for a tab to close completely before moving on to submit the form again with different values? This seems like it might be a broader JavaScript async/await question.
Here’s my current implementation that handles the form submission and checks whether data appears immediately or requires opening a new page:
/**
 * Submits one X/Y parameter pair through the analysis form and, when the
 * results grid is rendered inline on the same page, parses it and stores it.
 *
 * When no inline grid is present, the "open results" link is clicked and the
 * function returns without parsing or saving anything; whatever handles the
 * newly opened page is outside this function.
 *
 * @param {object} currentPage - Puppeteer page showing the analysis form.
 * @param {*} recordId - Value written to the `record_id` column.
 * @param {Array} xValue - Only index 1 is used: typed into the X field and stored as `x_param`.
 * @param {Array} yValue - Only index 1 is used: typed into the Y field and stored as `y_param`.
 * @returns {Promise<void>} Resolves after the insert has completed (inline
 *   case) or after the results link has been clicked (new-page case).
 *   Rejects if any page interaction or the database insert fails.
 */
async function processDataAnalysis(currentPage, recordId, xValue, yValue) {
  const WAIT_TIMEOUT = 90000; // 90 seconds max wait
  const xText = xValue[1];
  const yText = yValue[1];

  const xInput = await currentPage.$(SELECTORS.analysis_x_field);
  await xInput.type(xText);
  const yInput = await currentPage.$(SELECTORS.analysis_y_field);
  await yInput.type(yText);

  await currentPage.click(SELECTORS.calculate_button);
  // The calculation is treated as finished once the spinner is hidden.
  await currentPage.waitForSelector(SELECTORS.loading_spinner, { timeout: WAIT_TIMEOUT, hidden: true });

  // Check whether the results appear inline.
  const inlineGrid = await currentPage.$(SELECTORS.inline_results_grid);
  if (inlineGrid === null) {
    console.log("Results require new page");
    await currentPage.click(SELECTORS.open_results_link);
    console.log("Link clicked");
    return;
  }

  console.log("Results appeared inline");
  await currentPage.screenshot({ path: `./screenshots/Results: ${xText} VS ${yText}.png` });

  const extractedData = await utils.parseTableToJson(currentPage, SELECTORS.inline_results_grid);

  // db.query is callback-based, so awaiting its return value does not wait for
  // the insert, and throwing inside the callback would escape this function's
  // promise. Adapt it to a Promise so completion and failure reach the caller.
  await new Promise((resolve, reject) => {
    db.query(
      'INSERT INTO analysis_db.results_table ( record_id, x_param, y_param, data_json ) VALUES (?,?,?,?)',
      [recordId, xText, yText, JSON.stringify(extractedData)],
      (error) => {
        if (error) {
          reject(error);
          return;
        }
        resolve();
      },
    );
  });
  console.log("Data saved successfully");
}