Scraping table data with pagination involves extracting data from a web table and navigating through multiple pages to collect all available records. It can be achieved using JavaScript with the following approach:
Here’s a sample function, scrapeTable, in JavaScript:
/**
 * Scrapes a paginated HTML table from the current document.
 *
 * Reads every row of the table on the current page, clicks the "next"
 * pagination control, waits, and repeats until the control is missing or
 * disabled, or until `maxPages` pages have been read. Must run in a browser
 * context because it uses the global `document`.
 *
 * @param {object} [options]
 * @param {number} [options.maxPages=5] - Upper bound on pages to read. The
 *   current page is always read, even if this is less than 1.
 * @param {string} [options.tableSelector='#products > table'] - CSS selector
 *   of the table to read.
 * @param {string} [options.nextSelector='.pagination-next > a'] - CSS
 *   selector of the "next page" control.
 * @param {number} [options.pageLoadDelayMs=1000] - Fixed time to wait after
 *   clicking "next" before reading the table again.
 * @returns {Promise<string[][]>} One array of trimmed cell texts per row,
 *   in page order.
 */
async function scrapeTable({
  maxPages = 5,
  tableSelector = '#products > table',
  nextSelector = '.pagination-next > a',
  pageLoadDelayMs = 1000,
} = {}) {
  const tableData = [];

  // Reads the rows of the table currently in the DOM; [] when it is absent.
  const extractTableData = () => {
    const table = document.querySelector(tableSelector);
    if (!table) {
      return [];
    }
    return Array.from(table.querySelectorAll('tbody > tr'), (row) =>
      Array.from(row.querySelectorAll('td'), (cell) => cell.textContent.trim()),
    );
  };

  // Anchors have no `disabled` property, so also honor the common markup
  // conventions for a disabled pagination link. Without this, the last page
  // would be re-scraped until the page limit is hit.
  const isDisabled = (control) => {
    if (control.disabled === true || control.hasAttribute('disabled')) {
      return true;
    }
    if (control.getAttribute('aria-disabled') === 'true') {
      return true;
    }
    if (control.classList.contains('disabled')) {
      return true;
    }
    const parent = control.parentElement;
    return parent != null && parent.classList.contains('disabled');
  };

  // Clicks "next" and waits; resolves to false when there is no usable control.
  const goToNextPage = async () => {
    const nextButton = document.querySelector(nextSelector);
    if (!nextButton || isDisabled(nextButton)) {
      return false;
    }
    nextButton.click();
    // Simple fixed wait for the next page's rows to render.
    await new Promise((resolve) => setTimeout(resolve, pageLoadDelayMs));
    return true;
  };

  for (let pagesScraped = 1; ; pagesScraped += 1) {
    tableData.push(...extractTableData());
    if (pagesScraped >= maxPages) {
      break;
    }
    if (!(await goToNextPage())) {
      break;
    }
  }

  console.log('Scraped Data:', tableData);
  return tableData;
}
The above script can be used in a Chrome extension (or any other browser extension) to page through the table and collect the data. When the table is populated by a frontend framework, the HTML element may not be in the DOM yet when the script runs, so we might need to wait until it is. To do that, you can use the following code:
// Start scraping only once the "next" pagination link is present in the DOM.
waitForElement(".pagination-next > a", function () {
  // scrapeTable() returns a promise; report a failure instead of leaving
  // the rejection unhandled.
  scrapeTable().catch(function (error) {
    console.error('Scraping failed:', error);
  });
});
The waitForElement function can look like the code below; adjust it as needed:
/**
 * Polls the document until an element matching `el` exists, then calls
 * `callback` once and stops polling.
 *
 * @param {string} el - CSS selector of the element to wait for.
 * @param {Function} callback - Invoked with no arguments once a matching
 *   element is found.
 * @param {number} [intervalMs=1000] - Time between checks, in milliseconds.
 *   The first check happens after one interval.
 */
function waitForElement(el, callback, intervalMs = 1000) {
  const pollTimer = setInterval(() => {
    // querySelector returns null until a match exists. The previous
    // getElementsByClassName check always passed: it returns a (truthy)
    // collection even when empty, and it does not accept CSS selectors.
    if (document.querySelector(el) === null) {
      return;
    }
    clearInterval(pollTimer);
    callback();
  }, intervalMs);
}
This method works for client-rendered pages but may not work for server-side-rendered or protected websites (e.g., those with CAPTCHA or anti-scraping measures). For complex tasks, consider using a scraping library with headless browser automation.
Enjoy!
Scraping table data with pagination using Javascript
https://bhupalsapkota.com/scraping-table-data-with-pagination-using-javascript/