Created
November 26, 2019 04:08
-
-
Save koladilip/279245da667de226f6b6bdae5242653f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Puppeteer launches and drives the browser used to load and scroll pages.
const puppeteer = require('puppeteer');
/**
 * Returns a promise that resolves (with no value) after the given delay.
 *
 * @param {number} timeInMills - Delay in milliseconds, passed to setTimeout.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function wait(timeInMills) {
  return new Promise((resolve) => {
    setTimeout(resolve, timeInMills);
  });
}
/**
 * Scrolls the page down repeatedly until a scroll no longer changes the
 * length of the page's serialized HTML.
 *
 * After every scroll it pauses for 5-10 seconds (randomised) before
 * re-measuring the content, then scrolls again if the length changed.
 *
 * @param {object} page - Puppeteer page; content(), evaluate() and url() are used.
 * @param {number} [maxScrolls=Infinity] - Upper bound on the number of scrolls.
 *   The default keeps scrolling until the content length stops changing; pass
 *   a finite value to guard against pages that never stop growing.
 * @returns {Promise<void>} Resolves when scrolling has finished.
 */
async function scrollPage(page, maxScrolls = Infinity) {
  let previousLength = (await page.content()).length;
  // Iterating (instead of recursing) avoids an ever-growing chain of pending
  // async calls on long pages.
  for (let scrolls = 0; scrolls < maxScrolls; scrolls += 1) {
    await page.evaluate(() => {
      window.scrollBy(0, document.body.scrollHeight);
    });
    // Pause between 5 and 10 seconds before measuring the content again.
    await wait(Math.max(5000, 10000 * Math.random()));
    const currentLength = (await page.content()).length;
    if (currentLength === previousLength) {
      return;
    }
    console.log("Scrolling page:", page.url(), "for more content");
    // Reuse the fresh measurement as the baseline for the next round.
    previousLength = currentLength;
  }
}
/**
 * Opens the URL in a browser launched through Puppeteer, optionally scrolls
 * the page until its content stops changing, and returns the page's HTML.
 *
 * @param {string} pageUrl - Address passed to page.goto().
 * @param {boolean} shouldScrollPage - When truthy, scrollPage() runs before
 *   the content is read.
 * @returns {Promise<string>} The value of page.content() after loading
 *   (and scrolling, if requested).
 */
async function getPageData(pageUrl, shouldScrollPage) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(pageUrl);
    if (shouldScrollPage) {
      await scrollPage(page);
    }
    return await page.content();
  } finally {
    // Always shut the browser down, even when navigation or scrolling throws,
    // so no browser process is left running. Closing the browser also closes
    // its pages.
    await browser.close();
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
thank you