-
-
Save n0ncetonic/1fe6616f6741be4ba8de3cd9f391b9b7 to your computer and use it in GitHub Desktop.
How to grab the page source from any dynamically generated webpage and then process it.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const CDP = require('chrome-remote-interface'); | |
const chromeLauncher = require('chrome-launcher'); | |
const cheerio = require('cheerio'); | |
/**
 * Script entry point.
 *
 * Launches headless Chrome, opens the showtime page, clicks the second entry
 * of the language switcher, captures the rendered DOM as HTML and prints the
 * cinema name and movie names found in it using cheerio.
 *
 * Chrome and the DevTools connection are always released, even when a step
 * fails, and any failure is reported with a non-zero exit code.
 */
(async function() {
  const SHOWTIME_URL = 'https://www.sfcinemacity.com/showtime/cinema/9936';
  const JS_SETTLE_MS = 3000;

  /** Resolves after `ms` milliseconds. */
  const timeout = ms => new Promise(resolve => setTimeout(resolve, ms));

  /**
   * Loads `url` in headless Chrome and returns the rendered document as HTML.
   *
   * @param {string} url - Page to load.
   * @returns {Promise<string>} Outer HTML of the document root.
   */
  const fetchPageSource = async url => {
    const chrome = await chromeLauncher.launch({
      chromeFlags: ['--disable-gpu', '--headless']
    });
    let protocol;
    try {
      protocol = await CDP({ port: chrome.port });

      // See API docs: https://chromedevtools.github.io/devtools-protocol/
      const { Page, Runtime, DOM } = protocol;
      await Promise.all([Page.enable(), Runtime.enable(), DOM.enable()]);

      // Subscribe to the load event BEFORE navigating so it cannot be missed.
      const pageLoaded = new Promise(resolve => Page.loadEventFired(resolve));
      await Page.navigate({ url });
      await pageLoaded;

      console.log('Page loaded! Now waiting a few seconds for all the JS to load...');
      await timeout(JS_SETTLE_MS); // give the JS some time to load

      console.log('Selecting English..');
      // first set the language to English
      const evaluation = await Runtime.evaluate({
        expression:
          "document.querySelector('.lang-switcher li:nth-of-type(2) a').click()"
      });
      // An in-page exception (e.g. the selector matched nothing) is reported
      // via exceptionDetails instead of rejecting; surface it but keep going,
      // since the page can still be scraped in its current language.
      if (evaluation.exceptionDetails) {
        console.warn(
          'Could not switch language:',
          evaluation.exceptionDetails.text
        );
      }

      // get the page source (depth: -1 requests the entire subtree)
      const rootNode = await DOM.getDocument({ depth: -1 });
      const pageSource = await DOM.getOuterHTML({
        nodeId: rootNode.root.nodeId
      });
      return pageSource.outerHTML;
    } finally {
      // Always release the browser, even if a step above failed.
      try {
        if (protocol) {
          await protocol.close();
        }
      } finally {
        await chrome.kill();
      }
    }
  };

  /**
   * Parses `html` with cheerio and prints the cinema name and movie names.
   *
   * @param {string} html - Rendered page source.
   */
  const printShowtimes = html => {
    // load the page source into cheerio
    const $ = cheerio.load(html);
    // perform queries
    console.log('Getting movie times for', $('.showtime-cinema-name').text());
    $('.showtime-box').each((i, movieElement) => {
      console.log($(movieElement).find('.movie-detail .name').text());
    });
  };

  try {
    const html = await fetchPageSource(SHOWTIME_URL);
    console.log('Processing page source...');
    printShowtimes(html);
  } catch (err) {
    console.error(err);
    process.exitCode = 1;
  }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment