Skip to content

Instantly share code, notes, and snippets.

@tkssharma
Created December 29, 2020 17:16
Show Gist options
  • Save tkssharma/56086ee6a9b8988d977270f0ef70eccc to your computer and use it in GitHub Desktop.
Save tkssharma/56086ee6a9b8988d977270f0ef70eccc to your computer and use it in GitHub Desktop.
const { startBrowser } = require("./browser");
const cheerio = require("cheerio");
/**
 * Opens the listing page at `url`, collects the job preview links found on it,
 * then visits each link in turn and prints that job's overview data.
 *
 * The browser is always closed, even when a navigation, selector wait or
 * parsing step throws, so a failed run does not leave a browser process behind.
 *
 * @param {string} url - Address of the job listing page to start from.
 * @returns {Promise<void>} Resolves once every job page has been processed and
 *   the browser has been closed; rejects with the first error encountered.
 */
async function scrapeData(url) {
  // NOTE(review): startBrowser comes from ./browser; presumably it resolves to a
  // Puppeteer-style Browser instance — confirm against that module.
  const browser = await startBrowser();
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle0" });
    await page.waitForSelector(".listResults");
    const content = await page.content();
    const links = await getJobData(content);
    // Pages are visited one at a time because a single page object is reused.
    for (const link of links) {
      // A missing/empty link would navigate to a bogus URL and then fail the
      // selector wait below, aborting the whole run — skip it instead.
      if (!link) {
        continue;
      }
      await page.goto(`https://stackoverflow.com${link}`);
      await page.waitForSelector('.job-details--header');
      const pageContent = await page.content();
      await getCurrentPageJobData(pageContent);
    }
  } finally {
    // Release the browser on both the success and the failure path.
    await browser.close();
  }
}
/**
 * Collects the `data-preview-url` attribute of every `.-job` element in the
 * given HTML, in document order.
 *
 * Elements whose attribute is missing or empty are skipped, so the result
 * contains only usable link strings (never `undefined`).
 *
 * @param {string} html - HTML markup of the listing page.
 * @returns {Promise<string[]>} The preview URLs found; empty when none match.
 */
async function getJobData(html) {
  const $ = cheerio.load(html);
  const links = [];
  $('.-job').each((i, element) => {
    const previewUrl = $(element).attr('data-preview-url');
    // attr() yields undefined when the attribute is absent; an empty value is
    // equally unusable as a link, so a truthiness check covers both cases.
    if (previewUrl) {
      links.push(previewUrl);
    }
  });
  return links;
}
/**
 * Prints the text of every `span` nested inside a `section` under each
 * `#overview-items` element of the given HTML, one `console.log` call per span.
 *
 * @param {string} html - HTML markup of a single job page.
 * @returns {Promise<void>} Resolves after all matching span texts are logged.
 */
async function getCurrentPageJobData(html) {
  const $ = cheerio.load(html);
  const overviewNodes = $('#overview-items').toArray();
  for (const overviewNode of overviewNodes) {
    const spanNodes = $(overviewNode).find('section').find('span').toArray();
    for (const spanNode of spanNodes) {
      console.log($(spanNode).text());
    }
  }
}
// Script entry point: start scraping from the jobs listing page. The rejection
// handler keeps the promise from floating: a failure is reported on stderr and
// the process exits with a non-zero status.
scrapeData("https://stackoverflow.com/jobs").catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment