Created
August 16, 2020 20:59
-
-
Save JCotton1123/934d3fe90100562cd0567a61faf13e08 to your computer and use it in GitHub Desktop.
Stocktwits Message Scraper using Headless Browser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Headless-browser driver used to load and interact with the page.
const puppeteer = require('puppeteer');
// Set DEBUG=true in the environment to run with a visible browser window
// (the value is negated and passed as the `headless` launch option).
const debug = process.env.DEBUG === 'true';
// Profile page whose message stream is scraped.
const url = 'https://stocktwits.com/mrinvestorpro';
/**
 * Opens the profile page at `url`, switches the feed to "Real-Time", and then
 * polls every two seconds, logging the first article in the feed as
 * `[href, text]`.
 *
 * Relies on `puppeteer`, `debug` and `url` from the enclosing module scope.
 * The polling loop has no exit condition, so the returned promise settles
 * only by rejecting (navigation timeout, selector timeout, page closed, ...).
 * The browser is closed on the way out in every case.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const browser = await puppeteer.launch({ headless: !debug });
  try {
    console.log('Launching page');
    const page = await browser.newPage();
    await page.goto(url, {
      timeout: 60 * 1000,
      waitUntil: ['load', 'networkidle2'],
    });

    console.log('Waiting for Real-Time button');
    // waitForXPath resolves to the matched element handle, so there is no
    // need to query for the button a second time before clicking it.
    const realTimeButton = await page.waitForXPath("//span[text()='Real-Time']");
    console.log('Clicking Real-Time button');
    await realTimeButton.click();

    while (true) {
      // The banner only exists while unseen posts are pending. `$eval` takes
      // CSS selectors (not XPath) and throws when nothing matches, so look the
      // banner up with `$x` and click it only when it is actually present.
      // NOTE(review): the text match is exact ('1 New Post'); presumably the
      // banner reads differently when several posts are pending - confirm.
      const [newPostBanner] = await page.$x("//span[text()='1 New Post']");
      if (newPostBanner !== undefined) {
        await page.evaluate((el) => el.click(), newPostBanner);
      }

      // `$eval` runs the callback in the page against the first matching
      // article; the callback returns null when the expected children are
      // missing instead of failing on a null dereference.
      const latest = await page.$eval('div.infinite-scroll-component article', (el) => {
        const link = el.querySelector("a[href*='/mrinvestorpro/message']");
        const body = el.querySelector('div > div:nth-of-type(2) > div:nth-of-type(2)');
        if (link === null || body === null) {
          return null;
        }
        // Strip markup so only the message text remains.
        const msg = body.innerHTML.replace(/(<([^>]+)>)/gi, '');
        return [link.href, msg];
      });
      if (latest !== null) {
        console.log(latest);
      }

      // Plain timer instead of the deprecated `page.waitFor(ms)`.
      await new Promise((resolve) => setTimeout(resolve, 2000));
    }
  } finally {
    await browser.close();
  }
};
// Run the scraper; report a failure and exit non-zero instead of leaving the
// returned promise's rejection unhandled.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment