Last active
April 5, 2025 09:41
-
-
Save madindo/783afb2db457ca16ac37380284a9e984 to your computer and use it in GitHub Desktop.
Scraping a Twitter user profile (not perfect) | npm init -y | npm install puppeteer | node twitterScrapper.js realDonaldTrump
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require("puppeteer"); | |
const fs = require("fs"); | |
// Get username from CLI args.
// The value ends up in both the profile URL and the output file name, so it
// is normalized and validated before use: a leading "@" is dropped, and
// anything other than letters, digits and underscores (max 15 characters) is
// rejected so input such as "../x" cannot steer the output path elsewhere.
const rawUsername = process.argv[2];
if (!rawUsername) {
  console.error("β Please provide a Twitter username.");
  console.log("Usage: node twitterScrapper.js <username>");
  process.exit(1);
}
const username = rawUsername.replace(/^@/, "");
if (!/^[A-Za-z0-9_]{1,15}$/.test(username)) {
  console.error(`Invalid Twitter username: ${rawUsername}`);
  console.log("Usage: node twitterScrapper.js <username>");
  process.exit(1);
}
const profileUrl = `https://twitter.com/${username}`;
const outputFile = `${username}_tweets.json`;
/**
 * Scrolls through a profile page, collecting the text of every element that
 * matches `article div[lang]`, and writes the unique texts to a JSON file.
 *
 * Texts are de-duplicated with a Set, so two entries with identical text are
 * stored once. Scrolling stops once the page height has stayed unchanged for
 * three consecutive rounds.
 *
 * @param {string} profileUrl - URL of the profile page to open.
 * @param {string} outputFile - Path of the JSON file to write (a pretty-printed
 *   array of strings).
 * @returns {Promise<void>} Resolves after the file is written and the browser
 *   has been closed.
 * @throws Propagates any error from launching the browser, navigating,
 *   evaluating in the page or writing the file; the browser is closed first.
 */
async function scrapeAllTweets(profileUrl, outputFile) {
  // Pause after each scroll pass so lazily loaded content can arrive.
  const SETTLE_DELAY_MS = 2500;
  // Number of consecutive unchanged-height rounds treated as "end of page".
  const MAX_STUCK_ROUNDS = 3;

  const browser = await puppeteer.launch({
    headless: true,
    defaultViewport: null,
    args: ["--no-sandbox"],
  });

  // Everything after launch runs inside try/finally so the browser process is
  // closed even when navigation, page evaluation or the file write throws.
  try {
    const page = await browser.newPage();
    await page.goto(profileUrl, { waitUntil: "networkidle2" });

    const tweets = new Set();
    let lastHeight = 0;
    let stuckCounter = 0;

    console.log(`π Scraping tweets from: ${profileUrl}`);

    while (true) {
      // Grab the text of whatever is currently in the DOM.
      const newTweets = await page.evaluate(() => {
        const tweetElements = document.querySelectorAll("article div[lang]");
        return Array.from(tweetElements).map((el) => el.innerText.trim());
      });
      for (const tweet of newTweets) {
        if (tweet.length > 0) tweets.add(tweet);
      }

      // Scroll smoothly: 500px every 300ms until the distance scrolled in this
      // pass covers the (possibly growing) document height.
      await page.evaluate(async () => {
        await new Promise((resolve) => {
          let totalHeight = 0;
          const distance = 500;
          const timer = setInterval(() => {
            window.scrollBy(0, distance);
            totalHeight += distance;
            if (totalHeight >= document.body.scrollHeight) {
              clearInterval(timer);
              resolve();
            }
          }, 300);
        });
      });

      await new Promise((r) => setTimeout(r, SETTLE_DELAY_MS));

      // An unchanged height means this pass loaded nothing new.
      const newHeight = await page.evaluate(() => document.body.scrollHeight);
      if (newHeight === lastHeight) {
        stuckCounter++;
      } else {
        stuckCounter = 0;
      }
      lastHeight = newHeight;

      if (stuckCounter >= MAX_STUCK_ROUNDS) {
        console.log("β Reached the end or no more tweets are loading.");
        break;
      }
      console.log(`π Collected: ${tweets.size} tweets so far...`);
    }

    const tweetArray = Array.from(tweets);
    fs.writeFileSync(outputFile, JSON.stringify(tweetArray, null, 2));
    console.log(`\nπΎ Saved ${tweetArray.length} tweets to: ${outputFile}`);
  } finally {
    // A failure to close is logged rather than thrown so it cannot mask an
    // error raised by the scraping itself.
    try {
      await browser.close();
    } catch (e) {
      console.error("β Error closing browser:", e);
    }
  }
}
// Start the scrape. The returned promise is handled explicitly so a failure is
// reported and reflected in the exit status instead of surfacing as an
// unhandled rejection.
scrapeAllTweets(profileUrl, outputFile).catch((err) => {
  console.error("Scraping failed:", err);
  process.exit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment