Skip to content

Instantly share code, notes, and snippets.

@madindo
Last active April 5, 2025 09:41
Show Gist options
  • Save madindo/783afb2db457ca16ac37380284a9e984 to your computer and use it in GitHub Desktop.
Save madindo/783afb2db457ca16ac37380284a9e984 to your computer and use it in GitHub Desktop.
Scraping a Twitter user profile (not perfect) | npm init -y | npm install puppeteer | node twitterScrapper.js realDonaldTrump
const puppeteer = require("puppeteer");
const fs = require("fs");
// Read the target account from the first CLI argument. A leading "@" is
// accepted and dropped so "@name" and "name" behave the same.
const username = (process.argv[2] || "").replace(/^@/, "");
if (!username) {
  console.error("❌ Please provide a Twitter username.");
  console.log("Usage: node twitterScrapper.js <username>");
  process.exit(1);
}
// The handle is interpolated into both a URL and an output file path below,
// so reject anything other than letters, digits and underscores (this keeps
// path separators such as "/" or ".." out of the file name).
if (!/^[A-Za-z0-9_]+$/.test(username)) {
  console.error(`❌ Invalid Twitter username: ${username}`);
  console.log("Usage: node twitterScrapper.js <username>");
  process.exit(1);
}
// Profile page to scrape and the JSON file the collected tweets are written to.
const profileUrl = `https://twitter.com/${username}`;
const outputFile = `${username}_tweets.json`;
/**
 * Open a profile page in headless Chromium, scroll it until it stops growing,
 * and write the unique tweet texts found along the way to a JSON file.
 *
 * Tweet text is read from every element matching `article div[lang]`.
 * Collection happens on every scroll step rather than once per round, so
 * elements that are only present in the DOM while on screen are still
 * captured (assumption: the timeline may drop off-screen tweets — verify).
 *
 * @param {string} profileUrl - URL of the profile page to open.
 * @param {string} outputFile - Path of the JSON file to write; it receives a
 *   pretty-printed array of tweet text strings.
 * @returns {Promise<void>} Resolves after the file is written and the browser
 *   has been closed.
 * @throws Rejects with whatever Puppeteer or `fs.writeFileSync` throws; the
 *   browser is closed before the rejection propagates.
 */
async function scrapeAllTweets(profileUrl, outputFile) {
  const TWEET_SELECTOR = "article div[lang]";
  const SCROLL_STEP_PX = 500;
  const SCROLL_INTERVAL_MS = 300;
  // Pause after reaching the bottom so lazily loaded content can arrive.
  const SETTLE_DELAY_MS = 2500;
  // Number of consecutive rounds with an unchanged page height before giving up.
  const MAX_STUCK_ROUNDS = 3;

  const browser = await puppeteer.launch({
    headless: true,
    defaultViewport: null,
    args: ["--no-sandbox"],
  });

  try {
    const page = await browser.newPage();
    await page.goto(profileUrl, { waitUntil: "networkidle2" });

    const tweets = new Set();
    let lastHeight = 0;
    let stuckCounter = 0;
    console.log(`🔍 Scraping tweets from: ${profileUrl}`);

    while (true) {
      // Runs inside the page: scroll down step by step from the current
      // position, harvesting tweet text at every step, and resolve with the
      // texts once the viewport reaches the bottom or can no longer move.
      const newTweets = await page.evaluate(
        ({ selector, distance, intervalMs }) =>
          new Promise((resolve) => {
            const texts = new Set();
            const collect = () => {
              document.querySelectorAll(selector).forEach((el) => {
                texts.add(el.innerText.trim());
              });
            };
            collect();
            const timer = setInterval(() => {
              const before = window.scrollY;
              // "instant" makes scrollY update synchronously so the
              // did-not-move check below is reliable.
              window.scrollBy({ top: distance, left: 0, behavior: "instant" });
              collect();
              const viewportBottom = window.scrollY + window.innerHeight;
              const atBottom = viewportBottom >= document.body.scrollHeight - 1;
              const didNotMove = window.scrollY === before;
              if (atBottom || didNotMove) {
                clearInterval(timer);
                resolve(Array.from(texts));
              }
            }, intervalMs);
          }),
        {
          selector: TWEET_SELECTOR,
          distance: SCROLL_STEP_PX,
          intervalMs: SCROLL_INTERVAL_MS,
        }
      );

      newTweets.forEach((tweet) => {
        if (tweet.length > 0) tweets.add(tweet);
      });

      await new Promise((r) => setTimeout(r, SETTLE_DELAY_MS));

      // An unchanged page height after the settle delay means nothing new
      // was loaded during this round.
      const newHeight = await page.evaluate("document.body.scrollHeight");
      if (newHeight === lastHeight) {
        stuckCounter++;
      } else {
        stuckCounter = 0;
      }
      lastHeight = newHeight;

      if (stuckCounter >= MAX_STUCK_ROUNDS) {
        console.log("✅ Reached the end or no more tweets are loading.");
        break;
      }
      console.log(`📈 Collected: ${tweets.size} tweets so far...`);
    }

    const tweetArray = Array.from(tweets);
    fs.writeFileSync(outputFile, JSON.stringify(tweetArray, null, 2));
    console.log(`\n💾 Saved ${tweetArray.length} tweets to: ${outputFile}`);
  } finally {
    // Always release the Chromium process, even when scraping failed; a
    // failure to close is reported but does not mask the original error.
    try {
      await browser.close();
    } catch (e) {
      console.error("❌ Error closing browser:", e);
    }
  }
}
// Run the scraper; report any failure and exit non-zero instead of leaving
// the rejected promise unhandled.
scrapeAllTweets(profileUrl, outputFile).catch((err) => {
  console.error("❌ Scraping failed:", err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment