Skip to content

Instantly share code, notes, and snippets.

@CharlyWargnier
Created June 5, 2025 14:55
Show Gist options
  • Save CharlyWargnier/d600a8fce4839a858d2b08404cbc534e to your computer and use it in GitHub Desktop.
Save CharlyWargnier/d600a8fce4839a858d2b08404cbc534e to your computer and use it in GitHub Desktop.
Hacker News Auto Digest - powered by Stagehand!
import { Stagehand } from "@browserbasehq/stagehand";
import fs from "fs";
import path from "path";
import Tesseract from "tesseract.js";
import { OpenAI } from "openai";
import dotenv from "dotenv";
// Load variables from a local .env file into process.env. This must run before
// the client below is constructed, because it reads OPENAI_API_KEY from there.
dotenv.config();
// Shared OpenAI client used by summarizeWithGPT for chat completion requests.
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Runs Tesseract OCR over an image file using the "eng" language data and
 * resolves with the text it recognized.
 *
 * @param imagePath - Path of the image to run recognition on.
 * @returns The recognized text reported by Tesseract.
 */
async function extractTextFromImage(imagePath: string) {
  console.log(`πŸ” Running OCR on: ${imagePath}`);

  const { data } = await Tesseract.recognize(imagePath, "eng", {
    // Echo every status update Tesseract emits while it works.
    logger: (progress) => console.log(progress.status),
  });

  return data.text;
}
/**
 * Summarizes OCR-extracted page text with the OpenAI chat completions API.
 *
 * Only the first 300 whitespace-separated words of `text` are sent, which
 * keeps the prompt small regardless of how long the captured page was.
 *
 * @param text - Raw OCR output to summarize.
 * @returns The model's summary, or an empty string when the response carries
 *   no choice or no message content.
 */
async function summarizeWithGPT(text: string): Promise<string> {
  console.log("πŸ’‘ Sending first 300 words of OCR text to GPT-4 for summarization...");

  // Trim before splitting: leading whitespace would otherwise produce an empty
  // first token that silently takes up one of the 300 word slots.
  const first300Words = text.trim().split(/\s+/).slice(0, 300).join(" ");

  const response = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    messages: [
      {
        role: "system",
        content: "You summarize webpage content based on OCR extracted text.",
      },
      {
        role: "user",
        content: `Please summarize this text:\n\n${first300Words}`,
      },
    ],
    temperature: 0.7,
  });

  // `choices` can be empty and `content` can be null; fall back to "" in both
  // cases rather than throwing a TypeError on the property access.
  return response.choices[0]?.message?.content ?? "";
}
/** Resolves after `ms` milliseconds without relying on the browser page. */
function pause(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Quotes a value as a CSV field, doubling embedded double quotes (RFC 4180). */
function toCsvField(value: string): string {
  return `"${value.replace(/"/g, '""')}"`;
}

/**
 * Merges every `.txt` summary found in `summariesTextsFolder` into
 * `summaries.md`, `summaries.json` and `summaries.csv` inside `summariesFolder`.
 */
function mergeSummaries(summariesFolder: string, summariesTextsFolder: string): void {
  // readdirSync makes no ordering promise; sort so the merged files are
  // produced in the same order on every platform and run.
  const summaryFiles = fs
    .readdirSync(summariesTextsFolder)
    .filter((file) => file.endsWith(".txt"))
    .sort();

  const markdownOutput: string[] = [];
  const jsonOutput: Record<string, string> = {};
  const csvOutput: string[] = ["id,summary"];

  for (const file of summaryFiles) {
    const id = path.basename(file, ".txt");
    const content = fs.readFileSync(path.join(summariesTextsFolder, file), "utf-8").trim();
    markdownOutput.push(`### ${id}\n\n${content}\n`);
    jsonOutput[id] = content;
    // Escape quotes by doubling them so the summary text survives unchanged.
    csvOutput.push(`${toCsvField(id)},${toCsvField(content)}`);
  }

  fs.writeFileSync(path.join(summariesFolder, "summaries.md"), markdownOutput.join("\n---\n"));
  fs.writeFileSync(path.join(summariesFolder, "summaries.json"), JSON.stringify(jsonOutput, null, 2));
  fs.writeFileSync(path.join(summariesFolder, "summaries.csv"), csvOutput.join("\n"));
}

/**
 * Entry point of the digest run.
 *
 * Opens Hacker News in a local, visible Stagehand browser, visits the targeted
 * front-page links, and for each one saves a full-page screenshot, the OCR text
 * of that screenshot and a GPT summary under `./Files`. Afterwards all
 * summaries in `Files/Summaries/Texts` are merged into Markdown, JSON and CSV.
 *
 * Errors are logged and reflected in the process exit code; the browser is
 * closed in every case.
 */
async function main() {
  console.log("πŸš€ Starting Stagehand...");
  const stagehand = new Stagehand({
    env: "LOCAL",
    headless: false,
  });

  try {
    await stagehand.init();
    console.log("βœ… Stagehand initialized");

    // Remove accidental root-level 'Texts' folder if it exists.
    // NOTE: this deletes ./Texts recursively from the current working directory.
    const rogueTextsFolder = path.join(process.cwd(), "Texts");
    if (fs.existsSync(rogueTextsFolder)) {
      fs.rmSync(rogueTextsFolder, { recursive: true, force: true });
      console.log("πŸ—‘οΈ Removed rogue 'Texts' folder from root directory.");
    }

    // Folder structure
    const baseFolder = path.join(process.cwd(), "Files");
    const screenshotsFolder = path.join(baseFolder, "Screenshots");
    const textsFolder = path.join(baseFolder, "Texts");
    const summariesFolder = path.join(baseFolder, "Summaries");
    const summariesTextsFolder = path.join(summariesFolder, "Texts");

    for (const folder of [baseFolder, screenshotsFolder, textsFolder, summariesFolder, summariesTextsFolder]) {
      if (!fs.existsSync(folder)) {
        // `recursive` makes creation independent of list order and tolerant of
        // the folder appearing between the existence check and this call.
        fs.mkdirSync(folder, { recursive: true });
        console.log(`πŸ“ Created folder: ${folder}`);
      }
    }

    console.log("🌐 Navigating to Hacker News...");
    await stagehand.page.goto("https://news.ycombinator.com/");
    await stagehand.page.waitForLoadState("domcontentloaded");

    // Zero-based positions among the front-page title links; logs and file
    // names use the one-based number (index + 1).
    const targetIndices = [1, 2];

    for (const index of targetIndices) {
      const articleNumber = index + 1;

      console.log(`πŸ” Locating article link #${articleNumber}...`);
      const link = stagehand.page.locator(".titleline > a").nth(index);
      await link.waitFor({ timeout: 5000 });

      console.log(`πŸ‘† Clicking article #${articleNumber}...`);
      // Start waiting for the navigation before clicking so it cannot be missed.
      await Promise.all([
        stagehand.page.waitForNavigation(),
        link.click(),
      ]);

      const title = await stagehand.page.title();
      console.log(`πŸ“ Page title: "${title}"`);

      const screenshotName = `hackernews_link_${articleNumber}.png`;
      const screenshotPath = path.join(screenshotsFolder, screenshotName);
      await stagehand.page.screenshot({ path: screenshotPath, fullPage: true });
      console.log(`πŸ“Έ Screenshot saved at: ${screenshotPath}`);

      const ocrText = await extractTextFromImage(screenshotPath);
      const textFilePath = path.join(textsFolder, `hackernews_link_${articleNumber}.txt`);
      fs.writeFileSync(textFilePath, ocrText);
      console.log(`πŸ“ OCR text saved to: ${textFilePath}`);

      const summary = await summarizeWithGPT(ocrText);
      const summaryPath = path.join(summariesTextsFolder, `hackernews_link_${articleNumber}_summary.txt`);
      fs.writeFileSync(summaryPath, summary);
      console.log(`🧠 Summary saved to: ${summaryPath}`);

      console.log("↩️ Returning to Hacker News...");
      await stagehand.page.goBack();
      await stagehand.page.waitForLoadState("domcontentloaded");
    }

    // Merge summaries
    console.log("πŸ“¦ Merging summaries into MD, JSON, CSV...");
    mergeSummaries(summariesFolder, summariesTextsFolder);
    console.log("βœ… summaries.md, .json, .csv created.");

    // Keep the browser window visible briefly before it is closed.
    await pause(3000);
  } catch (err) {
    console.error("❌ Error:", err);
    // Report the failure to the caller instead of exiting with status 0.
    process.exitCode = 1;
    // Use a plain timer here: the page may not exist (e.g. init() failed), and
    // touching it could throw from inside this handler.
    await pause(3000);
  } finally {
    await stagehand.close();
    console.log("πŸ”š Browser closed – automation complete!");
  }
}
// Start the run and print any rejection that escapes main() to stderr.
main().catch((error) => console.error(error));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment