Skip to content

Instantly share code, notes, and snippets.

@hzeyuan
Last active September 27, 2024 05:59
Show Gist options
  • Save hzeyuan/41a7ca3eff4d5c032f61024c323ae626 to your computer and use it in GitHub Desktop.
Save hzeyuan/41a7ca3eff4d5c032f61024c323ae626 to your computer and use it in GitHub Desktop.
Product Hunt 产品获取工具,可以定时,也可以选择日期。
{
"name": "ph",
"version": "1.0.0",
"description": "",
"main": "auto-reply.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"commander": "^12.1.0",
"node-schedule": "^2.1.1",
"puppeteer": "^23.1.1"
}
}
const puppeteer = require('puppeteer');
const path = require('path');
const fs = require('fs');
const { program } = require('commander');
const schedule = require('node-schedule');
const { submitUrlToGoogleIndex } = require('./submit-url-to-google-index');
// const USER_DATA_DIR = path.join(__dirname, 'puppeteer_user_data');
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * @param {number} ms - How long to wait, in milliseconds (passed to `setTimeout`).
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
async function delay(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Scrape one day of the Product Hunt daily leaderboard.
 *
 * Launches a headless browser, loads the leaderboard page for `date`, scrolls
 * repeatedly so lazily rendered posts appear, collects each post's link, opens
 * every post (a few at a time) to read its "visit" link, and finally passes
 * the post URLs to `submitUrlToGoogleIndex`.
 *
 * @param {string} date - Path segment interpolated into the leaderboard URL;
 *   callers in this file pass `YYYY/M/D` (e.g. `2024/9/3`).
 * @returns {Promise<void>} Settles when the run is over. Failures that happen
 *   after the browser has launched are logged instead of rethrown; a failed
 *   launch still rejects.
 */
async function main(date) {
  const SCROLL_ROUNDS = 15;
  const MAX_OPEN_POSTS = 5;

  /**
   * Run `processFn` over every entry of `urls`, keeping at most `limit`
   * calls in flight at once.
   *
   * @param {string[]} urls - Inputs handed to `processFn` one by one.
   * @param {number} limit - Maximum number of concurrent calls.
   * @param {(url: string) => Promise<*>} processFn - Worker for one input.
   * @returns {Promise<Array>} Worker results, in input order.
   */
  async function concurrentLimit(urls, limit, processFn) {
    const results = [];
    const executing = new Set();
    for (const url of urls) {
      const p = Promise.resolve().then(() => processFn(url));
      results.push(p);
      executing.add(p);
      const clean = () => executing.delete(p);
      // Same handler for both outcomes, so a rejected task still frees its slot
      // without leaving an unhandled derived promise behind.
      p.then(clean, clean);
      if (executing.size >= limit) {
        await Promise.race(executing);
      }
    }
    return Promise.all(results);
  }

  /**
   * Drop the `ref` query parameter from an outbound link.
   * NOTE(review): assumes the only cleanup wanted is removing a `ref`
   * tracking parameter — extend here if more processing is needed.
   *
   * @param {string|null} rawUrl - Link read from the post page, if any.
   * @returns {string|null} Cleaned URL, the input unchanged when it cannot be
   *   parsed, or `null` when there was no link.
   */
  function stripRefParam(rawUrl) {
    if (!rawUrl) {
      return null;
    }
    try {
      const parsed = new URL(rawUrl);
      parsed.searchParams.delete('ref');
      return parsed.toString();
    } catch {
      return rawUrl;
    }
  }

  /**
   * Open one post page in its own tab and read the product's website link.
   *
   * @param {string} url - Product Hunt post URL.
   * @returns {Promise<{url: string, cleanWebsiteUrl: (string|null)}>}
   *   `cleanWebsiteUrl` is `null` when the link is missing or loading failed.
   */
  async function processUrl(url) {
    let postPage;
    try {
      // Opened inside the try so a failure to create the tab is reported per
      // URL instead of aborting the whole batch.
      postPage = await browser.newPage();
      await postPage.goto(url, { waitUntil: 'networkidle0', timeout: 30000 });
      const websiteUrl = await postPage.evaluate(() => {
        const visitLink = document.querySelector('a[data-test="product-header-visit-button"]');
        return visitLink ? visitLink.href : null;
      });
      // Add your own handling of the website URL here.
      const cleanWebsiteUrl = stripRefParam(websiteUrl);
      console.log(`处理 post: ${url}, 获取原网址: ${cleanWebsiteUrl}`);
      return { url, cleanWebsiteUrl };
    } catch (error) {
      console.error(`处理 ${url} 时出错:`, error.message);
      return { url, cleanWebsiteUrl: null };
    } finally {
      if (postPage) {
        await postPage.close();
      }
    }
  }

  /**
   * Call `operation` until it succeeds, waiting one second between attempts.
   *
   * @param {() => Promise<*>} operation - Action to attempt.
   * @param {number} [maxRetries=3] - Total number of attempts.
   * @returns {Promise<*>} Result of the first successful attempt.
   * @throws Rethrows the error of the final attempt when all of them fail.
   */
  async function retryOperation(operation, maxRetries = 3) {
    for (let i = 0; i < maxRetries; i++) {
      try {
        return await operation();
      } catch (error) {
        if (i === maxRetries - 1) throw error;
        console.error(error.message);
        console.log(`操作失败,正在重试 (${i + 1}/${maxRetries})...`);
        await delay(1000);
      }
    }
  }

  /**
   * Scroll the leaderboard down in several passes so that content rendered on
   * scroll has a chance to load.
   *
   * @param {*} page - Puppeteer page showing the leaderboard.
   * @returns {Promise<void>}
   */
  async function scrollToBottomAndWait(page) {
    for (let round = 1; round <= SCROLL_ROUNDS; round++) {
      console.log(`滚动到页面底部 (${round}/${SCROLL_ROUNDS})...`);
      // Runs in the browser: step down 500px every 400ms until the distance
      // scrolled in this pass reaches the current document height.
      await page.evaluate(async () => {
        await new Promise((resolve) => {
          const distance = 500;
          let totalHeight = 0;
          const timer = setInterval(() => {
            const { scrollHeight } = document.body;
            window.scrollBy(0, distance);
            totalHeight += distance;
            if (totalHeight >= scrollHeight) {
              clearInterval(timer);
              resolve();
            }
          }, 400);
        });
      });
      // Pause between passes before scrolling again.
      await delay(1000);
    }
  }

  console.log('启动浏览器...');
  const browser = await puppeteer.launch({
    headless: "new",
    defaultViewport: null,
    args: [
      '--start-maximized',
      '--no-sandbox',
      '--disable-setuid-sandbox'
    ],
  });

  try {
    const page = await browser.newPage();
    const url = `https://www.producthunt.com/leaderboard/daily/${date}/all`;
    console.log(`正在加载页面: ${url}`);
    // Load the leaderboard for the requested date (with retries), not a fixed day.
    await retryOperation(() => page.goto(url, { waitUntil: 'networkidle0', timeout: 60000 }));
    console.log('页面已加载。如果需要登录,请手动登录。');
    console.log('滚动页面以加载所有内容。');
    await scrollToBottomAndWait(page);

    // Collect every post item on the page.
    const postItems = await retryOperation(() => page.$$('[data-test^="post-item-"]'));
    postItems.reverse();
    console.log(`找到 ${postItems.length} 个 post 项`);

    const links = await Promise.all(postItems.map(async (item) => {
      const anchor = await item.$('a');
      return anchor ? item.$eval('a', (a) => a.href) : null;
    }));
    // Items without a link would otherwise be passed on as undefined URLs.
    const urls = links.filter(Boolean);

    await concurrentLimit(urls, MAX_OPEN_POSTS, processUrl);
    // Awaited so that a rejected submission is caught and logged below.
    await submitUrlToGoogleIndex(urls);
    console.log('所有 post 处理完毕');
  } catch (error) {
    console.error('脚本执行出错:', error);
  } finally {
    // The browser is headless, so nobody can close it by hand; release it
    // here, otherwise every (scheduled) run leaves a Chromium process behind.
    await browser.close();
    console.log('脚本执行结束,浏览器已关闭。');
  }
}
/**
 * Scheduler callback: run `main` for today's date.
 *
 * The date is built from the machine's local clock as `YYYY/M/D` (no zero
 * padding), the same format the CLI argument uses.
 * NOTE(review): "today" is local time — confirm this matches the day boundary
 * the leaderboard uses.
 *
 * @returns {void} The run continues in the background; failures are logged.
 */
function scheduledJob() {
  const now = new Date();
  const formattedDate = `${now.getFullYear()}/${now.getMonth() + 1}/${now.getDate()}`;
  // main() takes only the date. Its promise must not be left floating, or a
  // rejection would surface as an unhandled rejection in the scheduler process.
  main(formattedDate).catch((error) => {
    console.error('定时任务执行出错:', error);
  });
}
// Command-line interface:
//   -s / --schedule  register a daily job (02:00) instead of running now
//   [date]           scrape the given day (YYYY/M/D); defaults to today
program
  .version('1.0.0')
  .description('从 Product Hunt 获取产品 URL')
  .option('-s, --schedule', '启用定时任务模式')
  .argument('[date]', '日期 (YYYY/M/D 格式)')
  .action(async (date, options) => {
    const { schedule: isScheduled } = options;
    if (isScheduled) {
      console.log('启动定时任务模式');
      // Run every day at 02:00.
      schedule.scheduleJob('0 2 * * *', scheduledJob);
      console.log('定时任务已设置,将在每天凌晨 2 点运行');
    } else if (date) {
      await main(date);
    } else {
      const currentDate = new Date();
      const formattedDate = `${currentDate.getFullYear()}/${currentDate.getMonth() + 1}/${currentDate.getDate()}`;
      console.log(`未提供日期,使用当前日期: ${formattedDate}`);
      await main(formattedDate);
    }
  });

// The action handler is async, so the promise-aware parser is required;
// with plain parse() a rejection from the handler would go unhandled.
program.parseAsync(process.argv).catch((error) => {
  console.error('脚本执行出错:', error);
  process.exitCode = 1;
});
// node producthunt-get-urls.js
// node producthunt-get-urls.js 2024/3/15
// node producthunt-get-urls.js -s
// sudo docker compose up -d --build
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment