Last active
September 27, 2024 05:59
-
-
Save hzeyuan/41a7ca3eff4d5c032f61024c323ae626 to your computer and use it in GitHub Desktop.
Product Hunt 产品获取工具,可以定时,也可以选择日期。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "ph",
  "version": "1.0.0",
  "description": "",
  "main": "auto-reply.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC",
  "dependencies": {
    "commander": "^12.1.0",
    "puppeteer": "^23.1.1"
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer'); | |
const path = require('path'); | |
const fs = require('fs'); | |
const { program } = require('commander'); | |
const schedule = require('node-schedule'); | |
const { submitUrlToGoogleIndex } = require('./submit-url-to-google-index'); | |
// const USER_DATA_DIR = path.join(__dirname, 'puppeteer_user_data'); | |
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - How long to wait, in milliseconds.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function delay(ms) {
  // The function only hands back a Promise, so it does not need to be `async`.
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Main entry point: load the Product Hunt daily leaderboard for one day,
 * collect the link of every post on it, open each post to read its
 * "visit website" link, and pass the post URLs to submitUrlToGoogleIndex.
 *
 * Errors raised while scraping are logged, not rethrown. A failure to launch
 * the browser still rejects the returned promise.
 *
 * @param {string} date - Day to fetch, as the `YYYY/M/D` path fragment that is
 *   interpolated into the leaderboard URL.
 * @returns {Promise<void>}
 */
async function main(date) {
  /**
   * Run `processFn` over every URL, keeping at most `limit` calls in flight.
   *
   * @param {string[]} urls - URLs to process.
   * @param {number} limit - Maximum number of concurrent calls.
   * @param {(url: string) => Promise<*>} processFn - Worker invoked per URL.
   * @returns {Promise<Array>} Results in the same order as `urls`.
   */
  async function concurrentLimit(urls, limit, processFn) {
    const results = [];
    const executing = new Set();
    for (const url of urls) {
      const task = Promise.resolve().then(() => processFn(url));
      results.push(task);
      executing.add(task);
      // Free the slot whether the task fulfils or rejects.
      const release = () => executing.delete(task);
      task.then(release, release);
      if (executing.size >= limit) {
        await Promise.race(executing);
      }
    }
    return Promise.all(results);
  }

  /**
   * Remove tracking query parameters (`ref`, `utm_*`) from a URL.
   * NOTE(review): the original code logged and returned an undefined
   * `cleanWebsiteUrl`; stripping tracking parameters is the presumed intent.
   * Adjust this helper if a different clean-up is wanted.
   *
   * @param {?string} rawUrl - URL read from the page, or null when absent.
   * @returns {?string} Cleaned URL, or null when `rawUrl` is empty.
   */
  function stripTrackingParams(rawUrl) {
    if (!rawUrl) {
      return null;
    }
    try {
      const parsed = new URL(rawUrl);
      // Copy the keys first: deleting while iterating the live view skips entries.
      for (const key of [...parsed.searchParams.keys()]) {
        if (key === 'ref' || key.startsWith('utm_')) {
          parsed.searchParams.delete(key);
        }
      }
      return parsed.toString();
    } catch {
      // Not an absolute URL; hand it back unchanged rather than losing it.
      return rawUrl;
    }
  }

  /**
   * Open one post page in its own tab and read the external website link.
   *
   * @param {string} url - Product Hunt post URL.
   * @returns {Promise<{url: string, cleanWebsiteUrl: ?string}>} The post URL
   *   with its cleaned website link; the link is null when the page has no
   *   visit button or when loading fails.
   */
  async function processUrl(url) {
    let postPage;
    try {
      postPage = await browser.newPage();
      await postPage.goto(url, { waitUntil: 'networkidle0', timeout: 30000 });
      const websiteUrl = await postPage.evaluate(() => {
        const visitLink = document.querySelector('a[data-test="product-header-visit-button"]');
        return visitLink ? visitLink.href : null;
      });
      const cleanWebsiteUrl = stripTrackingParams(websiteUrl);
      console.log(`处理 post: ${url}, 获取原网址: ${cleanWebsiteUrl}`);
      return { url, cleanWebsiteUrl };
    } catch (error) {
      console.error(`处理 ${url} 时出错:`, error.message);
      return { url, cleanWebsiteUrl: null };
    } finally {
      // The tab may not exist if newPage() itself failed.
      if (postPage) {
        await postPage.close();
      }
    }
  }

  /**
   * Call `operation` until it succeeds, waiting one second between attempts.
   *
   * @param {() => Promise<*>} operation - Action to attempt.
   * @param {number} [maxRetries=3] - Total number of attempts.
   * @returns {Promise<*>} Whatever `operation` resolves to.
   * @throws The error of the final attempt when every attempt fails.
   */
  async function retryOperation(operation, maxRetries = 3) {
    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        return await operation();
      } catch (error) {
        if (attempt === maxRetries - 1) {
          throw error;
        }
        console.error(error.message);
        console.log(`操作失败,正在重试 (${attempt + 1}/${maxRetries})...`);
        await delay(1000);
      }
    }
  }

  /**
   * Scroll the page down in 15 passes so lazily loaded posts get rendered.
   *
   * @param {*} page - Puppeteer page showing the leaderboard.
   * @returns {Promise<void>}
   */
  async function scrollToBottomAndWait(page) {
    for (let pass = 0; pass < 15; pass++) {
      console.log(`滚动到页面底部 (${pass + 1}/15)...`);
      // Runs inside the browser: step down 500px every 400ms until the
      // distance scrolled in this pass reaches the document height.
      await page.evaluate(async () => {
        await new Promise((resolve) => {
          const distance = 500;
          let totalHeight = 0;
          const timer = setInterval(() => {
            const scrollHeight = document.body.scrollHeight;
            window.scrollBy(0, distance);
            totalHeight += distance;
            if (totalHeight >= scrollHeight) {
              clearInterval(timer);
              resolve();
            }
          }, 400);
        });
      });
      // Give newly requested content a moment to arrive before the next pass.
      await delay(1000);
    }
  }

  console.log('启动浏览器...');
  const browser = await puppeteer.launch({
    headless: "new",
    defaultViewport: null,
    args: [
      '--start-maximized',
      '--no-sandbox',
      '--disable-setuid-sandbox'
    ],
  });

  try {
    const page = await browser.newPage();
    const url = `https://www.producthunt.com/leaderboard/daily/${date}/all`;
    console.log(`正在加载页面: ${url}`);
    // Load the requested day (with retries). The original followed this with
    // a second goto to a hard-coded 2024/9/3 URL, which overrode `date`.
    await retryOperation(() => page.goto(url, { waitUntil: 'networkidle0', timeout: 60000 }));
    console.log('页面已加载。如果需要登录,请手动登录。');
    console.log('滚动页面以加载所有内容。');
    await scrollToBottomAndWait(page);

    const postItems = await retryOperation(() => page.$$('[data-test^="post-item-"]'));
    postItems.reverse();
    console.log(`找到 ${postItems.length} 个 post 项`);

    const hrefs = await Promise.all(postItems.map(async (item) => {
      if (!(await item.$('a'))) {
        return null;
      }
      return item.$eval('a', (a) => a.href);
    }));
    // Drop posts that had no anchor so no empty URL is visited or submitted.
    const urls = hrefs.filter(Boolean);

    await concurrentLimit(urls, 5, processUrl);
    // Awaited so a failure (if the helper is async) lands in the catch below.
    await submitUrlToGoogleIndex(urls);
    console.log('所有 post 处理完毕');
  } catch (error) {
    console.error('脚本执行出错:', error);
  } finally {
    // Always release Chromium; otherwise every scheduled run leaks a process
    // and a one-off run never lets Node exit.
    try {
      await browser.close();
      console.log('脚本执行结束,浏览器已关闭。');
    } catch (closeError) {
      console.error('关闭浏览器时出错:', closeError.message);
    }
  }
}
/**
 * Job body for the daily schedule: runs main() for the current local date.
 * The date is formatted as `YYYY/M/D` (no zero padding).
 *
 * @returns {void}
 */
function scheduledJob() {
  const now = new Date();
  // getMonth() is zero-based, hence the + 1.
  const formattedDate = `${now.getFullYear()}/${now.getMonth() + 1}/${now.getDate()}`;
  // main() takes only the date; the stray second argument has been dropped.
  // The promise is handled here so a failed run cannot surface as an
  // unhandled rejection inside the scheduler.
  main(formattedDate).catch((error) => {
    console.error('定时任务执行出错:', error);
  });
}
// Command-line interface:
//   -s / --schedule  register a daily job (02:00) instead of running once
//   [date]           day to fetch in YYYY/M/D form; defaults to today
program
  .version('1.0.0')
  .description('从 Product Hunt 获取产品 URL')
  .option('-s, --schedule', '启用定时任务模式')
  .argument('[date]', '日期 (YYYY/M/D 格式)')
  .action(async (date, options) => {
    // Read the flag as a property so the `schedule` module is not shadowed.
    if (options.schedule) {
      console.log('启动定时任务模式');
      // Cron expression: minute 0, hour 2, every day.
      schedule.scheduleJob('0 2 * * *', scheduledJob);
      console.log('定时任务已设置,将在每天凌晨 2 点运行');
      return;
    }

    if (date) {
      await main(date);
      return;
    }

    const currentDate = new Date();
    const formattedDate = `${currentDate.getFullYear()}/${currentDate.getMonth() + 1}/${currentDate.getDate()}`;
    console.log(`未提供日期,使用当前日期: ${formattedDate}`);
    await main(formattedDate);
  });

// The action handler is async, so parseAsync is needed for its promise to be
// awaited; a rejection is reported and reflected in the exit code.
program.parseAsync(process.argv).catch((error) => {
  console.error('脚本执行出错:', error);
  process.exitCode = 1;
});
// node producthunt-get-urls.js | |
// node producthunt-get-urls.js 2024/3/15 | |
// node producthunt-get-urls.js -s | |
// sudo docker compose up -d --build |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment