Skip to content

Instantly share code, notes, and snippets.

@cchudant
Created August 11, 2019 22:14
Show Gist options
  • Save cchudant/40da3f41186453c0cd807e9bce5a5d15 to your computer and use it in GitHub Desktop.
Save cchudant/40da3f41186453c0cd807e9bce5a5d15 to your computer and use it in GitHub Desktop.
Scrape nyaa comments and expose them as an RSS feed
const httpFetch = require('node-fetch')
const { JSDOM } = require('jsdom')
const express = require('express')
const morgan = require('morgan')
const { Feed } = require('feed')
const xmlEscape = require('xml-escape')
const wait = t => new Promise(r => setTimeout(r, t))
const {
PORT = 8080
} = process.env
const app = express()
app.use(morgan('dev'))
/// global
const UPDATE_INTERVAL = 3 * 60 * 1000
const UPLOADERS = ['Punisher694']
const cache = new Map()
/// limit network requests
let lastNetRsq = Promise.resolve()
function fetch(...args) {
const lastrsq = lastNetRsq
const promise = Promise.resolve()
.then(async () => {
await lastrsq
console.log('now requesting')
return await httpFetch(...args)
})
lastNetRsq = promise
return promise
}
/// logic
function commentsState(document) {
return Array.from(document.querySelectorAll('tr > td:nth-child(2)'))
.map(tr => tr.children)
.map(([acom, alink]) => ({
title: (alink || acom).textContent,
link: (alink || acom).href,
comments: alink ? +acom.textContent : 0
}))
.map(o => [o.link, o])
.reduce((o, [k, v]) => (o[k] = v, o), {}) // Object.fromEntries
}
function commentsDiff(lastState, newState) {
return Object.entries(newState)
.map(([link, { comments, ...fields }]) =>
({ comments: comments - ((lastState[link] || {}).comments || 0), ...fields })
)
.filter(({ comments }) => comments > 0)
}
function comments(document) {
return Array.from(document.querySelectorAll('#collapse-comments > div > div'))
.map(div => ({
user: div.querySelector('a').textContent,
avatar: div.querySelector('img.avatar').src,
commentLink: div.querySelector('div.comment-details > a').href.slice('about:blank'.length),
date: new Date(div.querySelector('div.comment-details > a > small').dataset['timestamp'] * 1000),
content: div.querySelector('div.comment-content').innerHTML,
}))
}
/// network out
async function getStateFor(user) {
const link = `https://nyaa.si/user/${user}?s=comments&o=desc`
console.log(`Requesting comments state for ${user}`)
const res = await fetch(link)
if (!res.ok)
{
console.error(`Error for url /user/${user}?s=comments&o=desc: status ${res.status}`)
return { status: res.status === 404 ? 'not-found' : 'error' }
}
const text = await res.text()
const dom = new JSDOM(text)
const document = dom.window.document
try {
return { status: 'ok', state: commentsState(document) }
} catch (e) {
console.error(`Error for user ${user}`, e)
return { status: 'error' }
}
}
async function getComments(partialUrl) {
const link = `https://nyaa.si${partialUrl}`
console.log(`Requesting comments for ${partialUrl}`)
const res = await fetch(link)
if (!res.ok)
{
console.error(`Error for url ${partialUrl}: status ${res.status}`)
return { status: res.status === 404 ? 'not-found' : 'error' }
}
const text = await res.text()
const dom = new JSDOM(text)
const document = dom.window.document
try {
return { status: 'ok', comments: comments(document) }
} catch (e) {
console.error(`Error for url ${partialUrl}`, e)
return { status: 'error' }
}
}
/// update state
function logsRss(user, logs) {
const feed = new Feed({
title: `Nyaa.si ${user} comment feed`,
id: `https://nyaa.si/user/${user}`,
link: `https://nyaa.si/user/${user}`,
description: `Comment feed for nyaa.si user ${user}`
})
logs.forEach(({ title, link, avatar, content, date, user, commentLink }) =>
feed.addItem({
title: `${user} on ${title}`,
image: xmlEscape(avatar), // the `feed` lib is baad
date,
link: `https://nyaa.si${link}${commentLink}`,
author: [{
name: user,
email: '[email protected]',
link: `https://nyaa.si/user/${user}`
}],
content,
description: content
})
)
return feed.atom1()
}
async function updateState(user) {
let {
state: cached = {},
logs = []
} = cache.get(user) || {}
const { status, state } = await getStateFor(user)
if (!status) return
const diff = commentsDiff(cached, state)
const update = (await Promise.all(
diff.map(obj => getComments(obj.link).then(res => [obj, res]))
))
.filter(([, { status }]) => status === 'ok')
.map(([obj, { comments }]) => ({ ...obj, comments }))
const newLogs =
update.map(({ comments, ...torrFields }) => comments.map(comm => ({ ...torrFields, ...comm })))
.reduce((acc, cur) => [...acc, ...cur], []) // Array.prototype.flat()
logs = [...newLogs, ...logs]
.sort(({ date: d1 }, { date: d2 }) => d2 - d1)
cache.set(user, { state, logs })
}
UPLOADERS.reduce(async (promise, user) => {
await promise
const fn = async () => {
console.log(`Updating state for ${user}...`)
await updateState(user)
console.log(`Updated state for ${user}.`)
}
setImmediate(fn)
setInterval(fn, UPDATE_INTERVAL)
await wait(UPDATE_INTERVAL / UPLOADERS.length)
}, Promise.resolve())
/// network in
app.get('/user/:user', async (req, res) => {
const { user } = req.params
const { logs } = cache.get(user) || {}
if (!logs)
return res.sendStatus(500)
res.set('Content-Type', 'application/rss+xml');
res.send(logsRss(user, logs))
})
app.listen(PORT, () => console.log(`App running on port ${PORT}!`))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment