Skip to content

Instantly share code, notes, and snippets.

@Sonictherocketman
Created September 26, 2025 22:02
Show Gist options
  • Select an option

  • Save Sonictherocketman/606fcded0e511b92224c6deee86faa24 to your computer and use it in GitHub Desktop.

Select an option

Save Sonictherocketman/606fcded0e511b92224c6deee86faa24 to your computer and use it in GitHub Desktop.
A script that parses Caddy JSON access logs to estimate RSS feed subscribers.
#! /usr/bin/env python3
import argparse
from collections import Counter, defaultdict
import json
import re
import sys
# Request URIs that count as feed fetches; log entries whose URI is not
# listed here are skipped.
FEED_URIS = ('/rss.xml', '/feed.json')
# User-Agent fragments that identify an ordinary browser/platform. Requests
# matching one of these are only counted when --include-browsers is given.
BROWSERS = (
    'iPhone', 'iPad', 'Android',
    'Linux', 'Windows NT', 'Macintosh',
)
# Self-hosted or desktop readers. Their User-Agent carries no subscriber
# count, so each distinct client IP is treated as one subscriber.
# Order matters: the first entry found in the User-Agent wins, so keep the
# more general names (browser versions, etc.) towards the end.
INDIVIDUAL_SERVICES = (
    'NetNewsWire',
    'Unread',
    'Tiny Tiny RSS',
    'kurtmckee/feedparser',
    'Googlebot',
    'YandexBot',
    'FreshRSS',
    'Reeder',
    'Miniflux',
    'UniversalFeedParser',
    'curl',
    'SimplePie',
    'FeedsFun',
    'Twingly',
    'RSS.Social',
    'SpaceCowboys',
    'FreedomController',
    'Newsflash',
    'Yarr',
    'Micro.blog',
    'fluent-reader',
    'FeedDiscovery',
)
# Hosted readers that announce their subscriber count in the User-Agent.
# Maps service name -> regex. Group 1 captures the subscriber count; an
# optional group 2 captures a feed identifier that is used to keep separate
# feed registrations of the same service apart.
#
# The patterns are raw strings so that regex escapes such as `\(` and `\.`
# are not treated as (invalid) Python string escapes, and every literal
# `+` and `.` is escaped so it matches only that character.
FRIENDLY_SERVICES = {
    'NewsBlur Feed Fetcher': r'NewsBlur Feed Fetcher - ([0-9]+) subscribers',
    'Inoreader': r'Inoreader/[0-9.]+ \(\+http://www\.inoreader\.com/feed-fetcher; ([0-9]+) subscribers; \)',
    'Feedbin': r'Feedbin feed-id:[0-9]+ - ([0-9]+) subscribers',
    'Feedly': r'Feedly/[0-9.]+ \(\+http://www\.feedly\.com/fetcher\.html; ([0-9]+) subscribers; \)',
    # The `+` before the URL must be escaped; unescaped it means "one or more
    # spaces" and the pattern can never match the literal "+https://...".
    'BazQux': r'Mozilla/5\.0 \(compatible; BazQux/[0-9.]+; \+https://bazqux\.com/fetcher; ([0-9]+) subscribers\)',
    'TheOldReader': r'Mozilla/[0-9.]+ \(compatible; theoldreader\.com; .* ([0-9]+) subscribers; feed-id=(.*)',
}
# Crawlers and generic HTTP clients. A request whose User-Agent contains any
# of these fragments is ignored and never counted as a subscriber.
BOTS = {
    'Newslitbot',
    'Googlebot',
    'athena-spider',
    'rawweb-bot',
    'YandexBot',
    'amazonbot',
    'dataforseo-bot',
    'kagibot',
    'Go-http-client/',
    'applebot',
    'trafilatura',
    'DnBCrawler-Analytics',
    'AhrefsBot',
    'Xobaque',
    'rss2tg_crawler',
    'ScourRSSBot',
    'sumi.news',
    'Buck/',
    'ifastnet.com',
    'Rome Client',
    'hackney/1.24.1',
    'Apache-HttpClient',
    'yakread.com',
}
def parse_args():
    """Parse the command-line arguments.

    Returns:
        argparse.Namespace with two attributes:
        ``input`` -- an open text file to read log lines from; standard
        input when no path is given on the command line.
        ``include_browsers`` -- True when ``--include-browsers`` was passed.
    """
    # The text is the program description; passing it positionally would set
    # `prog` and garble the usage line.
    parser = argparse.ArgumentParser(
        description='Collect info about RSS feed subs',
    )
    # nargs='?' makes the positional optional so that the stdin default can
    # actually take effect (a required positional never uses its default).
    parser.add_argument(
        'input',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
    )
    parser.add_argument('--include-browsers', action='store_true', default=False)
    return parser.parse_args()
def _first_match(names, user_agent_lower):
    """Return the first of *names* contained in the lowercased User-Agent, else None."""
    return next(
        (name for name in names if name.lower() in user_agent_lower),
        None,
    )


def main():
    """Read JSON log lines and print an estimated subscriber count per agent.

    Each input line is expected to be one JSON object with a ``request``
    object holding ``uri``, ``remote_ip`` and ``headers``. Only requests for
    a URI in FEED_URIS are considered, and requests from BOTS are dropped.
    The remaining requests are classified in this order:

    1. FRIENDLY_SERVICES -- the subscriber count is parsed from the
       User-Agent; the most recent value seen per (service, uri, feed id)
       wins.
    2. INDIVIDUAL_SERVICES -- one subscriber per (service, uri, ip).
    3. BROWSERS / everything else -- one subscriber per (agent, uri, ip),
       subject to ``--include-browsers``.

    Blank lines and JSON entries without a ``request`` object are skipped;
    lines that are not valid JSON are skipped with a warning on stderr.
    """
    args = parse_args()

    # (agent, uri, discriminator) -> subscriber count. Re-assigning the same
    # key de-duplicates repeated fetches by the same client.
    counts = {}
    for line_number, line in enumerate(args.input, start=1):
        if not line.strip():
            continue
        try:
            data = json.loads(line)
        except json.JSONDecodeError as error:
            # E.g. a truncated final line; report it instead of aborting.
            print(
                f'Skipping line {line_number}: invalid JSON ({error})',
                file=sys.stderr,
            )
            continue

        request = data.get('request') if isinstance(data, dict) else None
        if not isinstance(request, dict):
            # Not a request log entry; nothing to count.
            continue

        uri = request.get('uri')
        if uri not in FEED_URIS:
            continue

        ip = request['remote_ip']
        # Header values are lists; fall back to 'none' when the header is
        # missing or empty.
        user_agent = (request['headers'].get('User-Agent') or ['none'])[0]
        user_agent_lower = user_agent.lower()

        # Ignore bots. Matched case-insensitively, like every other
        # User-Agent check below.
        if _first_match(BOTS, user_agent_lower) is not None:
            continue

        # Try cloud services that report their own subscriber count.
        service = _first_match(FRIENDLY_SERVICES, user_agent_lower)
        if service is not None:
            match = re.search(FRIENDLY_SERVICES[service], user_agent)
            if match:
                # Only some patterns define a second (feed id) group.
                feed_id = match.group(2) if match.re.groups >= 2 else ''
                counts[(service, uri, feed_id)] = int(match.group(1))
                continue
            # Name matched but the count could not be parsed: fall through
            # to the remaining strategies.

        # Try known self-hosted / desktop readers: one subscriber per IP.
        service = _first_match(INDIVIDUAL_SERVICES, user_agent_lower)
        if service is not None:
            counts[(service, uri, ip)] = 1
            continue

        # Everything else.
        # NOTE(review): with --include-browsers only browsers are counted
        # here and unknown agents are dropped; without it only unknown agents
        # are counted. Preserved as-is -- confirm this is intended.
        browser = _first_match(BROWSERS, user_agent_lower)
        if args.include_browsers and browser is not None:
            counts[(browser, uri, ip)] = 1
        elif not args.include_browsers and browser is None:
            counts[(user_agent, uri, ip)] = 1

    # Sum over URIs and discriminators to get one total per agent.
    counter = Counter()
    for (agent, _uri, _discriminator), count in counts.items():
        counter[agent] += count

    print('Results:')
    print('-----------------------')
    print(f'Total Subscribers: {counter.total()}')
    print('-----------------------')
    print('Subs.\tUser-Agent')
    print('-----------------------')
    for item, count in counter.most_common():
        print(f'{count}\t{item}')
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment