Skip to content

Instantly share code, notes, and snippets.

@htlin222
Last active April 19, 2026 13:51
Show Gist options
  • Select an option

  • Save htlin222/ec650324c812d466466ff42cb034f1b5 to your computer and use it in GitHub Desktop.

Select an option

Save htlin222/ec650324c812d466466ff42cb034f1b5 to your computer and use it in GitHub Desktop.
daily_hema_onc_rss_digest.toml
# ── RSS sources ──────────────────────────────────────────────
# Each [[rss.feeds]] entry appends one feed record with a display `name`
# and the feed `url`.
# Journal of Clinical Oncology feed served from ascopubs.org.
[[rss.feeds]]
name = "JCO (ASCO Pubs)"
url = "https://ascopubs.org/action/showFeed?type=etoc&feed=rss&jc=jco"
# NEJM specialty feed, filtered by `specialty=hematology-oncology`.
[[rss.feeds]]
name = "NEJM Hematology-Oncology"
url = "https://onesearch-rss.nejm.org/api/specialty/rss?context=nejm&specialty=hematology-oncology"
# Saved PubMed search; the URL requests up to 50 items (`limit=50`).
[[rss.feeds]]
name = "PubMed Custom Search"
url = "https://pubmed.ncbi.nlm.nih.gov/rss/search/1L5AT7N6rGvBm3-KX3KlG8UZrXSOhP8AGNCuYYH-ucz8SijUCJ/?limit=50&utm_campaign=pubmed-2&fc=20240727041330"
# ── Cloudflare protection bypass strategy ─────────────────────────────────
# Parent table for the request settings below. `description` is free text; in
# English it reads: "For academic publisher sites protected by Atypon /
# Cloudflare (ascopubs.org, academic.oup.com, onlinelibrary.wiley.com,
# nejm.org, etc.), do not use a bare HTTP client with default headers."
[rss.cloudflare_bypass]
description = "針對 Atypon / Cloudflare 保護的學術出版商網站(ascopubs.org、academic.oup.com、onlinelibrary.wiley.com、nejm.org 等),禁止使用預設 header 的裸 HTTP client"
# Browser-like request header values (desktop Chrome on macOS user agent).
# Two Accept variants are provided: `accept_rss` prefers RSS/XML and
# `accept_html` prefers HTML — presumably for feed and article-page requests
# respectively; confirm against the consumer.
# NOTE(review): `accept_encoding` advertises `br`; this assumes the HTTP client
# can decode Brotli responses — confirm.
[rss.cloudflare_bypass.headers]
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"
accept_rss = "application/rss+xml, application/xml;q=0.9, */*;q=0.8"
accept_html = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
accept_language = "en-US,en;q=0.9"
accept_encoding = "gzip, deflate, br"
# Client behavior switches: referer, cookies, retry policy, headless fallback
# and response caching.
[rss.cloudflare_bypass.behavior]
set_referer_same_origin = true # e.g. "https://ascopubs.org/loi/jco"
enable_cookie_jar = true # keep the cf_clearance token
retry_on_403 = true
retry_count = 1
retry_backoff_seconds = [2, 5] # [min, max]: random delay of 2–5 seconds
fallback_to_headless = true # Playwright / Puppeteer + stealth plugin
cache_minutes = 10 # avoid repeated requests that trigger HTTP 429
# Reference curl invocation for the JCO feed: browser user agent, RSS Accept
# header, same-site referer (-e) and a cookie jar that is both read (-b) and
# written (-c). Kept in a multi-line literal string ('''), so the trailing
# backslashes stay verbatim as shell line continuations.
[rss.cloudflare_bypass.curl_example]
command = '''
curl -sS --compressed \
-A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36" \
-H "Accept: application/rss+xml, application/xml;q=0.9, */*;q=0.8" \
-H "Accept-Language: en-US,en;q=0.9" \
-e "https://ascopubs.org/loi/jco" \
-b /tmp/asco.jar -c /tmp/asco.jar \
"https://ascopubs.org/action/showFeed?type=etoc&feed=rss&jc=jco"
'''
# Reference Python request for the JCO feed using httpx: browser-like headers,
# same-site referer, redirects followed, 20-second timeout; the response body
# ends up in `feed_xml`.
# The snippet is stored verbatim in a multi-line literal string, so its
# indentation is significant: the body of the `with` block must stay indented
# or the code raises IndentationError when run.
# `http2=True` needs httpx's optional HTTP/2 extra: pip install "httpx[http2]".
[rss.cloudflare_bypass.python_example]
code = '''
import httpx
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
    "Accept": "application/rss+xml, application/xml;q=0.9, */*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://ascopubs.org/loi/jco",
}
with httpx.Client(headers=headers, http2=True, follow_redirects=True, timeout=20) as c:
    r = c.get("https://ascopubs.org/action/showFeed?type=etoc&feed=rss&jc=jco")
    r.raise_for_status()
    feed_xml = r.text
'''
# ── Content processing rules ─────────────────────────────────────────
[rss.content_rules]
filter_window_hours = 24 # only process items updated within the past 24 hours
# Output instructions, written in Chinese. English gloss of `prompt`: "Only
# process items updated within the past 24 hours. After collecting every
# qualifying article, produce a set of key-point summaries, each with a short
# remark: 「嘻嘻」 marks a positive take, 「不嘻嘻」 a humorous roast. Every
# qualifying article must be covered; none may be omitted."
# The "24 hours" in the prompt text duplicates
# rss.content_rules.filter_window_hours; TOML has no interpolation, so the two
# must be kept in sync by hand.
[rss.content_rules.output]
prompt = """
僅處理過去 24 小時內更新的項目。收集所有符合條件的文章後,產出一組重點摘要,每篇附上短評:「嘻嘻」為正面評價,「不嘻嘻」為搞笑吐槽。必須完整涵蓋所有符合條件的文章,不可省略。
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment