Last active
April 19, 2026 13:51
-
-
Save htlin222/ec650324c812d466466ff42cb034f1b5 to your computer and use it in GitHub Desktop.
daily_hema_onc_rss_digest.toml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # ── RSS sources ────────────────────────────────────────────── | |
| # Each [[rss.feeds]] header appends one feed record (display name + feed URL) | |
| # to the rss.feeds array. | |
| [[rss.feeds]] | |
| name = "JCO (ASCO Pubs)" | |
| url = "https://ascopubs.org/action/showFeed?type=etoc&feed=rss&jc=jco" | |
| [[rss.feeds]] | |
| name = "NEJM Hematology-Oncology" | |
| url = "https://onesearch-rss.nejm.org/api/specialty/rss?context=nejm&specialty=hematology-oncology" | |
| [[rss.feeds]] | |
| name = "PubMed Custom Search" | |
| # NOTE(review): the URL carries limit=50, presumably capping the feed at 50 items; confirm against PubMed. | |
| url = "https://pubmed.ncbi.nlm.nih.gov/rss/search/1L5AT7N6rGvBm3-KX3KlG8UZrXSOhP8AGNCuYYH-ucz8SijUCJ/?limit=50&utm_campaign=pubmed-2&fc=20240727041330" | |
| # ── Cloudflare protection bypass strategy ───────────────────────────────── | |
| [rss.cloudflare_bypass] | |
| # Policy note (written in Chinese). In English: for academic publisher sites | |
| # protected by Atypon / Cloudflare (ascopubs.org, academic.oup.com, | |
| # onlinelibrary.wiley.com, nejm.org, etc.), a bare HTTP client that sends its | |
| # default headers must not be used. | |
| description = "針對 Atypon / Cloudflare 保護的學術出版商網站(ascopubs.org、academic.oup.com、onlinelibrary.wiley.com、nejm.org 等),禁止使用預設 header 的裸 HTTP client" | |
| # Browser-like request header values. user_agent, accept_rss and accept_language | |
| # match the strings used in the curl and Python examples in this file. | |
| [rss.cloudflare_bypass.headers] | |
| # NOTE(review): current Chrome builds report a four-part version (e.g. "Chrome/124.0.0.0"); | |
| # confirm the abbreviated "Chrome/124.0" token does not itself look unusual to bot detection. | |
| user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36" | |
| # Two Accept variants: one preferring RSS/XML, one preferring HTML. | |
| accept_rss = "application/rss+xml, application/xml;q=0.9, */*;q=0.8" | |
| accept_html = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" | |
| accept_language = "en-US,en;q=0.9" | |
| # NOTE(review): advertising "br" assumes the consuming HTTP client can decode Brotli; confirm. | |
| accept_encoding = "gzip, deflate, br" | |
| # Request-behavior switches for fetching protected feeds. | |
| [rss.cloudflare_bypass.behavior] | |
| set_referer_same_origin = true # e.g. "https://ascopubs.org/loi/jco" | |
| enable_cookie_jar = true # keep the cf_clearance token | |
| retry_on_403 = true | |
| retry_count = 1 | |
| # Per the inline note, the two elements are a [min, max] range for a random | |
| # delay, not a per-attempt schedule. | |
| retry_backoff_seconds = [2, 5] # random 2–5 seconds | |
| fallback_to_headless = true # Playwright / Puppeteer + stealth plugin | |
| cache_minutes = 10 # avoid repeated requests triggering 429 | |
| # Reference curl invocation for the JCO feed: browser User-Agent (-A), Accept and | |
| # Accept-Language headers (-H), a Referer (-e), and a cookie jar file that is both | |
| # read (-b) and written (-c). | |
| [rss.cloudflare_bypass.curl_example] | |
| # Multi-line literal string ('''): no escape processing, so the trailing | |
| # backslashes are kept verbatim as shell line continuations. | |
| command = ''' | |
| curl -sS --compressed \ | |
| -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36" \ | |
| -H "Accept: application/rss+xml, application/xml;q=0.9, */*;q=0.8" \ | |
| -H "Accept-Language: en-US,en;q=0.9" \ | |
| -e "https://ascopubs.org/loi/jco" \ | |
| -b /tmp/asco.jar -c /tmp/asco.jar \ | |
| "https://ascopubs.org/action/showFeed?type=etoc&feed=rss&jc=jco" | |
| ''' | |
| # Reference Python (httpx) fetch of the JCO feed using the same browser-like | |
| # headers and Referer as the curl example. The snippet is stored in a multi-line | |
| # literal string ('''), so its indentation is significant: the body of the | |
| # `with` block must stay indented for the code to be valid Python when pasted. | |
| # http2=True requires httpx's optional HTTP/2 support (pip install "httpx[http2]"). | |
| [rss.cloudflare_bypass.python_example] | |
| code = ''' | |
| import httpx | |
| headers = { | |
|     "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " | |
|                   "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36", | |
|     "Accept": "application/rss+xml, application/xml;q=0.9, */*;q=0.8", | |
|     "Accept-Language": "en-US,en;q=0.9", | |
|     "Referer": "https://ascopubs.org/loi/jco", | |
| } | |
| with httpx.Client(headers=headers, http2=True, follow_redirects=True, timeout=20) as c: | |
|     r = c.get("https://ascopubs.org/action/showFeed?type=etoc&feed=rss&jc=jco") | |
|     r.raise_for_status() | |
|     feed_xml = r.text | |
| ''' | |
| # ── Content processing rules ───────────────────────────────────────── | |
| [rss.content_rules] | |
| filter_window_hours = 24 # only process items updated within the past 24 hours | |
| [rss.content_rules.output] | |
| # Output prompt (written in Chinese). In English: only process items updated in | |
| # the past 24 hours; after collecting every qualifying article, produce a set of | |
| # key-point summaries, each with a short remark, where "嘻嘻" marks a positive | |
| # take and "不嘻嘻" a humorous roast; every qualifying article must be covered, | |
| # none omitted. | |
| # The 24-hour window is restated inside the prompt text, so keep it in sync with | |
| # rss.content_rules.filter_window_hours if either changes. | |
| prompt = """ | |
| 僅處理過去 24 小時內更新的項目。收集所有符合條件的文章後,產出一組重點摘要,每篇附上短評:「嘻嘻」為正面評價,「不嘻嘻」為搞笑吐槽。必須完整涵蓋所有符合條件的文章,不可省略。 | |
| """ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment