重點
- 要反代如 https://scholar.google.com.hk 等的地區域名,而不是 https://scholar.google.com
- 需將客戶端原始 IP 回傳給 Google,否則會 404
- 不要用 http.cache,以免被當成爬蟲而被封 IP
# Caddy v1 site block: reverse proxy that fronts Google Scholar's regional
# domain (scholar.google.com.hk) under scholar.example.com.
# Uses the `filter` directive (response-body rewriting plugin) and a DNS
# provider for TLS; both are presumably add-on plugins -- confirm they are
# compiled into the Caddy binary in use.
scholar.example.com {
    timeouts 5m

    # Main Scholar traffic. /robots.txt and /usercontent are excluded here;
    # /usercontent is handled by the second proxy block below.
    proxy / https://scholar.google.com.hk {
        except /robots.txt /usercontent

        # Forward the original client address upstream.
        header_upstream X-Real-IP {remote}
        header_upstream X-Forwarded-For {remote}
        # Pass the client's own User-Agent through unchanged.
        header_upstream User-Agent {>User-Agent}

        # Alternative when this server sits behind Cloudflare: take the client
        # address from the CF-Connecting-IP request header instead of {remote}.
        # header_upstream X-Real-IP {>CF-Connecting-IP}
        # header_upstream X-Forwarded-For {>CF-Connecting-IP}

        header_upstream Accept-Language zh-HK
        # Request an uncompressed body; presumably required so the `filter`
        # rules below can rewrite the response text.
        header_upstream Accept-Encoding identity

        # NOTE(review): this disables TLS certificate verification for the
        # upstream connection. The upstream is a public HTTPS host, so this
        # looks unnecessary -- consider removing it once the host's CA bundle
        # is confirmed to be present.
        insecure_skip_verify
    }

    # User-content host, exposed under the /usercontent path prefix.
    proxy /usercontent https://scholar.googleusercontent.com {
        # Strip the /usercontent prefix before forwarding upstream.
        without /usercontent
        except /robots.txt

        header_upstream X-Real-IP {remote}
        header_upstream X-Forwarded-For {remote}
        header_upstream User-Agent {>User-Agent}

        # Cloudflare alternative (see the first proxy block).
        # header_upstream X-Real-IP {>CF-Connecting-IP}
        # header_upstream X-Forwarded-For {>CF-Connecting-IP}

        header_upstream Accept-Language zh-HK
        header_upstream Accept-Encoding identity

        # NOTE(review): disables upstream TLS verification; see the note in
        # the first proxy block.
        insecure_skip_verify
    }

    # Rewrite upstream host names in text responses so links stay on this proxy.
    # The pattern also consumes an optional ".hk" suffix: the proxied upstream
    # is scholar.google.com.hk, and matching only "scholar.google.com" would
    # turn its links into the non-existent host "scholar.example.com.hk".
    filter rule {
        content_type text/.*
        search_pattern scholar\.google\.com(\.hk)?
        replacement scholar.example.com
    }

    # Point user-content links at the /usercontent prefix served above.
    filter rule {
        content_type text/.*
        search_pattern scholar\.googleusercontent\.com
        replacement scholar.example.com/usercontent
    }

    # Obtain the site certificate via the DNS challenge using the
    # "cloudflare" DNS provider.
    tls {
        dns cloudflare
    }
}