@wuxmedia
Forked from suzannealdrich/wget.txt
Last active February 11, 2019 09:46
wget spider cache warmer
wget --spider -o wget.log -e robots=off -r -l 5 -p -S --header="X-Bypass-Cache: 1" --limit-rate=200k live-mysite.gotpantheon.com
# Options explained
# --spider: Crawl the site without saving any of the downloaded pages
# -o wget.log: Write log messages to wget.log instead of the terminal
# -e robots=off: Ignore robots.txt
# -r: specify recursive download
# -l 5: Maximum recursion depth, i.e. 1 means 'crawl the homepage and the pages it links to directly', 2 follows links one level further, and so on
# -p: get all images, etc. needed to display HTML page
# -S: print server response
# --header="X-Bypass-Cache: 1": Send a custom request header (this one bypasses the Varnish cache so the origin regenerates each page)
# --limit-rate=200k: Throttle the crawl to roughly 200 KB/s so the warm-up doesn't hammer the server
# live-mysite.gotpantheon.com: URL to start crawling from
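
If you want to run the warm-up on a schedule (e.g. nightly from cron) and spot problems afterwards, a small wrapper script can help. The sketch below reuses the exact command from above; the SITE value is just the example host, the dated log name is arbitrary, and the grep for 'broken link' assumes wget's usual spider-mode log wording, so adjust all three for your own site.

#!/usr/bin/env bash
# Cache-warming wrapper: a minimal sketch around the one-liner above.
set -euo pipefail

SITE="live-mysite.gotpantheon.com"    # example host from above; replace with your own
LOG="wget-$(date +%F).log"            # one log per run, e.g. wget-2019-02-11.log

# Same options as the one-liner above, just with a dated log file.
# "|| true" keeps the script going even though wget exits non-zero when it finds broken links.
wget --spider -o "$LOG" -e robots=off -r -l 5 -p -S \
     --header="X-Bypass-Cache: 1" --limit-rate=200k "$SITE" || true

# Spider mode flags dead URLs in the log; surface them so the crawl can be re-checked.
if grep -q 'broken link' "$LOG"; then
    echo "Broken links found during warm-up, see $LOG"
else
    echo "Warm-up finished, no broken links reported in $LOG"
fi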