Skip to content

Instantly share code, notes, and snippets.

@zegomesjf
Created June 1, 2020 19:56

Revisions

  1. zegomesjf created this gist Jun 1, 2020.
    485 changes: 485 additions & 0 deletions robots.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,485 @@
    "(^| )sentry\\/"
    "[wW]get"
    "^Apache-HttpClient"
    "^curl"
    "^LCC"
    "007ac9 Crawler"
    "2ip.ru"
    "360Spider"
    "A6-Indexer"
    "Aboundex"
    "acapbot"
    "acoonbot"
    "adbeat_bot"
    "AddSearchBot"
    "AddThis"
    "Adidxbot"
    "ADmantX"
    "AdsBot-Google-Mobile"
    "AdsBot-Google([^-]|$)"
    "adscanner"
    "AdsTxtCrawler"
    "AdvBot"
    "AHC\\/"
    "Ahrefs(Bot|SiteAudit)"
    "aiHitBot"
    "AISearchBot"
    "AlphaBot"
    "Amazon CloudFront"
    "Amazonbot"
    "AndersPinkBot"
    "antibot"
    "AnyEvent"
    "Apercite"
    "APIs-Google"
    "AppEngine-Google"
    "AppInsights"
    "Applebot"
    "arabot"
    "archive.org_bot"
    "ArchiveBot"
    "AspiegelBot"
    "Atom Feed Robot"
    "AwarioRssBot"
    "AwarioSmartBot"
    "awesomecrawler"
    "axios"
    "B2B Bot"
    "backlinkcrawler"
    "Baidu-YunGuanCe"
    "Baiduspider"
    "Bark[rR]owler"
    "BazQux"
    "BDCbot"
    "BehloolBot"
    "betaBot"
    "bidswitchbot"
    "BIGLOTRON"
    "bingbot"
    "BingPreview\\/"
    "binlar"
    "BitBot"
    "bitlybot"
    "Blackboard"
    "BLEXBot\\/"
    "blogmuraBot"
    "BlogTraffic\\/\\d\\.\\d+ Feed-Fetcher"
    "BLP_bbot"
    "bnf.fr_bot"
    "BomboraBot"
    "bot-pge.chlooe.com"
    "Bot.AraTurka.com"
    "botify"
    "BoxcarBot"
    "brainobot"
    "BrandONbot"
    "BrandVerity"
    "BTWebClient"
    "BUbiNG"
    "BublupBot"
    "Buck\\/"
    "buzzbot"
    "Bytespider"
    "Caliperbot"
    "CapsuleChecker"
    "careerbot"
    "CC Metadata Scaper"
    "CCBot"
    "changedetection"
    "check_http"
    "CheckMarkNetwork\\/"
    "Chrome-Lighthouse"
    "Cincraw"
    "citeseerxbot"
    "Clickagy"
    "Cliqzbot\\/"
    "CloudFlare-AlwaysOnline"
    "coccoc"
    "collection@infegy.com"
    "Companybook-Crawler"
    "content crawler spider"
    "ContextAd Bot"
    "contxbot"
    "convera"
    "crawler4j"
    "CrunchBot"
    "CrystalSemanticsBot"
    "Curebot"
    "Cutbot"
    "cXensebot"
    "CyberPatrol"
    "DareBoost"
    "Datafeedwatch"
    "datagnionbot"
    "Datanyze"
    "Dataprovider.com"
    "Daum\\/"
    "dcrawl"
    "deadlinkchecker"
    "DeuSu\\/"
    "Diffbot\\/"
    "Digg Deeper"
    "Digincore bot"
    "discobot"
    "Discordbot"
    "Disqus"
    "DnyzBot"
    "Domain Re-Animator Bot"
    "domaincrawler"
    "Domains Project\\/"
    "DomainStatsBot"
    "dotbot"
    "Dragonbot"
    "drupact"
    "Dubbotbot"
    "DuckDuckBot"
    "DuckDuckGo-Favicons-Bot"
    "ec2linkfinder"
    "edisterbot"
    "electricmonk"
    "elisabot"
    "Embedly"
    "epicbot"
    "eright"
    "europarchive.org"
    "EveryoneSocialBot"
    "exabot"
    "Experibot"
    "ExtLinksBot"
    "Eyeotabot"
    "EZID"
    "ezooms"
    "facebookexternalhit"
    "Facebot"
    "FAST Enterprise Crawler"
    "FAST-WebCrawler"
    "fedoraplanet"
    "feedbot"
    "Feedfetcher-Google"
    "Feedly"
    "Feedspot"
    "FeedValidator"
    "FemtosearchBot"
    "Fetch\\/"
    "Fever"
    "filterdb.iss.net\\/crawler"
    "FindITAnswersbot"
    "findlink"
    "findthatfile"
    "findxbot"
    "Flamingo_SearchEngine"
    "FlipboardProxy"
    "fluffy"
    "fr-crawler"
    "FreeWebMonitoring SiteChecker"
    "FreshRSS"
    "Friendica"
    "fuelbot"
    "Fyrebot"
    "g00g1e.net"
    "G2 Web Services"
    "g2reader-bot"
    "GarlikCrawler"
    "Genieo"
    "Gigablast"
    "Gigabot"
    "GingerCrawler"
    "Gluten Free Crawler\\/"
    "gnam gnam spider"
    "GnowitNewsbot"
    "Go-http-client"
    "Google Favicon"
    "Google Web Preview"
    "Google-Adwords-Instant"
    "Google-PhysicalWeb"
    "Google-Site-Verification"
    "Google-Structured-Data-Testing-Tool"
    "google-xrawler"
    "Googlebot-Image"
    "Googlebot-Mobile"
    "Googlebot-News"
    "Googlebot-Video"
    "Googlebot\\/"
    "Gowikibot"
    "GrapeshotCrawler"
    "Grobbot"
    "GroupHigh\\/"
    "grub.org"
    "gslfbot"
    "Gwene"
    "Hatena"
    "HeadlessChrome"
    "heritrix"
    "http_get"
    "httpunit"
    "HttpUrlConnection"
    "HTTrack"
    "HubSpot"
    "ia_archiver"
    "IAS crawler"
    "ICBot\\/"
    "ICC-Crawler"
    "ichiro"
    "imrbot"
    "IndeedBot"
    "infoobot"
    "integromedb"
    "intelium_bot"
    "InterfaxScanBot"
    "ip-web-crawler.com"
    "ips-agent"
    "iskanie"
    "IstellaBot"
    "it2media-domain-crawler"
    "James BOT"
    "Jamie's Spider"
    "Jetslide"
    "Jetty"
    "JobboerseBot"
    "Jooblebot"
    "jpg-newsbot"
    "Jugendschutzprogramm-Crawler"
    "jyxobot"
    "K7MLWCBot"
    "Kemvibot"
    "KosmioBot"
    "Landau-Media-Spider"
    "Laserlikebot"
    "lb-spider"
    "Leikibot"
    "libwww-perl"
    "Linguee Bot"
    "linkapediabot"
    "LinkArchiver"
    "linkdex"
    "LinkedInBot"
    "LinkisBot"
    "lipperhey"
    "Livelap[bB]ot"
    "lssbot"
    "lssrocketcrawler"
    "ltx71"
    "Luminator-robots"
    "magpie-crawler"
    "Mail.RU_Bot"
    "mappydata"
    "Mastodon"
    "MauiBot"
    "MBCrawler"
    "Mediapartners \\(Googlebot\\)"
    "Mediapartners-Google"
    "Mediatoolkitbot"
    "MegaIndex"
    "MeltwaterNews"
    "memorybot"
    "MetaJobBot"
    "MetaURI"
    "mindUpBot"
    "Miniflux\\/"
    "MixnodeCache\\/"
    "MJ12bot"
    "mlbot"
    "moatbot"
    "MojeekBot\\/"
    "MoodleBot"
    "Moreover"
    "msnbot"
    "msrbot"
    "MuckRack"
    "Multiviewbot"
    "NAVER Blog Rssbot"
    "NerdByNature.Bot"
    "nerdybot"
    "NetcraftSurveyAgent"
    "netEstate NE Crawler"
    "Neticle Crawler"
    "netresearchserver"
    "Netvibes"
    "newsharecounts"
    "newspaper\\/"
    "NextCloud"
    "niki-bot"
    "Nimbostratus-Bot"
    "NING\\/"
    "NINJA bot"
    "NIXStatsbot"
    "Nmap Scripting Engine"
    "NTENTbot"
    "nutch"
    "Nuzzel"
    "Ocarinabot"
    "officestorebot"
    "okhttp"
    "omgili"
    "online-webceo-bot"
    "OpenHoseBot"
    "openindexspider"
    "OrangeBot\\/"
    "outbrain"
    "OutclicksBot"
    "page2rss"
    "PagePeeker\\/"
    "Pandalytics"
    "panscient"
    "PaperLiBot"
    "Pcore-HTTP"
    "PhantomJS"
    "phpcrawl"
    "pingdom"
    "pinterest.com.bot"
    "PiplBot"
    "PocketParser"
    "postrank"
    "PR-CY.RU"
    "Primalbot"
    "PrivacyAwareBot"
    "proximic"
    "psbot"
    "Pulsepoint"
    "purebot"
    "python-requests"
    "Python-urllib"
    "Qwantify"
    "RankActiveLinkBot"
    "redditbot"
    "Refindbot"
    "RegionStuttgartBot"
    "RetrevoPageAnalyzer"
    "RidderBot"
    "Rivva"
    "rogerbot"
    "rssbot\\/"
    "RSSingBot"
    "RyteBot"
    "S[eE][mM]rushBot"
    "SafeDNSBot"
    "SafeSearch microdata crawler"
    "SBL-BOT"
    "ScoutJet"
    "Scrapy"
    "Screaming Frog SEO Spider"
    "scribdbot"
    "SearchAtlas"
    "seekbot"
    "Seekport Crawler"
    "seewithkids"
    "semanticbot"
    "SemanticScholarBot"
    "SentiBot"
    "SeobilityBot"
    "SEOkicks"
    "seoscanners"
    "SerendeputyBot"
    "serpstatbot\\/"
    "seznambot"
    "SimpleCrawler"
    "SimplePie"
    "SimpleScraper"
    "sistrix crawler"
    "sitebot"
    "siteexplorer.info"
    "Siteimprove.com"
    "SkypeUriPreview"
    "Slack-ImgProxy"
    "Slackbot"
    "Slurp"
    "smtbot"
    "Snacktory"
    "SocialRankIOBot"
    "Sogou"
    "Sonic"
    "spbot"
    "speedy"
    "startmebot\\/"
    "StorygizeBot"
    "Streamline3Bot\\/"
    "summify"
    "Superfeedr"
    "SurdotlyBot"
    "SurveyBot"
    "SWIMGBot"
    "Sysomos"
    "Taboolabot"
    "tagoobot"
    "TangibleeBot"
    "TelegramBot"
    "Teoma"
    "theoldreader.com"
    "Thinklab"
    "tigerbot"
    "TinEye"
    "Tiny Tiny RSS"
    "toplistbot"
    "ToutiaoSpider"
    "Traackr.com"
    "tracemyfile"
    "trendictionbot"
    "TrendsmapResolver"
    "Trove"
    "TurnitinBot"
    "tweetedtimes"
    "TweetmemeBot"
    "twengabot"
    "Twingly"
    "Twitterbot"
    "Twurly"
    "um-LN"
    "Upflow"
    "Uptimebot\\.org"
    "UptimeRobot"
    "urlappendbot"
    "UsineNouvelleCrawler"
    "UT-Dorkbot"
    "Validator\\.nu"
    "vebidoobot"
    "VelenPublicWebCrawler"
    "Veoozbot"
    "Vigil\\/"
    "VKRobot"
    "vkShare"
    "voilabot"
    "VoluumDSP-content-bot"
    "W3C_CSS_Validator"
    "W3C_I18n-Checker"
    "W3C_Unicorn"
    "W3C_Validator"
    "W3C-checklink"
    "W3C-mobileOK"
    "wbsearchbot"
    "web-archive-net.com.bot"
    "webcompanycrawler"
    "WebDataStats"
    "webmon"
    "WeSEE:Search"
    "WhatsApp"
    "wocbot"
    "woobot"
    "WordupInfoSearch"
    "woriobot"
    "wotbox"
    "www\\.uptime\\.com"
    "Xenu Link Sleuth"
    "xovibot"
    "Y!J"
    "yacybot"
    "Yahoo Link Preview"
    "YaK\\/"
    "YandexAccessibilityBot"
    "YandexBot"
    "YandexImageResizer"
    "YandexImages"
    "YandexMetrika"
    "YandexMobileBot"
    "YandexTurbo"
    "YandexVideoParser"
    "yanga"
    "Yeti"
    "YisouSpider"
    "yoozBot"
    "Zabbix"
    "zenback bot"
    "zgrab"
    "ZoomBot"
    "ZoominfoBot"
    "ZumBot"
    "ZuperlistBot\\/"