Last active
September 25, 2023 09:52
-
-
Save hn-support/b70d33d870981f5d6109b58900379643 to your computer and use it in GitHub Desktop.
A curated list of user agents that use lots of resources but neither increase conversions nor add any value for most webshops.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
360Spider | |
80legs\.com | |
ADmantX | |
Abonti | |
AcoonBot | |
Acunetix | |
AddThis\.com | |
AhrefsBot | |
AngloINFO | |
Antelope | |
Applebot | |
BLP_bbot | |
BOT\ for\ JCE | |
BaiduSpider | |
BeetleBot | |
BlackWidow | |
BoardReader | |
Bolt\ 0 | |
Bot\ mailto\:craftbot@yahoo\.com | |
CCBot | |
CPython | |
CRAZYWEBCRAWLER | |
CWS_proxy | |
CazoodleBot | |
ChinaClaw | |
Clerkbot | |
Cliqzbot | |
CommonCrawler | |
Crawlera | |
Curious | |
Custo | |
DIIbot | |
DISCo | |
DTS\.Agent | |
Dataprovider | |
Default\ Browser\ 0 | |
DeuSu | |
DigExt | |
Digincore | |
DoCoMo | |
DotBot | |
Download\ Demon | |
EasouSpider | |
EirGrabber | |
Elmer | |
EmailCollector | |
EmailSiphon | |
EmailWolf | |
Exabot | |
ExaleadCloudView | |
ExpertSearch | |
Express\ WebPictures | |
ExtractorPro | |
EyeNetIE | |
Ezooms | |
F2S | |
FHscan | |
FastSeek | |
FeedlyBot | |
Flamingo_SearchEngine | |
FlappyBot | |
FlashGet | |
GTB5 | |
GT\:\:WWW | |
GetRight | |
GetWeb\! | |
GigablastOpenSource | |
Go\!Zilla | |
Go\-Ahead\-Got\-It | |
GozaikBot | |
GrabNet | |
Grafula | |
Gluten\ Free\ Crawler | |
Guzzle | |
HMView | |
HTTP\:\:Lite | |
HTTrack | |
HomePageBot | |
HubSpot | |
IDBot | |
IRLbot | |
ISC\ Systems\ iRc\ Search\ 2\.1 | |
IlseBot | |
Image\ Stripper | |
Image\ Sucker | |
Indigonet | |
Indy\ Library | |
InterGET | |
InternetSeer\.com | |
Internet\ Ninja | |
JOC\ Web\ Spider | |
Java | |
JetCar | |
JobdiggerSpider | |
Jooblebot | |
KINGSpider | |
LeechFTP | |
LeguideBot | |
Lingewoud | |
Linguee | |
LinkChecker | |
LinksCrawler | |
LinksManager\.com_bot | |
LinqiaRSSBot | |
LivelapBot | |
LubbersBot | |
MFC_Tear_Sample | |
MIDown\ tool | |
MSFrontPage | |
Mail\.RU | |
Mass\ Downloader | |
Maxthon | |
Mediatoolkitbot | |
MegaIndex | |
Microsoft\ URL\ Control | |
Missigua\ Locator | |
Mister\ PiX | |
Mozilla.*Indy | |
Mozilla.*NEWT | |
Navroad | |
NearSite | |
NetAnts | |
NetSpider | |
NetZIP | |
Net\ Vampire | |
NextGenSearchBot | |
Octopus | |
Offline\ Explorer | |
Offline\ Navigator | |
OpenWebSpider | |
OpenindexSpider | |
OrangeBot | |
Owlin | |
PECL\:\:HTTP | |
PHPCrawl | |
PNAMAIN\.EXE | |
PageGrabber | |
PagesInventory | |
Papa\ Foto | |
PeoplePal | |
Photon | |
PleaseCrawl | |
PodcastPartyBot | |
Quantify | |
QuerySeekerSpider | |
R6_CommentReader | |
R6_FeedFetcher | |
RSSingBot | |
ReGet | |
RealDownload | |
Riddler | |
Rippers\ 0 | |
RyzeCrawler | |
SBIder | |
SEOkicks | |
SISTRIX | |
SafeSearch | |
Scrapy | |
Screaming | |
SeaMonkey | |
SearchmetricsBot | |
SemrushBot | |
SentiBot | |
Seznam | |
ShowyouBot | |
SightupBot | |
Site24x7 | |
SiteSnagger | |
Slackbot | |
Slurp | |
SmartDownload | |
Snoopy | |
Sogou | |
Sosospider | |
Steeler | |
SuperBot | |
SuperHTTP | |
Superfeedr | |
SurdotlyBot | |
Surfbot | |
Teleport\ Pro | |
TinEye-bot | |
Toata\ dragostea\ mea\ pentru\ diavola | |
Toplistbot | |
TurnitinBot | |
Twenga | |
Twitterbot | |
URI\:\:Fetch | |
Vagabondo | |
VoidEYE | |
VoilaBot | |
WBSearchBot | |
WEP\ Search | |
WPScan | |
WWWOFFLE | |
WWW\-Mechanize | |
WeSEE | |
WebAuto | |
WebBandit | |
WebCollage | |
WebCopier | |
WebFetch | |
WebGo\ IS | |
WebLeacher | |
WebReaper | |
WebSauger | |
WebStripper | |
WebWhacker | |
WebZIP | |
Web\ Image\ Collector | |
Web\ Sucker | |
Website\ Quester | |
Website\ eXtractor | |
Wells\ Search\ II | |
Widow | |
WinInet | |
Wotbox | |
Xaldon\ WebSpider | |
XoviBot | |
Yahoo | |
Yandex | |
YisouSpider | |
Zeus | |
ZmEu | |
ZumBot | |
ZyBorg | |
adbeat_bot | |
adidxbot | |
billigerbot | |
binlar | |
bitlybot | |
casper | |
checkprivacy | |
chromeframe | |
clshttp | |
comodo | |
crawler4j | |
diavol | |
discobot | |
eCatch | |
ecxi | |
ers\.net | |
extract | |
feedfinder | |
finbot | |
flicky | |
g00g1e | |
genieo | |
grab | |
harvest | |
heritrix | |
ia_archiver | |
icarus6 | |
id\-search | |
integromedb | |
jakarta | |
kanagawa | |
kmccrew | |
larbin | |
libwww | |
linkdexbot | |
linkwalker | |
ltx71 | |
lwp\-trivial | |
masscan | |
maverick | |
megaindex | |
microsoft\.url | |
miner | |
mj12bot | |
netEstate | |
nutch | |
panopta | |
panscient\.com | |
pavuk | |
pcBrowser | |
planetwork | |
prijsbest | |
psbot | |
purebot | |
pycurl | |
rogerbot | |
search\.goo\.ne\.jp | |
search_robot | |
seoscann | |
sitecheck\.internetseer\.com | |
siteexplorer\.info | |
skygrid | |
spaumbot | |
spbot | |
sucker | |
tAkeOut | |
trendictionbot | |
turnit | |
urllib | |
vikspider | |
webalta | |
woobot | |
woopingbot | |
worldwebheritage\.org | |
yacybot | |
zermelo | |
zh-CN |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Generate an Apache mod_rewrite snippet that rejects (flag [F]) requests
# whose User-Agent matches any pattern from a blocklist file, as well as
# requests whose User-Agent is empty or a single "-".
#
# Usage:  <this-script> [BLOCKLIST]
#           BLOCKLIST  path to the pattern file (default: ./blocklist.txt)
# Output: the config snippet on stdout; diagnostics go to stderr so they can
#         never end up inside a redirected config file.
# Exit:   0 on success, 1 when the blocklist file does not exist.

#######################################
# Print the usable patterns of a blocklist, one per line.
# Lines starting with '#' and blank lines are dropped. Spaces are escaped
# exactly once: already-escaped spaces ("\ ") are first unescaped and then
# every space is escaped, so both "BOT\ for\ JCE" and "Gluten Free Crawler"
# style entries come out in the same form.
# Arguments: $1 - path to the blocklist file
# Outputs:   normalized patterns on stdout
#######################################
list_agents() {
  sed -e '/^#/d' \
      -e '/^[[:space:]]*$/d' \
      -e 's/\\ / /g' \
      -e 's/ /\\ /g' \
      < "$1"
}

#######################################
# Emit the complete mod_rewrite snippet.
# Arguments: $1 - optional blocklist path (default ./blocklist.txt)
# Outputs:   snippet on stdout, error message on stderr
# Returns:   0 on success, 1 if the blocklist is missing
#######################################
main() {
  local blocklist=${1:-./blocklist.txt}
  local agent

  # Check for blocklist existence
  if [[ ! -f "$blocklist" ]]; then
    printf 'Blocklist %s not found! Please download it first from https://git.io/vySHE\n' \
      "$blocklist" >&2
    return 1
  fi

  # Create config snippet
  printf '%s\n' '## This is a list with collected useragents of bots that are eating your resources.'
  printf '\n\n'
  printf '%s\n' 'RewriteEngine On'

  # Read line by line (not word by word) so multi-word agents stay intact and
  # patterns such as "Mozilla.*Indy" are never glob-expanded by the shell.
  # The "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r agent || [[ -n "$agent" ]]; do
    printf 'RewriteCond %%{HTTP_USER_AGENT} %s [OR]\n' "$agent"
  done < <(list_agents "$blocklist")

  # Last condition closes the [OR] chain: empty or "-" user agent.
  printf '%s\n' 'RewriteCond %{HTTP_USER_AGENT} ^-?$ [NC]'
  printf '%s\n' 'RewriteRule .* - [F,L]'
}

# Last command on purpose: the script's exit status is main's return status.
main "$@"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Generate an nginx "if" snippet that answers 410 to every request whose
# User-Agent matches (case-insensitively, "~*") any pattern from a blocklist
# file.
#
# Usage:  <this-script> [BLOCKLIST]
#           BLOCKLIST  path to the pattern file (default: ./blocklist.txt)
# Output: the config snippet on stdout; diagnostics go to stderr so they can
#         never end up inside a redirected config file.
# Exit:   0 on success, 1 when the blocklist is missing or has no patterns.

#######################################
# Print the usable patterns of a blocklist, one per line.
# Lines starting with '#' and blank lines are dropped. Spaces are escaped
# exactly once: already-escaped spaces ("\ ") are first unescaped and then
# every space is escaped, so both "BOT\ for\ JCE" and "Gluten Free Crawler"
# style entries come out in the same form.
# Arguments: $1 - path to the blocklist file
# Outputs:   normalized patterns on stdout
#######################################
list_agents() {
  sed -e '/^#/d' \
      -e '/^[[:space:]]*$/d' \
      -e 's/\\ / /g' \
      -e 's/ /\\ /g' \
      < "$1"
}

#######################################
# Emit the complete nginx snippet.
# Arguments: $1 - optional blocklist path (default ./blocklist.txt)
# Outputs:   snippet on stdout, error message on stderr
# Returns:   0 on success, 1 if the blocklist is missing or empty
#######################################
main() {
  local blocklist=${1:-./blocklist.txt}
  local agent
  local regex=''

  # Check for blocklist existence
  if [[ ! -f "$blocklist" ]]; then
    printf 'Blocklist %s not found! Please download it first from https://git.io/vySHE\n' \
      "$blocklist" >&2
    return 1
  fi

  # Join all patterns with a pipe. Building the string incrementally means
  # there is no trailing pipe to strip, and the patterns are never exposed to
  # word splitting or glob expansion. The "|| [[ -n ... ]]" keeps a final
  # line that lacks a trailing newline.
  while IFS= read -r agent || [[ -n "$agent" ]]; do
    regex+="${regex:+|}${agent}"
  done < <(list_agents "$blocklist")

  # An empty alternation "()" matches every user agent and would turn the
  # snippet into "return 410 for all traffic"; refuse to generate that.
  if [[ -z "$regex" ]]; then
    printf 'Blocklist %s contains no user agent patterns; not generating a match-all rule\n' \
      "$blocklist" >&2
    return 1
  fi

  # Form snippet. printf '%s' emits the patterns verbatim, whereas "echo -e"
  # would interpret backslash sequences inside them.
  printf 'if ($http_user_agent ~* (%s) ) {\n' "$regex"
  printf ' return 410;\n'
  printf '}\n'
}

# Last command on purpose: the script's exit status is main's return status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment