Created
May 21, 2019 01:25
-
-
Save mdvaldosta/38606b08197d2c8e68ccc3219cceee33 to your computer and use it in GitHub Desktop.
Blacklist Generator in Hosts Format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// The output is consumed as a plain-text hosts file, never as HTML.
header("Content-Type: text/plain");

// =================================================
//  Blocklist sources (router use)
// =================================================

// Hosted lists that are downloaded and merged into the blacklist.
$blocklists = [
    // suspicious
    'https://hosts-file.net/grm.txt',
    'https://v.firebog.net/hosts/static/w3kbl.txt',
    'https://reddestdream.github.io/Projects/MinimalHosts/etc/MinimalHostsBlocker/minimalhosts',
    'https://someonewhocares.org/hosts/zero/hosts',
    'https://raw.githubusercontent.com/Dawsey21/Lists/master/main-blacklist.txt',

    // advertising
    'https://adaway.org/hosts.txt',
    'https://v.firebog.net/hosts/AdguardDNS.txt',
    'https://raw.githubusercontent.com/anudeepND/blacklist/master/adservers.txt',
    'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt',
    'https://hosts-file.net/ad_servers.txt',
    'https://v.firebog.net/hosts/Easylist.txt',
    'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts;showintro=0',
    'https://raw.githubusercontent.com/StevenBlack/hosts/master/data/UncheckyAds/hosts',
    'https://www.squidblacklist.org/downloads/dg-ads.acl',
    'http://winhelp2002.mvps.org/hosts.txt',
    'https://blocklist.site/app/dl/ads',

    // tracking and telemetry
    'https://v.firebog.net/hosts/Easyprivacy.txt',
    'https://v.firebog.net/hosts/Prigent-Ads.txt',
    'https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt',
    'https://raw.githubusercontent.com/StevenBlack/hosts/master/data/add.2o7Net/hosts',
    'https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt',
    'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/android-tracking.txt',
    'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV.txt',
    'https://v.firebog.net/hosts/Airelle-trc.txt',

    // malicious
    'https://s3.amazonaws.com/lists.disconnect.me/simple_malvertising.txt',
    'https://mirror1.malwaredomains.com/files/justdomains',
    'https://hosts-file.net/exp.txt',
    'https://hosts-file.net/emd.txt',
    'https://hosts-file.net/psh.txt',
    'https://mirror.cedia.org.ec/malwaredomains/immortal_domains.txt',
    'https://www.malwaredomainlist.com/hostslist/hosts.txt',
    'https://bitbucket.org/ethanr/dns-blacklists/raw/8575c9f96e5b4a1308f2f12394abd86d0927a4a0/bad_lists/Mandiant_APT1_Report_Appendix_D.txt',
    'https://v.firebog.net/hosts/Prigent-Malware.txt',
    'https://v.firebog.net/hosts/Prigent-Phishing.txt',
    'https://phishing.army/download/phishing_army_blocklist_extended.txt',
    'https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-malware.txt',
    'https://ransomwaretracker.abuse.ch/downloads/RW_DOMBL.txt',
    'https://ransomwaretracker.abuse.ch/downloads/CW_C2_DOMBL.txt',
    'https://ransomwaretracker.abuse.ch/downloads/LY_C2_DOMBL.txt',
    'https://ransomwaretracker.abuse.ch/downloads/TC_C2_DOMBL.txt',
    'https://ransomwaretracker.abuse.ch/downloads/TL_C2_DOMBL.txt',
    'https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist',
    'https://v.firebog.net/hosts/Shalla-mal.txt',
    'https://raw.githubusercontent.com/StevenBlack/hosts/master/data/add.Risk/hosts',
    'https://www.squidblacklist.org/downloads/dg-malicious.acl',
    'https://raw.githubusercontent.com/HorusTeknoloji/TR-PhishingList/master/url-lists.txt',
    'https://blocklist.site/app/dl/phishing',

    // coinblocker
    'https://zerodot1.gitlab.io/CoinBlockerLists/hosts',
];

// Individual domains to block in addition to the hosted lists.
$blocklist_add = [
    'xxx.com',
];
// =================================================
//  Whitelist sources (router use)
// =================================================

// Hosted lists of domains that must never end up in the blacklist.
$whitelists = [
    'https://raw.githubusercontent.com/anudeepND/whitelist/master/domains/whitelist.txt',
    'https://raw.githubusercontent.com/anudeepND/whitelist/master/domains/referral-sites.txt',
    'https://raw.githubusercontent.com/anudeepND/whitelist/master/domains/optional-list.txt',
    'https://raw.githubusercontent.com/SwissSkynet/whitelist/master/domains/whitelist.txt',
    'https://raw.githubusercontent.com/PracticalChip/Adblocking/master/RawWhitelist.txt',
    'https://raw.githubusercontent.com/raghavdua1995/DNSlock-PiHole-whitelist/master/whitelist.list',
    'https://raw.githubusercontent.com/Nickwasused/Pihole-Whitelist/master/whitelist.txt',
    'https://raw.githubusercontent.com/sml156/when_pigs_fly/master/whitelist.txt',
    'https://gist.githubusercontent.com/mdvaldosta/8e0ec8dad06a303cec5afac02d25d831/raw',
];

// Individual domains to whitelist in addition to the hosted lists.
$whitelist_add = [
    'hayesmedia.org',
];

// Domains that are removed from the whitelist again, so a hosted whitelist
// can never un-block them.
$excludelist = [
    'testdomain.com',
    //'testdomain2.com',
];
//---------- LOGIC ------------//
// Dispatch on the query string:
//   (no parameters)     -> blacklist minus whitelist, in hosts format
//   ?display=blocklist  -> merged blocklist, one domain per line
//   ?display=whitelist  -> merged whitelist, one domain per line
//   anything else       -> 404 with a short message
//
// The parameter is read defensively: a request such as ?foo=bar has a
// non-empty $_GET without a 'display' key, and indexing it directly raises an
// undefined-index notice/warning that can be echoed into the response body
// before the 404 status header is sent.
$display = isset($_GET['display']) ? $_GET['display'] : null;

if (empty($_GET)) {
    header('HTTP/1.1 200 OK');

    // Do blocklist processing
    $blocklist = get_blacklist($blocklists, $blocklist_add);
    $whitelist = get_whitelist($whitelists, $whitelist_add, $excludelist);

    // Remove whitelist items; $saved is how many entries the whitelist rescued
    $blacklist = array_diff($blocklist, $whitelist);
    $saved = count($blocklist) - count($blacklist);

    display_blacklist_info($blocklists, $blocklist_add, $blocklist);
    display_whitelist_info($whitelists, $whitelist_add, $excludelist, $whitelist);
    echo "# There are (".number_format(count($blacklist)).") total websites in the blacklist, (".number_format($saved).") were saved by the whitelist.\n\n";

    // Output in hosts format for Diversion
    foreach ($blacklist as $item) {
        echo '0.0.0.0 '.$item."\n";
    }
} elseif ($display === 'blocklist') { // display=blocklist
    header('HTTP/1.1 200 OK');

    $blocklist = get_blacklist($blocklists, $blocklist_add);
    display_blacklist_info($blocklists, $blocklist_add, $blocklist);
    foreach ($blocklist as $item) {
        echo $item."\n";
    }
} elseif ($display === 'whitelist') { // display=whitelist
    header('HTTP/1.1 200 OK');

    $whitelist = get_whitelist($whitelists, $whitelist_add, $excludelist);
    display_whitelist_info($whitelists, $whitelist_add, $excludelist, $whitelist);
    foreach ($whitelist as $item) {
        echo $item."\n";
    }
} else {
    // Unknown or missing 'display' value (including array values such as
    // ?display[]=x, which never strictly equal a string).
    header('HTTP/1.1 404 Not Found');
    echo '# Invalid request';
}
################################################### | |
############# ----- FUNCTIONS ------ ############## | |
################################################### | |
/**
 * Prints the comment header that describes the blocklist sources.
 *
 * Output: one summary line with three formatted counts, then every source
 * list URL as a "# " comment line, then a single blank line.
 *
 * @param array $blocklists    Hosted blocklist URLs.
 * @param array $blocklist_add Manually added entries.
 * @param array $blocklist     Resulting merged blocklist.
 */
function display_blacklist_info($blocklists, $blocklist_add, $blocklist)
{
    $hostedCount = number_format(count($blocklists));
    $manualCount = number_format(count($blocklist_add));
    $totalCount  = number_format(count($blocklist));

    echo "# There are (".$hostedCount.") hosted blocklists + (".$manualCount.") additional sites for a total of (".$totalCount.") urls.\n";

    foreach ($blocklists as $source) {
        echo "# ".$source."\n";
    }

    // Blank separator line, printed once after the source list.
    echo "\n";
}
/**
 * Prints the comment header that describes the whitelist sources.
 *
 * Output: one summary line with four formatted counts, then every source
 * list URL as a "# " comment line, then a single blank line.
 *
 * @param array $whitelists    Hosted whitelist URLs.
 * @param array $whitelist_add Manually added entries.
 * @param array $excludelist   Entries that are never whitelisted.
 * @param array $whitelist     Resulting merged whitelist.
 */
function display_whitelist_info($whitelists, $whitelist_add, $excludelist, $whitelist)
{
    $hostedCount   = number_format(count($whitelists));
    $manualCount   = number_format(count($whitelist_add));
    $excludedCount = number_format(count($excludelist));
    $totalCount    = number_format(count($whitelist));

    echo "# There are (".$hostedCount.") hosted whitelists + (".$manualCount.") additional sites - (".$excludedCount.") excluded sites for a total of (".$totalCount.") urls.\n";

    foreach ($whitelists as $source) {
        echo "# ".$source."\n";
    }

    // Blank separator line, printed once after the source list.
    echo "\n";
}
/**
 * Builds the blacklist: downloaded lists plus manual additions,
 * de-duplicated and sorted ascending.
 *
 * @param array $blocklists    Hosted blocklist URLs, handed to make_array().
 * @param array $blocklist_add Manually added entries.
 * @return array Sorted, re-indexed list of unique entries.
 */
function get_blacklist($blocklists, $blocklist_add)
{
    $downloaded = make_array($blocklists);
    $combined = array_unique(array_merge($downloaded, $blocklist_add));

    // sort() also re-indexes the array from 0.
    sort($combined);

    return $combined;
}
/**
 * Builds the whitelist: downloaded lists plus manual additions,
 * de-duplicated, with excluded entries removed, sorted ascending.
 *
 * @param array $whitelists    Hosted whitelist URLs, handed to make_array().
 * @param array $whitelist_add Manually added entries.
 * @param array $excludelist   Entries that must never be whitelisted.
 * @return array Sorted, re-indexed list of unique entries.
 */
function get_whitelist($whitelists, $whitelist_add, $excludelist)
{
    $downloaded = make_array($whitelists);
    $combined = array_unique(array_merge($downloaded, $whitelist_add));

    // Drop everything on the exclude list so it stays blockable.
    $allowed = array_diff($combined, $excludelist);

    // sort() also re-indexes the array from 0.
    sort($allowed);

    return $allowed;
}
/**
 * Turns raw list text into a clean array of domains, without empty lines,
 * comments or hosts-file IP prefixes.
 *
 * Changes compared with the previous implementation:
 *  - A string argument is now parsed as list text. The signature comment
 *    always promised "string or array", but strings were silently ignored.
 *  - The comment is cut off with strpos()/substr() instead of
 *    array_shift(explode(...)), which raised "Only variables should be passed
 *    by reference".
 *  - The hosts-file IP prefix is only removed at the start of a line. The old
 *    str_replace() removed '0 ' and the IP literals anywhere, so
 *    'cdn.example0 # note' was turned into 'cdn.example'.
 *  - The result is collected into a fresh list instead of unsetting elements
 *    of an array that is being iterated by reference.
 *
 * @param array|string $array_lists URLs to download (array, fetched through
 *                                  multiRequest()) or already-downloaded list
 *                                  text (string).
 * @return array List of entries that pass the URL filter, in input order,
 *               indexed from 0. Duplicates are kept.
 */
function make_array($array_lists) // string or array
{
    if (is_array($array_lists)) {
        // Download everything and treat the bodies as one long text.
        $raw = implode("\n", multiRequest($array_lists));
    } else {
        $raw = (string) $array_lists;
    }

    $domains = [];

    foreach (explode("\n", $raw) as $line) {
        // Remove everything on a line starting at #, stripping comments
        // example: 'pagead2.googlesyndication.com # whitelisted for smallnetbuilder.com'
        // becomes: 'pagead2.googlesyndication.com '
        $hashPos = strpos($line, '#');
        if ($hashPos !== false) {
            $line = substr($line, 0, $hashPos);
        }

        // Remove the leading host IP portion (possibly repeated, as in
        // '0.0.0.0 0.0.0.0'); anchored so domains are never altered.
        // example: '127.0.0.1 10xcdn.com' becomes '10xcdn.com'
        $line = preg_replace(
            '/^\s*(?:(?:127\.0\.0\.1|0\.0\.0\.0|255\.255\.255\.255|0)(?:\s+|$))+/',
            '',
            $line
        );

        // Remove all remaining whitespace (including a trailing "\r").
        $domain = preg_replace('/\s+/', '', $line);
        if ($domain === '') {
            continue;
        }

        // Only keep legit domains (FILTER function requires http in front)
        if (filter_var('http://'.$domain, FILTER_VALIDATE_URL) === false) {
            continue;
        }

        $domains[] = $domain;
    }

    return $domains;
}
// Via https://gist.github.com/aalfiann/736d44b3f4dfd3c35a3d9cb1ae34e2ef
/**
 * Fetches several URLs concurrently through the curl multi interface.
 *
 * The transfer loop waits in curl_multi_select() between curl_multi_exec()
 * calls. The previous version called curl_multi_exec() in a tight loop, which
 * kept a CPU core fully busy for the whole download, and it ignored the
 * status code returned by curl_multi_exec().
 *
 * @param array $data    Each entry is either a URL string, or an array with a
 *                       'url' key and an optional 'post' key (request body).
 * @param array $options Extra CURLOPT_* => value pairs applied to every handle
 *                       after the defaults, so they can override them.
 * @return array Whatever curl_multi_getcontent() yields for each handle,
 *               keyed exactly like $data.
 */
function multiRequest($data, $options = array()) {
    // array of curl handles
    $curly = array();
    // data to be returned
    $result = array();
    // multi handle
    $mh = curl_multi_init();

    // loop through $data and create curl handles
    // then add them to the multi-handle
    foreach ($data as $id => $d) {
        $curly[$id] = curl_init();
        $url = (is_array($d) && !empty($d['url'])) ? $d['url'] : $d;
        curl_setopt($curly[$id], CURLOPT_URL, $url);
        curl_setopt($curly[$id], CURLOPT_HEADER, 0);
        curl_setopt($curly[$id], CURLOPT_RETURNTRANSFER, 1);

        // post?
        if (is_array($d) && !empty($d['post'])) {
            curl_setopt($curly[$id], CURLOPT_POST, 1);
            curl_setopt($curly[$id], CURLOPT_POSTFIELDS, $d['post']);
        }

        // extra options?
        if (!empty($options)) {
            curl_setopt_array($curly[$id], $options);
        }

        curl_multi_add_handle($mh, $curly[$id]);
    }

    // execute the handles
    $running = 0;
    do {
        // Older libcurl versions ask to be called again immediately.
        do {
            $status = curl_multi_exec($mh, $running);
        } while ($status === CURLM_CALL_MULTI_PERFORM);

        if ($status !== CURLM_OK) {
            // The multi stack reported an error; stop driving it and return
            // whatever content the handles have collected so far.
            break;
        }

        // Sleep until there is socket activity instead of spinning. A return
        // of -1 means select could not wait, so back off briefly.
        if ($running > 0 && curl_multi_select($mh, 1.0) === -1) {
            usleep(1000);
        }
    } while ($running > 0);

    // get content and remove handles
    foreach ($curly as $id => $c) {
        $result[$id] = curl_multi_getcontent($c);
        curl_multi_remove_handle($mh, $c);
    }

    // all done
    curl_multi_close($mh);

    return $result;
}
// Footer: seconds elapsed since the web server started handling the request.
$requestStart = $_SERVER["REQUEST_TIME_FLOAT"];
$time = microtime(true) - $requestStart;
echo "\n# process time: {$time}";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment