Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mdvaldosta/38606b08197d2c8e68ccc3219cceee33 to your computer and use it in GitHub Desktop.
Save mdvaldosta/38606b08197d2c8e68ccc3219cceee33 to your computer and use it in GitHub Desktop.
Blacklist Generator in Hosts Format
<?php
// The generated list is consumed as plain text (hosts format), not HTML.
header('Content-Type: text/plain');

// =================================================
//  Blocklist sources (router use)
// =================================================

// Remote lists that are downloaded and merged into the blocklist.
$blocklists = [
    // suspicious
    'https://hosts-file.net/grm.txt',
    'https://v.firebog.net/hosts/static/w3kbl.txt',
    'https://reddestdream.github.io/Projects/MinimalHosts/etc/MinimalHostsBlocker/minimalhosts',
    'https://someonewhocares.org/hosts/zero/hosts',
    'https://raw.githubusercontent.com/Dawsey21/Lists/master/main-blacklist.txt',

    // advertising
    'https://adaway.org/hosts.txt',
    'https://v.firebog.net/hosts/AdguardDNS.txt',
    'https://raw.githubusercontent.com/anudeepND/blacklist/master/adservers.txt',
    'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt',
    'https://hosts-file.net/ad_servers.txt',
    'https://v.firebog.net/hosts/Easylist.txt',
    'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts;showintro=0',
    'https://raw.githubusercontent.com/StevenBlack/hosts/master/data/UncheckyAds/hosts',
    'https://www.squidblacklist.org/downloads/dg-ads.acl',
    'http://winhelp2002.mvps.org/hosts.txt',
    'https://blocklist.site/app/dl/ads',

    // tracking and telemetry
    'https://v.firebog.net/hosts/Easyprivacy.txt',
    'https://v.firebog.net/hosts/Prigent-Ads.txt',
    'https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt',
    'https://raw.githubusercontent.com/StevenBlack/hosts/master/data/add.2o7Net/hosts',
    'https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt',
    'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/android-tracking.txt',
    'https://raw.githubusercontent.com/Perflyst/PiHoleBlocklist/master/SmartTV.txt',
    'https://v.firebog.net/hosts/Airelle-trc.txt',

    // malicious
    'https://s3.amazonaws.com/lists.disconnect.me/simple_malvertising.txt',
    'https://mirror1.malwaredomains.com/files/justdomains',
    'https://hosts-file.net/exp.txt',
    'https://hosts-file.net/emd.txt',
    'https://hosts-file.net/psh.txt',
    'https://mirror.cedia.org.ec/malwaredomains/immortal_domains.txt',
    'https://www.malwaredomainlist.com/hostslist/hosts.txt',
    'https://bitbucket.org/ethanr/dns-blacklists/raw/8575c9f96e5b4a1308f2f12394abd86d0927a4a0/bad_lists/Mandiant_APT1_Report_Appendix_D.txt',
    'https://v.firebog.net/hosts/Prigent-Malware.txt',
    'https://v.firebog.net/hosts/Prigent-Phishing.txt',
    'https://phishing.army/download/phishing_army_blocklist_extended.txt',
    'https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-malware.txt',
    'https://ransomwaretracker.abuse.ch/downloads/RW_DOMBL.txt',
    'https://ransomwaretracker.abuse.ch/downloads/CW_C2_DOMBL.txt',
    'https://ransomwaretracker.abuse.ch/downloads/LY_C2_DOMBL.txt',
    'https://ransomwaretracker.abuse.ch/downloads/TC_C2_DOMBL.txt',
    'https://ransomwaretracker.abuse.ch/downloads/TL_C2_DOMBL.txt',
    'https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist',
    'https://v.firebog.net/hosts/Shalla-mal.txt',
    'https://raw.githubusercontent.com/StevenBlack/hosts/master/data/add.Risk/hosts',
    'https://www.squidblacklist.org/downloads/dg-malicious.acl',
    'https://raw.githubusercontent.com/HorusTeknoloji/TR-PhishingList/master/url-lists.txt',
    'https://blocklist.site/app/dl/phishing',

    // coinblocker
    'https://zerodot1.gitlab.io/CoinBlockerLists/hosts',
];

// Extra domains to block that are not on any of the hosted lists.
$blocklist_add = [
    'xxx.com',
];
// =================================================
//  Whitelist sources (router use)
// =================================================

// Remote lists that are downloaded and merged into the whitelist.
$whitelists = [
    'https://raw.githubusercontent.com/anudeepND/whitelist/master/domains/whitelist.txt',
    'https://raw.githubusercontent.com/anudeepND/whitelist/master/domains/referral-sites.txt',
    'https://raw.githubusercontent.com/anudeepND/whitelist/master/domains/optional-list.txt',
    'https://raw.githubusercontent.com/SwissSkynet/whitelist/master/domains/whitelist.txt',
    'https://raw.githubusercontent.com/PracticalChip/Adblocking/master/RawWhitelist.txt',
    'https://raw.githubusercontent.com/raghavdua1995/DNSlock-PiHole-whitelist/master/whitelist.list',
    'https://raw.githubusercontent.com/Nickwasused/Pihole-Whitelist/master/whitelist.txt',
    'https://raw.githubusercontent.com/sml156/when_pigs_fly/master/whitelist.txt',
    'https://gist.githubusercontent.com/mdvaldosta/8e0ec8dad06a303cec5afac02d25d831/raw',
];

// Extra domains to whitelist that are not on any of the hosted lists.
$whitelist_add = [
    'hayesmedia.org',
];

// Domains that must never end up on the whitelist, even if a hosted
// whitelist contains them (they are subtracted in get_whitelist()).
$excludelist = [
    'testdomain.com',
    // 'testdomain2.com',
];
//---------- LOGIC ------------//
// Request routing, driven by the query string:
//   (no parameters)     -> blocklist minus whitelist, in hosts format
//   ?display=blocklist  -> the merged blocklist, one domain per line
//   ?display=whitelist  -> the merged whitelist, one domain per line
//   anything else       -> 404
//
// The selector is read through isset() so that a request carrying other
// parameters (e.g. ?foo=bar) does not raise an "undefined index" notice;
// that notice would be printed into the plain-text body and would also make
// the later 404 header() call fail because output had already started.
$display = isset($_GET['display']) ? $_GET['display'] : null;

if (empty($_GET))
{
    header('HTTP/1.1 200 OK');

    // Build both lists, then drop every whitelisted domain from the blocklist
    $blocklist = get_blacklist($blocklists, $blocklist_add);
    $whitelist = get_whitelist($whitelists, $whitelist_add, $excludelist);
    $blacklist = array_diff($blocklist, $whitelist);

    // Number of blocklist entries the whitelist removed
    $saved = count($blocklist) - count($blacklist);

    display_blacklist_info($blocklists, $blocklist_add, $blocklist);
    display_whitelist_info($whitelists, $whitelist_add, $excludelist, $whitelist);
    echo "# There are (".number_format(count($blacklist)).") total websites in the blacklist, (".number_format($saved).") were saved by the whitelist.\n\n";

    // Output in hosts format for Diversion
    foreach ($blacklist as $item) {
        echo '0.0.0.0 '.$item."\n";
    }
}
elseif ($display === 'blocklist') // display=blocklist
{
    header('HTTP/1.1 200 OK');
    $blocklist = get_blacklist($blocklists, $blocklist_add);
    display_blacklist_info($blocklists, $blocklist_add, $blocklist);
    foreach ($blocklist as $item) {
        echo $item."\n";
    }
}
elseif ($display === 'whitelist') // display=whitelist
{
    header('HTTP/1.1 200 OK');
    $whitelist = get_whitelist($whitelists, $whitelist_add, $excludelist);
    display_whitelist_info($whitelists, $whitelist_add, $excludelist, $whitelist);
    foreach ($whitelist as $item) {
        echo $item."\n";
    }
}
else
{
    // Unknown or missing "display" value
    header('HTTP/1.1 404 Not Found');
    echo '# Invalid request';
}
###################################################
############# ----- FUNCTIONS ------ ##############
###################################################
/**
 * Prints the commented blocklist header: a summary line with the counts,
 * one "# <url>" line per hosted blocklist, then a single blank line.
 *
 * @param array $blocklists    URLs of the hosted blocklists
 * @param array $blocklist_add Manually added domains
 * @param array $blocklist     The merged blocklist (only its size is used)
 */
function display_blacklist_info($blocklists, $blocklist_add, $blocklist)
{
    $hosted = number_format(count($blocklists));
    $manual = number_format(count($blocklist_add));
    $total  = number_format(count($blocklist));

    echo "# There are (", $hosted, ") hosted blocklists + (", $manual, ") additional sites for a total of (", $total, ") urls.\n";

    foreach ($blocklists as $url) {
        echo "# ", $url, "\n";
    }

    // Blank separator line, printed once after the URL list
    echo "\n";
}
/**
 * Prints the commented whitelist header: a summary line with the counts,
 * one "# <url>" line per hosted whitelist, then a single blank line.
 *
 * @param array $whitelists    URLs of the hosted whitelists
 * @param array $whitelist_add Manually added domains
 * @param array $excludelist   Domains that are never whitelisted
 * @param array $whitelist     The merged whitelist (only its size is used)
 */
function display_whitelist_info($whitelists, $whitelist_add, $excludelist, $whitelist)
{
    $hosted   = number_format(count($whitelists));
    $manual   = number_format(count($whitelist_add));
    $excluded = number_format(count($excludelist));
    $total    = number_format(count($whitelist));

    echo "# There are (", $hosted, ") hosted whitelists + (", $manual, ") additional sites - (", $excluded, ") excluded sites for a total of (", $total, ") urls.\n";

    foreach ($whitelists as $url) {
        echo "# ", $url, "\n";
    }

    // Blank separator line, printed once after the URL list
    echo "\n";
}
/**
 * Builds the blocklist: the cleaned content of every hosted list plus the
 * manually added domains, de-duplicated and sorted ascending.
 *
 * @param array $blocklists    URLs of the hosted blocklists
 * @param array $blocklist_add Manually added domains
 * @return array Sorted, unique list of domains
 */
function get_blacklist($blocklists, $blocklist_add)
{
    $combined = array_merge(make_array($blocklists), $blocklist_add);
    $unique = array_unique($combined);

    // sort() also re-indexes the keys left sparse by array_unique()
    sort($unique);

    return $unique;
}
/**
 * Builds the whitelist: the cleaned content of every hosted list plus the
 * manually added domains, de-duplicated, with the excluded domains removed,
 * sorted ascending.
 *
 * @param array $whitelists    URLs of the hosted whitelists
 * @param array $whitelist_add Manually added domains
 * @param array $excludelist   Domains that must never be whitelisted
 * @return array Sorted, unique list of domains
 */
function get_whitelist($whitelists, $whitelist_add, $excludelist)
{
    $combined = array_merge(make_array($whitelists), $whitelist_add);

    // Drop duplicates first, then anything on the exclude list
    $allowed = array_diff(array_unique($combined), $excludelist);

    // sort() also re-indexes the keys left sparse by the two calls above
    sort($allowed);

    return $allowed;
}
/**
 * Turns host lists into a clean array of domain names, without empty lines,
 * comments or the leading sink-IP column of hosts-format files.
 *
 * Examples of the per-line clean-up:
 *   '127.0.0.1 10xcdn.com'                      -> '10xcdn.com'
 *   'pagead2.googlesyndication.com # whitelisted' -> 'pagead2.googlesyndication.com'
 *   '# just a comment'                          -> (dropped)
 *
 * @param array|string $array_lists Either an array of list URLs (downloaded
 *                                  with multiRequest()) or a string holding
 *                                  raw list content.
 * @return array Domain names in input order, lower-cased, with consecutive
 *               integer keys. Duplicates are NOT removed here.
 */
function make_array($array_lists) // string or array
{
    if (is_array($array_lists)) {
        // Download every list and concatenate the bodies
        $raw = '';
        foreach (multiRequest($array_lists) as $body) {
            $raw .= $body."\n";
        }
    } else {
        // Raw list content was passed in directly
        $raw = (string) $array_lists;
    }

    // Sink-IP column(s) at the START of a line only. Anchoring matters: a
    // blind replace would also eat these sequences inside a host name
    // (e.g. '10.0.0.0.example.net'). The group repeats so that lines such as
    // '0.0.0.0 0.0.0.0' collapse to nothing.
    $ip_prefix = '/^\s*(?:(?:127\.0\.0\.1|0\.0\.0\.0|255\.255\.255\.255|0)(?:\s+|$))+/';

    $hosts = array();
    foreach (explode("\n", $raw) as $line) {
        // Remove everything from the first '#' on (comments)
        $parts = explode('#', $line, 2);
        $line = $parts[0];

        // Remove the host IP portion, then all remaining whitespace (incl. "\r")
        $line = preg_replace($ip_prefix, '', $line);
        // DNS names are case-insensitive; lower-casing lets de-duplication and
        // whitelist subtraction match entries that differ only in case
        $host = strtolower(preg_replace('/\s+/', '', $line));

        // Skip empty results ('0' is skipped too, it is not a usable host)
        if ($host === '' || $host === '0') {
            continue;
        }

        // Only keep legit domains (the URL filter requires a scheme in front)
        if (filter_var('http://'.$host, FILTER_VALIDATE_URL) === false) {
            continue;
        }

        $hosts[] = $host;
    }

    return $hosts;
}
// Via https://gist.github.com/aalfiann/736d44b3f4dfd3c35a3d9cb1ae34e2ef
/**
 * Fetches several URLs concurrently with the cURL multi interface.
 *
 * @param array $data    Requests keyed by an arbitrary id. Each value is
 *                       either a URL string, or an array with a 'url' entry
 *                       and an optional 'post' entry (sent as the POST body).
 * @param array $options Extra cURL options applied to every handle with
 *                       curl_setopt_array(). They are applied last, so they
 *                       override the defaults set here (timeouts, redirects,
 *                       etc. can be supplied this way).
 * @return array Response bodies keyed by the same ids as $data, as returned
 *               by curl_multi_getcontent().
 */
function multiRequest($data, $options = array()) {
    // array of curl handles
    $curly = array();
    // data to be returned
    $result = array();
    // multi handle
    $mh = curl_multi_init();

    // create one curl handle per request and attach it to the multi handle
    foreach ($data as $id => $d) {
        $curly[$id] = curl_init();
        $url = (is_array($d) && !empty($d['url'])) ? $d['url'] : $d;
        curl_setopt($curly[$id], CURLOPT_URL, $url);
        curl_setopt($curly[$id], CURLOPT_HEADER, 0);
        curl_setopt($curly[$id], CURLOPT_RETURNTRANSFER, 1);

        // post?
        if (is_array($d) && !empty($d['post'])) {
            curl_setopt($curly[$id], CURLOPT_POST, 1);
            curl_setopt($curly[$id], CURLOPT_POSTFIELDS, $d['post']);
        }

        // extra options?
        if (!empty($options)) {
            curl_setopt_array($curly[$id], $options);
        }

        curl_multi_add_handle($mh, $curly[$id]);
    }

    // Drive the transfers. Between rounds, wait on curl_multi_select() for
    // socket activity instead of calling curl_multi_exec() in a tight loop,
    // which would keep a CPU core busy for the whole download.
    $running = 0;
    do {
        do {
            $status = curl_multi_exec($mh, $running);
        } while ($status === CURLM_CALL_MULTI_PERFORM);

        // Stop on a multi-handle error rather than looping forever
        if ($status !== CURLM_OK) {
            break;
        }

        if ($running > 0 && curl_multi_select($mh, 1.0) === -1) {
            // select failed / nothing to wait on: short nap to avoid spinning
            usleep(1000);
        }
    } while ($running > 0);

    // get content and remove handles
    foreach ($curly as $id => $c) {
        $result[$id] = curl_multi_getcontent($c);
        curl_multi_remove_handle($mh, $c);
    }

    // all done
    curl_multi_close($mh);
    return $result;
}
// Footer: seconds elapsed since the server started handling this request
$elapsed = microtime(true) - $_SERVER["REQUEST_TIME_FLOAT"];
echo "\n# process time: " . $elapsed;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment