Created
July 4, 2024 15:11
-
-
Save joby-lol/01c1b00e6f11fb76140464a0b1280dfa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// script source https://raw.githubusercontent.com/r-a-y/mobile-hosts/master/converter.php
// https://github.com/r-a-y/mobile-hosts/blob/master/
// License: GPL-3.0 https://github.com/r-a-y/mobile-hosts/blob/master/LICENSE

// Source lists to convert, as name => URL.
//
// The name is used in the progress output, in the generated file header and
// as the output file name (data/<name>.txt), so it must not contain stray
// whitespace. Commented-out entries are sources that are currently disabled;
// uncomment a line to include that list in the next run.
$lists = [
    'adAway' => 'https://raw.githubusercontent.com/AdAway/adaway.github.io/master/hosts.txt',
    'adguardApps' => 'https://github.com/AdguardTeam/AdguardFilters/raw/master/MobileFilter/sections/specific_app.txt',
    'adguardDNS' => 'https://adguardteam.github.io/AdGuardSDNSFilter/Filters/filter.txt',
    'adguardMobileAds' => 'https://raw.githubusercontent.com/AdguardTeam/AdguardFilters/master/MobileFilter/sections/adservers.txt',
    'antipopadsRe' => 'https://raw.githubusercontent.com/AdroitAdorKhan/antipopads-re/master/formats/hosts.txt',
    'anudeepND' => 'https://raw.githubusercontent.com/anudeepND/blacklist/master/adservers.txt',
    'BaddBoyzHosts' => 'https://raw.githubusercontent.com/mitchellkrogza/Badd-Boyz-Hosts/master/hosts',
    'badmojr1Hosts' => 'https://raw.githubusercontent.com/badmojr/1Hosts/master/Xtra/domains.txt',
    'BBcan177MS2' => 'https://gist.githubusercontent.com/BBcan177/4a8bf37c131be4803cb2/raw/343ff780e15205b4dd0de37c86af34cfb26b2fbe/MS-2',
    'BBcan177MS4' => 'https://gist.githubusercontent.com/BBcan177/b6df57cef74e28d90acf1eec93d62d3b/raw/f0996cf5248657ada2adb396f3636be8716b99eb/MS-4',
    // 'blocklistProjectAbuse' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/abuse-nl.txt',
    // 'blocklistProjectAds' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/ads-nl.txt',
    // 'blocklistProjectCrypto' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/crypto-nl.txt',
    // 'blocklistProjectDrugs' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/drugs-nl.txt',
    // 'blocklistProjectFacebook' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/facebook-nl.txt',
    // 'blocklistProjectFraud' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/fraud-nl.txt',
    // 'blocklistProjectGambling' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/gambling-nl.txt',
    // 'blocklistProjectMalware' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/malware-nl.txt',
    // 'blocklistProjectPhishing' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/phishing-nl.txt',
    // 'blocklistProjectPiracy' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/piracy-nl.txt',
    // 'blocklistProjectPorn' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/porn-nl.txt',
    // 'blocklistProjectRansomware' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/ransomware-nl.txt',
    // 'blocklistProjectRedirect' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/redirect-nl.txt',
    // 'blocklistProjectScam' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/scam-nl.txt',
    // 'blocklistProjectTorrent' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/torrent-nl.txt',
    // 'blocklistProjectTracking' => 'https://raw.githubusercontent.com/blocklistproject/Lists/master/alt-version/tracking-nl.txt',
    // 'blocklistsFacebook' => 'https://raw.githubusercontent.com/jmdugan/blocklists/master/corporations/facebook/all',
    'coinBlockerLists' => 'https://zerodot1.gitlab.io/CoinBlockerLists/list.txt',
    'disconnectMe' => 'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt',
    'easyPrivacy3rdParty' => 'https://raw.githubusercontent.com/easylist/easylist/master/easyprivacy/easyprivacy_thirdparty.txt',
    'easyPrivacySpecific' => 'https://github.com/easylist/easylist/raw/master/easyprivacy/easyprivacy_specific.txt',
    'fadeMindAddRisk' => 'https://github.com/FadeMind/hosts.extras/raw/master/add.Risk/hosts',
    'fadeMindAddSpam' => 'https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Spam/hosts',
    'geoffreyFrogeyeFirstPartyTrackers' => 'https://hostfiles.frogeye.fr/firstparty-trackers.txt',
    'geoffreyFrogeyeMultiPartyTrackers' => 'https://hostfiles.frogeye.fr/multiparty-trackers.txt',
    'migueldemouraAdsTracking' => 'https://raw.githubusercontent.com/migueldemoura/ublock-umatrix-rulesets/master/Hosts/ads-tracking',
    'migueldemouraAdsTrackingBreaking' => 'https://raw.githubusercontent.com/migueldemoura/ublock-umatrix-rulesets/master/Hosts/ads-tracking-breaking',
    'migueldemouraMalware' => 'https://raw.githubusercontent.com/migueldemoura/ublock-umatrix-rulesets/master/Hosts/malware',
    'notrackBlocklist' => 'https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt',
    'notrackMalware' => 'https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-malware.txt',
    'PglYoYo' => 'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=one-line&showintro=0&mimetype=plaintext',
    'phishingArmy' => 'https://phishing.army/download/phishing_army_blocklist_extended.txt',
    'Phishing.Database' => 'https://raw.githubusercontent.com/mitchellkrogza/Phishing.Database/master/phishing-domains-ACTIVE.txt',
    'Phishing.DatabaseAll' => 'https://raw.githubusercontent.com/mitchellkrogza/Phishing.Database/master/ALL-phishing-domains.txt',
    // 'Phishing.DatabaseAllLinks' => 'https://raw.githubusercontent.com/mitchellkrogza/Phishing.Database/master/ALL-phishing-links.txt',
    'QuidsupMixed' => 'https://quidsup.net/notrack/blocklist.php?download=trackersdomains',
    'ShadowWhispererAds' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Ads',
    'ShadowWhispererApple' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Apple',
    'ShadowWhispererBloat' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Bloat',
    'ShadowWhispererChat' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Chat',
    'ShadowWhispererCryptocurrency' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Cryptocurrency',
    'ShadowWhispererDating' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Dating',
    'ShadowWhispererDynamic' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Dynamic',
    'ShadowWhispererFree' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Free',
    'ShadowWhispererJunk' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Junk',
    'ShadowWhispererMalware' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Malware',
    'ShadowWhispererMarketing' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Marketing',
    // Key previously carried a trailing space, which leaked into the output file name.
    'ShadowWhispererMarketingEmail' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Marketing-Email',
    'ShadowWhispererMicrosoft' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Microsoft',
    'ShadowWhispererRemote' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Remote',
    'ShadowWhispererRisk' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Risk',
    'ShadowWhispererScam' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Scam',
    'ShadowWhispererShock' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Shock',
    'ShadowWhispererTracking' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Tracking',
    'ShadowWhispererTunnels' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Tunnels',
    'ShadowWhispererTypo' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/Typo',
    'ShadowWhispererUrlShortener' => 'https://raw.githubusercontent.com/ShadowWhisperer/BlockLists/master/Lists/UrlShortener',
    'someoneWhoCares' => 'https://someonewhocares.org/hosts/hosts',
    'TheBigListofHackedMalwareWebSites' => 'https://raw.githubusercontent.com/mitchellkrogza/The-Big-List-of-Hacked-Malware-Web-Sites/master/hacked-domains.list',
    'UltimateHostsBlacklist0' => 'https://raw.githubusercontent.com/Ultimate-Hosts-Blacklist/Ultimate.Hosts.Blacklist/master/domains/domains0.list',
    'UltimateHostsBlacklist1' => 'https://raw.githubusercontent.com/Ultimate-Hosts-Blacklist/Ultimate.Hosts.Blacklist/master/domains/domains1.list',
    'UltimateHostsBlacklist2' => 'https://raw.githubusercontent.com/Ultimate-Hosts-Blacklist/Ultimate.Hosts.Blacklist/master/domains/domains2.list',
    'urlHaus' => 'https://urlhaus.abuse.ch/downloads/rpz/',
    'winhelp2002' => 'https://winhelp2002.mvps.org/hosts.txt',
];
// Whether the intl extension's idn_to_ascii() is available; when it is not,
// internationalized domain names are left unconverted.
$idn_to_ascii = function_exists('idn_to_ascii');

// ADDED: Path and array to save a list of all domains
$allDomains = [];
$allDomainsFile = 'data/_allDomains.txt';

// Every output file is written below this directory. Create it up front so a
// fresh checkout does not download every list only to fail on each write.
$outputDir = dirname($allDomainsFile);
if (!is_dir($outputDir) && !mkdir($outputDir, 0777, true) && !is_dir($outputDir)) {
    throw new RuntimeException("Could not create output directory: $outputDir");
}
// Convert every configured list into a plain list of domains:
// download it, reduce each filter rule to a bare domain where possible,
// drop exception (@@) domains, and write the result to data/<name>.txt.
// Surviving domains are also appended to $allDomains for the aggregate file.
foreach ($lists as $name => $list) {
    echo "Converting $name...\n";

    // Fetch the filter list. file_get_contents() returns false on failure;
    // skip the list in that case so a previously generated file is not
    // overwritten with a header-only one.
    $contents = file_get_contents($list);
    if (false === $contents) {
        echo "Could not fetch $list - skipping $name\n";
        continue;
    }

    // Split into lines. CRLF is normalised first so a stray "\r" cannot
    // defeat the exact-match workarounds further down.
    $lines = explode("\n", str_replace("\r\n", "\n", $contents));

    // HOSTS header.
    $hosts = "# $name\n";
    $hosts .= "#\n";
    $hosts .= "# Converted from - $list\n";
    $hosts .= '# Last converted - ' . date('r') . "\n";
    $hosts .= "#\n\n";

    $domains = $exceptions = [];

    // Loop through each ad filter.
    foreach ($lines as $filter) {
        // A domain needs at least one dot.
        if (false === strpos($filter, '.')) {
            continue;
        }

        // Skip wildcards, paths, comments / cosmetic rules, anything
        // containing a space, and 'abp?' rules.
        // NOTE(review): the space check also drops hosts-format lines such as
        // "0.0.0.0 example.com" - confirm that is intended for the
        // hosts-style sources in $lists.
        foreach (['*', '/', '#', ' ', 'abp?'] as $needle) {
            if (false !== strpos($filter, $needle)) {
                continue 2;
            }
        }

        // Skip Adguard HTML filtering syntax.
        if (false !== strpos($filter, '$$') || false !== strpos($filter, '$@$')) {
            continue;
        }

        // For $domain syntax, strip domain rules.
        if (false !== strpos($filter, '$domain') && false === strpos($filter, '@@')) {
            $filter = substr($filter, 0, strpos($filter, '$domain'));
        } elseif (false !== strpos($filter, '=')) {
            continue;
        }

        // Replace filter syntax with HOSTS syntax.
        // str_replace() applies the needles in order, so a longer option must
        // come before any shorter option it starts with ('^third-party'
        // before '^', '$object-subrequest' before '$object'); otherwise the
        // tail of the longer option is left attached to the domain.
        // @todo Perhaps skip $third-party, $image and $popup?
        $filter = str_replace(
            [
                '||', '^third-party', '^', '$third-party', ',third-party',
                '$all', ',all', '$image', ',image', ',important',
                '$script', ',script', '$object-subrequest', '$object', ',object',
                '$popup', ',popup', '$empty', '$document',
                '$subdocument', ',subdocument', '$ping', '$important',
                '$badfilter', ',badfilter', '$websocket', '$cookie', '$other',
            ],
            '',
            $filter
        );

        /*
         * Workarounds. Groan.
         */

        // EasyPrivacySpecific. See https://github.com/r-a-y/mobile-hosts/issues/17.
        if ('soundcloud.com' === $filter) {
            continue;
        }

        // See https://github.com/r-a-y/mobile-hosts/issues/26.
        if ('global.ssl.fastly.net' === $filter) {
            continue;
        }

        // Skip rules matching 'xmlhttprequest' for now.
        if (false !== strpos($filter, 'xmlhttprequest')) {
            continue;
        }

        // Skip exclusion rules.
        if (false !== strpos($filter, '~')) {
            continue;
        }

        // Trim whitespace.
        $filter = trim($filter);

        // If starting or ending with '.', skip.
        if ('.' === substr($filter, 0, 1) || '.' === substr($filter, -1)) {
            continue;
        }

        // If starting with '-' or '_', skip.
        // https://github.com/r-a-y/mobile-hosts/issues/5
        if ('-' === substr($filter, 0, 1) || '_' === substr($filter, 0, 1)) {
            continue;
        }

        // If starting with '!', skip.
        if ('!' === substr($filter, 0, 1)) {
            continue;
        }

        // A trailing '|' triggers removal of every '|' in the rule.
        if ('|' === substr($filter, -1)) {
            $filter = str_replace('|', '', $filter);
        }

        // Skip file extensions.
        if ('.jpg' === substr($filter, -4) || '.gif' === substr($filter, -4)) {
            continue;
        }

        // Strip port numbers.
        if (false !== strpos($filter, ':')) {
            $filter = substr($filter, 0, strpos($filter, ':'));
        }

        // Convert internationalized domain names to punycode. The empty /u
        // pattern only matches valid UTF-8 input. idn_to_ascii() returns
        // false on failure, which the empty() check below discards.
        if ($idn_to_ascii && preg_match('//u', $filter)) {
            $filter = idn_to_ascii($filter);
        }

        // If empty, skip.
        if (empty($filter)) {
            continue;
        }

        // Save exception to parse later. Exceptions are stored as bare
        // domains - the same form as $domains - so that array_diff() below
        // can actually match and remove them.
        if (0 === strpos($filter, '@@')) {
            $exceptions[] = str_replace('@@', '', $filter);
            continue;
        }

        $domains[] = $filter;
    }

    // Generate the domain list.
    if (!empty($domains)) {
        // Filter out duplicates.
        $domains = array_unique($domains);

        // ADDED: Filter domains for valid ones only. filter_var() returns
        // false for rejected values and array_filter() then drops them.
        $domains = array_filter(array_map(
            fn($d) => filter_var($d, FILTER_VALIDATE_DOMAIN),
            $domains
        ));

        // Remove exceptions.
        if (!empty($exceptions)) {
            $domains = array_diff($domains, $exceptions);
        }

        // ADDED: add all domains to all domains list
        $allDomains = array_merge($allDomains, $domains);

        $hosts .= implode("\n", $domains);
    }

    // Output the file.
    $outputFile = "data/{$name}.txt";
    if (false === file_put_contents($outputFile, $hosts)) {
        echo "Could not write $outputFile\n";
        continue;
    }

    echo "$name converted to domain records - see $outputFile\n";
}
// ADDED: Write the aggregate file - one header line followed by every
// collected domain, de-duplicated across lists and sorted.
$allDomains = array_unique($allDomains);
sort($allDomains);

$aggregateHeader = sprintf('# All domains from all lists - %s', date('r'));
$aggregateBody = implode(PHP_EOL, $allDomains);

file_put_contents($allDomainsFile, $aggregateHeader . PHP_EOL . $aggregateBody);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment