Created
June 3, 2021 07:20
-
-
Save mor120/ae701054d01850adde55891b53104987 to your computer and use it in GitHub Desktop.
The main file of the script.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from Page import Page | |
import requests | |
import asyncio | |
# Keywords come from the command line, separated by commas. A keyword may
# span several argv entries (e.g. `main.py red car, blue bike`), so the
# arguments are re-joined with spaces before splitting on commas.
# sys.argv[0] is always the script name, so user input starts at index 1.
input_lst = sys.argv[1:]
keywords = [
    keyword.strip()
    for keyword in " ".join(input_lst).split(",")
    if keyword.strip()
]
if not keywords:
    # No usable keyword was given (no arguments, or only commas/whitespace).
    print("Please specify keywords. Separate by comma.")
    sys.exit(1)

print("Keyword(s): ")
print(", ".join(keywords))

# Links found to be dead; filled by links_loop() and written to disk by the
# __main__ section.
dead_links = []
def scan():
    """Collect the result link texts for every configured keyword.

    For each entry in the module-level ``keywords`` list a ``Page`` is
    created and the driver returned by its ``get_items()`` is searched for
    elements with the class ``result-details``. The text of the ``a``
    element found inside each of them is collected.

    Returns:
        list: The collected anchor texts for all keywords, in scan order.
        Duplicates are kept.
    """
    links = []
    for keyword in keywords:
        print("Starting to scan: ", keyword)
        # NOTE(review): presumably a Selenium WebDriver - confirm in Page.
        driver = Page(keyword).get_items()
        try:
            results = driver.find_elements_by_class_name("result-details")
            links.extend(
                [result.find_element_by_tag_name("a").text for result in results]
            )
        finally:
            # Always release the driver window, even when a lookup raises;
            # otherwise a failed keyword leaves its window open.
            driver.close()
    return links
def is_dead(link: str, timeout: float = 10):
    """Report whether *link* fails to answer with a 2xx HTTP status.

    Args:
        link: URL to fetch with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. A request
            that times out is counted as dead.

    Returns:
        bool: True when the request fails or the response status is not in
        the 2xx range, False otherwise.
    """
    print("Getting: ", link)
    try:
        status_code = requests.get(link, timeout=timeout).status_code
    except requests.exceptions.RequestException:
        # Base class of the requests errors (connection failure, timeout,
        # malformed URL, too many redirects). None of them produced a usable
        # response, so the link is dead rather than a reason to abort the run.
        return True
    if status_code // 100 != 2:
        return True
    # Only links that answered successfully are reported as done.
    print("Done: ", link)
    return False
async def links_loop(links: list):
    """Check every link and record the dead ones in ``dead_links``.

    ``is_dead`` is a blocking call, so each check is handed to the event
    loop's default executor and the checks are awaited together instead of
    running one after another on the loop itself.

    Args:
        links: Links to check. Repeated entries are checked only once.
    """
    # Inside a coroutine this returns the loop that is running it.
    loop = asyncio.get_event_loop()
    # Drop repeated links so the same link is not requested twice.
    unique_links = list(dict.fromkeys(links))
    checks = [
        loop.run_in_executor(None, is_dead, link) for link in unique_links
    ]
    # gather() returns the results in the same order as the submitted checks.
    results = await asyncio.gather(*checks)
    dead_links.extend(
        link for link, dead in zip(unique_links, results) if dead
    )
# Script entry point: scan for links, check them, and write the dead ones
# to dead_links.txt (one per line).
if __name__ == '__main__':
    links = scan()
    # Create the loop explicitly instead of relying on get_event_loop() to
    # create one implicitly, and close it once the checks have finished.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(links_loop(links))
    finally:
        loop.close()
    # De-duplicate and sort so the output file is stable between runs.
    dead_links = sorted(set(dead_links))
    # Explicit encoding: links may contain characters that the platform's
    # default encoding cannot represent.
    with open('dead_links.txt', 'w', encoding='utf-8') as f:
        f.writelines("%s\n" % link for link in dead_links)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment