Last active
March 13, 2024 22:01
-
-
Save victorgabrielbs/01ccb47cf5dbef5bbe26eb39d2153edf to your computer and use it in GitHub Desktop.
Web scraping explainshell.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.edge.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def perform_scraping(command):
    """Submit *command* to explainshell.com and print the table rows of the result.

    A headless Edge browser opens the site, types the command into the first
    text input, and presses Return. Every ``<tr>`` of the resulting page is then
    printed to stdout as one line, with the text of its ``<td>`` cells joined
    by tabs.

    Args:
        command: The shell command line to type into the search box.

    Raises:
        selenium.common.exceptions.TimeoutException: If the search input does
            not appear within 10 seconds.
    """
    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Edge(options=options)
    try:
        # Navigation happens inside the try so that the browser is still shut
        # down by the finally clause when the page cannot be loaded.
        driver.get("https://explainshell.com/")
        input_text = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//input[@type="text"]'))
        )
        input_text.send_keys(command)
        input_text.send_keys(Keys.RETURN)

        # implicitly_wait() only configures the element-lookup timeout; it does
        # not pause, so the page source could be captured before the results
        # arrive. Wait explicitly instead: first for the old input element to
        # be detached (presumably the form submit triggers a navigation --
        # TODO confirm), then for the new document to finish loading.
        try:
            WebDriverWait(driver, 10).until(EC.staleness_of(input_text))
            WebDriverWait(driver, 10).until(
                lambda d: d.execute_script("return document.readyState") == "complete"
            )
        except TimeoutException:
            # Best effort: fall back to parsing whatever is currently loaded.
            print(
                "Timed out waiting for the results page; parsing the current page.",
                file=sys.stderr,
            )

        soup = BeautifulSoup(driver.page_source, "html.parser")
        rows = soup.find_all("tr")
        if not rows:
            print("No 'tr' tags found.")
        for row in rows:
            columns = row.find_all("td")
            if columns:
                print("\t".join(column.get_text() for column in columns))
            else:
                print("No 'td' tags found in the row.")
    finally:
        # Always terminate the browser process, even on failure.
        driver.quit()
if __name__ == "__main__":
    # Exactly one argument is expected: the command line to look up. Quote it
    # in the shell when it contains spaces so it arrives as a single argument.
    if len(sys.argv) != 2:
        # sys.exit() with a string writes it to stderr and exits with status 1,
        # so callers and shell pipelines can detect the misuse.
        sys.exit("Usage: python3 script.py <command>")
    perform_scraping(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment