Skip to content

Instantly share code, notes, and snippets.

@victorgabrielbs
Last active March 13, 2024 22:01
Show Gist options
  • Save victorgabrielbs/01ccb47cf5dbef5bbe26eb39d2153edf to your computer and use it in GitHub Desktop.
Save victorgabrielbs/01ccb47cf5dbef5bbe26eb39d2153edf to your computer and use it in GitHub Desktop.
Web scraping explainshell.com
import sys
from selenium import webdriver
from selenium.webdriver.edge.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
def perform_scraping(command):
    """Submit *command* to explainshell.com and print the resulting table rows.

    Starts a headless Edge browser, types ``command`` into the first text
    input on the explainshell.com front page, presses RETURN, and parses the
    page that follows with BeautifulSoup. Every ``<tr>`` that contains
    ``<td>`` cells is printed to stdout as one tab-separated line; a notice
    is printed for rows without ``<td>`` cells and when no ``<tr>`` exists.

    Args:
        command: The shell command line to look up.

    Raises:
        selenium.common.exceptions.TimeoutException: If the text input does
            not appear within 10 seconds.
    """
    # Imported locally so the block does not depend on the file-level imports.
    from selenium.common.exceptions import TimeoutException

    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Edge(options=options)
    try:
        # Inside the try so the browser is closed even if navigation fails.
        driver.get("https://explainshell.com/")
        input_text = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//input[@type="text"]'))
        )
        input_text.send_keys(command)
        input_text.send_keys(Keys.RETURN)

        # implicitly_wait() only configures the element-lookup timeout; it
        # does not pause. Wait explicitly for the submitted page to replace
        # the current one before reading its source.
        try:
            waiter = WebDriverWait(driver, 10)
            waiter.until(EC.staleness_of(input_text))
            waiter.until(
                lambda d: d.execute_script("return document.readyState")
                == "complete"
            )
        except TimeoutException:
            # Best effort: if the page never reloads (e.g. results are
            # rendered in place), parse whatever is currently displayed.
            pass

        soup = BeautifulSoup(driver.page_source, "html.parser")
        rows = soup.find_all("tr")
        if not rows:
            print("No 'tr' tags found.")
        for row in rows:
            columns = row.find_all("td")
            if columns:
                text_columns = [column.get_text() for column in columns]
                print("\t".join(text_columns))
            else:
                print("No 'td' tags found in the row.")
    finally:
        driver.quit()
if __name__ == "__main__":
    # Script entry point: expects exactly one argument, the command to explain.
    if len(sys.argv) != 2:
        # sys.exit() with a string writes it to stderr and exits with status 1,
        # so shells and callers can detect incorrect usage.
        sys.exit("Usage: python3 script.py <command>")
    command = sys.argv[1]
    perform_scraping(command)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment