Skip to content

Instantly share code, notes, and snippets.

@tbbooher
Created May 27, 2025 01:34
Show Gist options
  • Save tbbooher/23f208af7a0899a91cec74626f055dde to your computer and use it in GitHub Desktop.
Save tbbooher/23f208af7a0899a91cec74626f055dde to your computer and use it in GitHub Desktop.
# this is from the bluebook bike website
# There is an HTML file with the bike listings that this script parses.
# Tim Booher - May 26, 2025
from bs4 import BeautifulSoup
import pandas as pd
# Input/output locations and the CSS selectors that identify a listing's parts.
INPUT_PATH = "data/page.html"
OUTPUT_PATH = "bikes.tsv"
CARD_SELECTOR = "div.product_gridView__fxpIW"
NAME_SELECTOR = "h5.product_bikeName__g3kKC"
TYPE_SELECTOR = "p.product_bikeType__3xlXW"
PRICE_SELECTOR = "p.product_bikePrice__S7rBh"
# Character that separates the bike type from the size in the type label.
TYPE_SIZE_SEPARATOR = "●"
COLUMNS = ["Name", "Type", "Size", "Price"]


def split_type_and_size(text):
    """Split a "<type> ● <size>" label into a stripped ``(type, size)`` pair.

    Splits on the first separator only. If the separator is missing, the
    whole label is returned as the type and the size is an empty string,
    so a single odd label cannot abort the whole extraction.
    """
    bike_type, _, size = text.partition(TYPE_SIZE_SEPARATOR)
    return bike_type.strip(), size.strip()


def clean_price(text):
    """Return the price text with "$" signs and commas removed."""
    return text.replace("$", "").replace(",", "")


def parse_card(card):
    """Return ``(name, type, size, price)`` for one product card.

    Returns ``None`` when the card lacks a name, type, or price element;
    such cards are skipped by the caller.
    """
    name_tag = card.select_one(NAME_SELECTOR)
    type_tag = card.select_one(TYPE_SELECTOR)
    price_tag = card.select_one(PRICE_SELECTOR)
    if not (name_tag and type_tag and price_tag):
        return None

    name = name_tag.get_text(strip=True)
    # The type label is stripped per part after splitting, not before.
    bike_type, size = split_type_and_size(type_tag.get_text())
    price = clean_price(price_tag.get_text(strip=True))
    return name, bike_type, size, price


def extract_listings(soup):
    """Return a list of listing tuples for every complete product card."""
    parsed = (parse_card(card) for card in soup.select(CARD_SELECTOR))
    return [row for row in parsed if row is not None]


def main():
    """Parse the HTML file and write the listings out as a TSV file."""
    # Load the HTML content from file
    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")

    # Convert to DataFrame and save as TSV
    df = pd.DataFrame(extract_listings(soup), columns=COLUMNS)
    df.to_csv(OUTPUT_PATH, sep="\t", index=False)
    print(f"Extracted {len(df)} bike listings to {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment