tbbooher · May 27, 2025 01:34
diff --git a/bb_extractor.py b/bb_extractor.py
 # Extracts bike listings from the bluebook bike website.
 # There is a saved HTML file with the bike listings that this script parses.
 # Tim Booher - May 26, 2025

 from bs4 import BeautifulSoup
 import pandas as pd

 # Load the saved listings page and parse it into a searchable tree.
 with open("data/page.html", "r", encoding="utf-8") as f:
     soup = BeautifulSoup(f, "html.parser")

 # Select every product card element on the page.
 cards = soup.select("div.product_gridView__fxpIW")

 # Collect one (name, type, size, price) tuple per complete card.
 data = []
 for card in cards:
     name_tag = card.select_one("h5.product_bikeName__g3kKC")
     type_tag = card.select_one("p.product_bikeType__3xlXW")
     price_tag = card.select_one("p.product_bikePrice__S7rBh")

     # Skip cards that are missing any of the three fields.
     if not (name_tag and type_tag and price_tag):
         continue

     name = name_tag.get_text(strip=True)
     # The type line is split on the first "●" into type and size.
     # partition() never raises: a line without the bullet yields an empty
     # size (and any further bullets stay in the size text) instead of a
     # ValueError from tuple unpacking that would abort the whole run.
     bike_type, _, size = type_tag.get_text().partition("●")
     # Drop the currency symbol and thousands separators; the price is kept
     # as text and written to the TSV unchanged.
     price = price_tag.get_text(strip=True).replace("$", "").replace(",", "")
     data.append((name, bike_type.strip(), size.strip(), price))

 # Convert to DataFrame and save as TSV
 df = pd.DataFrame(data, columns=["Name", "Type", "Size", "Price"])
 df.to_csv("bikes.tsv", sep="\t", index=False)

 print(f"Extracted {len(df)} bike listings to bikes.tsv")
	# Extracts bike listings from the bluebook bike website.
	# There is a saved HTML file with the bike listings that this script parses.
	# Tim Booher - May 26, 2025

	from bs4 import BeautifulSoup
	import pandas as pd

	# Load the saved listings page and parse it into a searchable tree.
	with open("data/page.html", "r", encoding="utf-8") as f:
	    soup = BeautifulSoup(f, "html.parser")

	# Select every product card element on the page.
	cards = soup.select("div.product_gridView__fxpIW")

	# Collect one (name, type, size, price) tuple per complete card.
	data = []
	for card in cards:
	    name_tag = card.select_one("h5.product_bikeName__g3kKC")
	    type_tag = card.select_one("p.product_bikeType__3xlXW")
	    price_tag = card.select_one("p.product_bikePrice__S7rBh")

	    # Skip cards that are missing any of the three fields.
	    if not (name_tag and type_tag and price_tag):
	        continue

	    name = name_tag.get_text(strip=True)
	    # The type line is split on the first "●" into type and size.
	    # partition() never raises: a line without the bullet yields an empty
	    # size (and any further bullets stay in the size text) instead of a
	    # ValueError from tuple unpacking that would abort the whole run.
	    bike_type, _, size = type_tag.get_text().partition("●")
	    # Drop the currency symbol and thousands separators; the price is kept
	    # as text and written to the TSV unchanged.
	    price = price_tag.get_text(strip=True).replace("$", "").replace(",", "")
	    data.append((name, bike_type.strip(), size.strip(), price))

	# Convert to DataFrame and save as TSV
	df = pd.DataFrame(data, columns=["Name", "Type", "Size", "Price"])
	df.to_csv("bikes.tsv", sep="\t", index=False)

	print(f"Extracted {len(df)} bike listings to bikes.tsv")