WITH city_avgs AS (
SELECT
brand,
model,
year,
city,
COUNT(*) AS listings,
AVG(price)::numeric(10,2) AS avg_price_city
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH price_stats AS ( | |
SELECT | |
year, | |
model, | |
COUNT(*) FILTER (WHERE source = 'craigslist') AS cnt_craigslist, | |
ROUND(AVG(price) FILTER (WHERE source = 'craigslist')::numeric, 2) AS avg_craigslist, | |
COUNT(*) FILTER (WHERE source = 'facebook') AS cnt_facebook, | |
ROUND(AVG(price) FILTER (WHERE source = 'facebook')::numeric, 2) AS avg_facebook, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Fit a resale-price model for used mountain bikes. | |
""" | |
import os, psycopg2, pandas as pd, numpy as np, datetime as dt | |
from sklearn.model_selection import GroupKFold | |
from sklearn.compose import ColumnTransformer | |
from sklearn.pipeline import Pipeline | |
from sklearn.preprocessing import OneHotEncoder |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
'id' AS column_name, | |
COUNT(id) AS populated_count, | |
(COUNT(*) - COUNT(id)) AS missing_count, | |
COUNT(*) AS total_rows, | |
ROUND((COUNT(id)::DECIMAL * 100 / COUNT(*)), 2) AS percentage_populated | |
FROM bike_listings_new | |
UNION ALL |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import pandas as pd | |
import psycopg2 | |
from psycopg2 import sql | |
# 1) Load & clean CSV | |
df = pd.read_csv("data/bikes.csv") | |
# Rename 'original model' → valid identifier | |
df.rename(columns={"original model": "original_model"}, inplace=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\d bike_listings_new | |
Table "public.bike_listings_new" | |
Column | Type | Collation | Nullable | Default | |
----------------+--------------------------+-----------+----------+----------------------------------------------- | |
id | integer | | not null | nextval('bike_listings_new_id_seq'::regclass) | |
city | text | | not null | | |
post_date | timestamp with time zone | | | | |
price | numeric | | not null | | |
currency | text | | not null | | |
title | text | | not null | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# this is from the bluebook bike website | |
# There is an HTML file with the bike listings that I parse from this. | |
# Tim Booher - May 26, 2025 | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
# Load the HTML content from file | |
with open("data/page.html", "r", encoding="utf-8") as f: | |
soup = BeautifulSoup(f, "html.parser") |
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"city": "dallas", "scraped_at": "2025-05-25T12:08:11.438504", "item": {"@context": "http://schema.org", "offers": {"@type": "Offer", "priceCurrency": "USD", "availableAtOrFrom": {"address": {"postalCode": "", "addressLocality": "Weatherford", "addressRegion": "TX", "@type": "PostalAddress", "addressCountry": "", "streetAddress": ""}, "@type": "Place", "geo": {"@type": "GeoCoordinates", "longitude": -97.8094974319327, "latitude": 32.6835993447938}}, "price": "400.00"}, "image": ["https://images.craigslist.org/00202_k7Nhqswwz78_0t20CI_600x450.jpg", "https://images.craigslist.org/01010_gyzhLaO73or_0t20CI_600x450.jpg", "https://images.craigslist.org/00202_dO2TL2SYTnO_0t20CI_600x450.jpg", "https://images.craigslist.org/00d0d_iFXfiXFgV2o_0t20CI_600x450.jpg"], "@type": "Product", "name": "Mountain bike specialized hybrid areial", "description": ""}} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html><head> | |
<title>Bluebook</title> | |
</head> | |
<body> | |
<div class="col" style="min-width: 0px;"> | |
<div class="list-product_listProductRow__rE94l row"> | |
<div class="col-12 col-sm-6 col-xl-4"> | |
<div class="app-card product_gridView__fxpIW productCard"> | |
<h5 aria-level="1" class="product_hidden__Mzics" data-uw-rm-heading="level" role="heading">The item |
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<title>facebook results</title> | |
</head> | |
<body> |
NewerOlder