Skip to content

Instantly share code, notes, and snippets.

View tbbooher's full-sized avatar

Tim Booher tbbooher

View GitHub Profile

sql

WITH city_avgs AS (
    SELECT
        brand,
        model,
        year,
        city,
        COUNT(*)        AS listings,
        AVG(price)::numeric(10,2) AS avg_price_city
WITH price_stats AS (
SELECT
year,
model,
COUNT(*) FILTER (WHERE source = 'craigslist') AS cnt_craigslist,
ROUND(AVG(price) FILTER (WHERE source = 'craigslist')::numeric, 2) AS avg_craigslist,
COUNT(*) FILTER (WHERE source = 'facebook') AS cnt_facebook,
ROUND(AVG(price) FILTER (WHERE source = 'facebook')::numeric, 2) AS avg_facebook,
#!/usr/bin/env python3
"""
Fit a resale-price model for used mountain bikes.
"""
import os, psycopg2, pandas as pd, numpy as np, datetime as dt
from sklearn.model_selection import GroupKFold
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
-- Population report row for the `id` column of bike_listings_new:
-- non-NULL count, NULL count, total row count, and percent populated.
SELECT
    'id' AS column_name,
    COUNT(id) AS populated_count,
    (COUNT(*) - COUNT(id)) AS missing_count,
    COUNT(*) AS total_rows,
    -- NULLIF guards the division: on an empty table COUNT(*) is 0 and the
    -- unguarded expression raises "division by zero"; this yields NULL instead.
    ROUND((COUNT(id)::DECIMAL * 100 / NULLIF(COUNT(*), 0)), 2) AS percentage_populated
FROM bike_listings_new
UNION ALL
#!/usr/bin/env python3
import pandas as pd
import psycopg2
from psycopg2 import sql
# 1) Load & clean CSV
df = pd.read_csv("data/bikes.csv")
# Rename 'original model' → valid identifier
df.rename(columns={"original model": "original_model"}, inplace=True)
\d bike_listings_new
Table "public.bike_listings_new"
Column | Type | Collation | Nullable | Default
----------------+--------------------------+-----------+----------+-----------------------------------------------
id | integer | | not null | nextval('bike_listings_new_id_seq'::regclass)
city | text | | not null |
post_date | timestamp with time zone | | |
price | numeric | | not null |
currency | text | | not null |
title | text | | not null |
# this is from the bluebook bike website
# there is an HTML file with the bike listings that I parse from this
# Tim Booher - May 26, 2025
from bs4 import BeautifulSoup
import pandas as pd
# Load the HTML content from file.
# The parse is done inside the `with` body so the file handle is still open
# when it is handed to BeautifulSoup; the handle is closed on leaving the block.
with open("data/page.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")
This file has been truncated, but you can view the full file.
{"city": "dallas", "scraped_at": "2025-05-25T12:08:11.438504", "item": {"@context": "http://schema.org", "offers": {"@type": "Offer", "priceCurrency": "USD", "availableAtOrFrom": {"address": {"postalCode": "", "addressLocality": "Weatherford", "addressRegion": "TX", "@type": "PostalAddress", "addressCountry": "", "streetAddress": ""}, "@type": "Place", "geo": {"@type": "GeoCoordinates", "longitude": -97.8094974319327, "latitude": 32.6835993447938}}, "price": "400.00"}, "image": ["https://images.craigslist.org/00202_k7Nhqswwz78_0t20CI_600x450.jpg", "https://images.craigslist.org/01010_gyzhLaO73or_0t20CI_600x450.jpg", "https://images.craigslist.org/00202_dO2TL2SYTnO_0t20CI_600x450.jpg", "https://images.craigslist.org/00d0d_iFXfiXFgV2o_0t20CI_600x450.jpg"], "@type": "Product", "name": "Mountain bike specialized hybrid areial", "description": ""}}
<html><head>
<title>Bluebook</title>
</head>
<body>
<div class="col" style="min-width: 0px;">
<div class="list-product_listProductRow__rE94l row">
<div class="col-12 col-sm-6 col-xl-4">
<div class="app-card product_gridView__fxpIW productCard">
<h5 aria-level="1" class="product_hidden__Mzics" data-uw-rm-heading="level" role="heading">The item
This file has been truncated, but you can view the full file.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>facebook results</title>
</head>
<body>