Skip to content

Instantly share code, notes, and snippets.

@manzt
Last active December 12, 2025 23:01
Show Gist options
  • Select an option

  • Save manzt/bec5b9746a0a3f666e61cdd275985e34 to your computer and use it in GitHub Desktop.

Select an option

Save manzt/bec5b9746a0a3f666e61cdd275985e34 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "anthropic==0.75.0",
# "anywidget==0.9.21",
# "duckdb==1.4.3",
# "jupyter-scatter==0.22.2",
# "polars==1.36.1",
# "sqlglot==28.3.0",
# "traitlets==5.14.3",
# ]
#
# [tool.uv]
# exclude-newer = "2025-12-12T09:26:13.607822-08:00"
# ///
import marimo
# Version stamp stored in the notebook file (presumably the marimo release
# that generated it -- confirm against marimo's file-format docs).
__generated_with = "0.18.4"
# Notebook application object; each cell below registers itself on it
# through the `@app.cell` decorator.
app = marimo.App(width="medium")
@app.cell
def _(load_data):
    # Build the full dataset via `load_data` and expose it to other cells
    # as `df`.
    df = load_data()
    # Bare trailing expression: the cell's displayed value.
    df
    return (df,)
@app.cell
def _(df):
    # `subset` is the frame that the scatter-plot cell consumes. Here it is
    # the whole dataset; this is the place to filter `df` if desired.
    subset = df
    return (subset,)
@app.cell
def _(jscatter, mo, subset):
    # jscatter requires a pandas DataFrame, so convert the polars frame first.
    pdf = subset.to_pandas()

    # Scatter plot of the embedding coordinates, coloured by object type,
    # with a thumbnail preview in the tooltip.
    scatter = jscatter.Scatter(data=pdf, x="x", y="y")
    scatter.color(by="type")
    scatter.legend(True)
    scatter.tooltip(True, preview="thumburl", preview_type="image")
    scatter.height(600)

    # Cell-local alias (leading underscore keeps it out of the notebook's
    # global namespace).
    _widget = scatter.widget

    # Mirror the widget's `selection` trait into marimo state so that cells
    # reading `selection()` re-run whenever the selection changes.
    selection, set_selection = mo.state(_widget.selection)

    def _push_selection(_change):
        set_selection(_widget.selection)

    _widget.observe(_push_selection, names="selection")

    _widget
    return pdf, selection
@app.cell(hide_code=True)
def _(GalleryWidget, pdf, pl, selection):
    # Rows currently selected in the scatter plot (positional indices).
    _picked = pdf.iloc[selection()]
    # Cap the gallery at 100 randomly sampled rows, drawn without replacement.
    _sample_size = min(100, len(_picked))
    _preview = _picked.sample(_sample_size, replace=False)
    GalleryWidget(pl.from_pandas(_preview))
    return
@app.cell(hide_code=True)
def _():
    # Setup cell: imports the notebook's dependencies and defines the two
    # helpers used by the other cells -- `GalleryWidget` (a paginated
    # thumbnail grid) and `load_data` (downloads and joins the dataset).
    import polars as pl
    import marimo as mo
    import jscatter
    import anywidget
    import traitlets
    import urllib.request

    # Front-end module for GalleryWidget.
    # NOTE(review): the exact flechette version pin was lost from this file
    # (the specifier had been garbled into an invalid one). It is pinned to
    # major version 2 here -- restore the exact version if it is known.
    _GALLERY_WIDGET_ESM = """
    import * as flech from "https://esm.sh/@uwdata/flechette@2";

    function render({ model, el }) {
      // The Arrow IPC bytes arrive as a view over an ArrayBuffer. Respect the
      // view's offset and length instead of assuming it spans the whole buffer.
      let ipc = model.get("_ipc");
      let objects = flech.tableFromIPC(
        new Uint8Array(ipc.buffer, ipc.byteOffset, ipc.byteLength),
      );

      let container = document.createElement("div");
      container.className = "gallery";

      let paginationControls = document.createElement("div");
      paginationControls.className = "pagination-controls";
      let prevButton = document.createElement("button");
      prevButton.innerText = "← Previous";
      let pageIndicator = document.createElement("span");
      pageIndicator.className = "page-indicator";
      let nextButton = document.createElement("button");
      nextButton.innerText = "Next →";
      paginationControls.appendChild(prevButton);
      paginationControls.appendChild(pageIndicator);
      paginationControls.appendChild(nextButton);

      el.appendChild(container);
      el.appendChild(paginationControls);

      // Guard against a zero or negative page size (division by zero).
      function pageSize() {
        return Math.max(1, model.get("page_size"));
      }

      // Always at least one page, so an empty table reads "Page 1 of 1".
      function totalPages() {
        return Math.max(1, Math.ceil(objects.numRows / pageSize()));
      }

      // Clamp the stored page into range; it can go stale when page_size changes.
      function currentPage() {
        return Math.min(Math.max(0, model.get("page")), totalPages() - 1);
      }

      function update() {
        container.replaceChildren();
        let size = model.get("size");
        let perPage = pageSize();
        let pages = totalPages();
        let page = currentPage();
        container.style.gridTemplateColumns = `repeat(auto-fill, minmax(${size}px, 1fr))`;
        let startIdx = page * perPage;
        let endIdx = Math.min(startIdx + perPage, objects.numRows);
        for (let i = startIdx; i < endIdx; i++) {
          let row = objects.get(i);
          let item = document.createElement("div");
          item.className = "gallery-item";
          let link = Object.assign(document.createElement("a"), {
            className: "thumb-link",
            href: `https://www.nga.gov/collection/art-object-page.${row.objectid}.html`,
            target: "_blank",
            rel: "noopener noreferrer",
          });
          link.style.width = `${size}px`;
          link.style.height = `${size}px`;
          let img = Object.assign(document.createElement("img"), {
            src: row.thumburl,
            // A missing title must not become the literal alt text "null".
            alt: row.title ?? "",
          });
          link.appendChild(img);
          if (row.public) {
            let badge = Object.assign(document.createElement("img"), {
              src: "https://mirrors.creativecommons.org/presskit/icons/zero.svg",
              alt: "Public Domain",
              className: "public-domain-badge",
            });
            link.appendChild(badge);
          }
          item.appendChild(link);
          container.appendChild(item);
        }
        pageIndicator.innerText = `Page ${page + 1} of ${pages}`;
        prevButton.disabled = page <= 0;
        nextButton.disabled = page >= pages - 1;
      }

      update();

      prevButton.addEventListener("click", () => {
        let page = currentPage();
        if (page > 0) {
          model.set("page", page - 1);
          model.save_changes();
        }
      });
      nextButton.addEventListener("click", () => {
        let page = currentPage();
        if (page < totalPages() - 1) {
          model.set("page", page + 1);
          model.save_changes();
        }
      });

      model.on("change:page", update);
      model.on("change:size", update);
      model.on("change:page_size", update);
    }

    export default { render };
    """

    # Stylesheet for GalleryWidget (grid layout, thumbnail links, badge,
    # pagination bar).
    _GALLERY_WIDGET_STYLES = """
    .gallery {
      display: grid;
      gap: 8px;
      margin-bottom: 15px;
    }
    .gallery-item {
      position: relative;
      text-align: center;
    }
    .thumb-link {
      display: block;
      position: relative;
    }
    .thumb-link img:first-child {
      width: 100%;
      height: 100%;
      object-fit: cover;
      border-radius: 5px;
    }
    .public-domain-badge {
      position: absolute;
      bottom: 3px;
      right: 3px;
      width: 20px;
      height: 20px;
      opacity: 0.6;
    }
    .pagination-controls {
      display: flex;
      justify-content: center;
      align-items: center;
      margin-top: 10px;
      gap: 15px;
    }
    .pagination-controls button {
      padding: 5px 10px;
      background-color: var(--background);
      border: 1px solid #ccc;
      border-radius: 4px;
      cursor: pointer;
    }
    .pagination-controls button:disabled {
      background-color: var(--background);
      color: #999;
      cursor: not-allowed;
    }
    .page-indicator {
      font-size: 14px;
    }
    """

    class GalleryWidget(anywidget.AnyWidget):
        """Paginated grid of linked thumbnails rendered from a polars frame.

        The front end reads the columns ``objectid``, ``thumburl``, ``title``
        and ``public`` from each row, so the frame passed in must provide
        them.
        """

        _esm = _GALLERY_WIDGET_ESM
        _css = _GALLERY_WIDGET_STYLES

        # Frame serialised as Arrow IPC bytes; decoded by the front end.
        _ipc = traitlets.Any().tag(sync=True)
        # Thumbnail edge length in pixels. The default matches `__init__`.
        size = traitlets.Int(90).tag(sync=True)
        # Zero-based index of the page being shown.
        page = traitlets.Int(0).tag(sync=True)
        # Thumbnails per page. The default matches `__init__`.
        page_size = traitlets.Int(20).tag(sync=True)

        def __init__(
            self, objects: pl.DataFrame, *, size: int = 90, page_size: int = 20
        ) -> None:
            """Create a gallery over ``objects``, starting on the first page.

            Args:
                objects: Rows to display, one thumbnail per row.
                size: Thumbnail edge length in pixels.
                page_size: Number of thumbnails per page.
            """
            super().__init__(
                _ipc=objects.write_ipc(None).getvalue(),
                size=size,
                page=0,
                page_size=page_size,
            )

    @mo.persistent_cache
    def load_data() -> pl.DataFrame:
        """Download and join the tables behind the notebook.

        Combines the National Gallery of Art open-data CSVs (objects,
        constituents, published images, object/constituent links) with a
        precomputed t-SNE embedding and a list of public-domain object IDs.
        The function is wrapped in ``mo.persistent_cache`` so the result can
        be reused rather than downloaded again.

        Returns:
            One row per object that appears in every joined table, with
            ``thumburl`` as the first column, a boolean ``public`` column,
            sorted by ``year`` descending with nulls last.
        """
        # Fetch public domain IDs from GitHub
        with urllib.request.urlopen(
            "https://raw.githubusercontent.com/manzt/marimo-lunch-and-learn/main/notebooks/public_domain_ids.txt"
        ) as response:
            # str.split() with no separator skips blank and whitespace-only
            # lines, so no token can reach int() empty.
            public_domain_ids = [
                int(token) for token in response.read().decode("utf-8").split()
            ]

        # tsne embedding
        tsne = pl.read_parquet(
            "https://github.com/manzt/marimo-lunch-and-learn/raw/refs/heads/main/notebooks/tsne.parquet"
        )

        # rest of the public database dump
        url = "https://github.com/NationalGalleryOfArt/opendata/raw/refs/heads/main/data/"
        objects = pl.read_csv(url + "objects.csv", ignore_errors=True).select(
            pl.col("objectid"),
            pl.col("title"),
            pl.col("beginyear").alias("year"),
            pl.col("medium"),
            pl.col("visualbrowserclassification").alias("type"),
        )
        constituents = pl.read_csv(
            url + "constituents.csv", ignore_errors=True
        ).select(
            pl.col("constituentid"),
            pl.col("forwarddisplayname").alias("name"),
            pl.col("visualbrowsernationality").alias("nationality"),
        )
        published_images = pl.read_csv(url + "published_images.csv").select(
            pl.col("depictstmsobjectid").alias("objectid"),
            pl.col("uuid"),
            pl.col("iiifthumburl").alias("thumburl"),
        )
        # Keep a single artist per object: the one with the lowest
        # `displayorder`.
        objects_constituents = (
            pl.read_csv(url + "objects_constituents.csv", ignore_errors=True)
            .filter(pl.col("role").eq(pl.lit("artist")))
            .sort(by="displayorder")
            .group_by("objectid")
            .first()
            .select("objectid", "constituentid")
        )

        return (
            objects.join(objects_constituents, on="objectid")
            .join(constituents, on="constituentid")
            .join(published_images, on="objectid")
            .join(tsne, on="objectid")
            # Move `thumburl` to the front and drop the join key.
            .select(pl.col("thumburl"), pl.exclude("constituentid", "thumburl"))
            .with_columns(
                pl.col("objectid").is_in(public_domain_ids).alias("public")
            )
            .sort(by="year", descending=True, nulls_last=True)
        )

    return GalleryWidget, jscatter, load_data, mo, pl
@app.cell
def _():
    # Empty cell: defines nothing and displays nothing.
    return
# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment