Last active
December 12, 2025 23:01
-
-
Save manzt/bec5b9746a0a3f666e61cdd275985e34 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "anthropic==0.75.0",
#     "anywidget==0.9.21",
#     "duckdb==1.4.3",
#     "jupyter-scatter==0.22.2",
#     "polars==1.36.1",
#     "sqlglot==28.3.0",
#     "traitlets==5.14.3",
# ]
#
# [tool.uv]
# exclude-newer = "2025-12-12T09:26:13.607822-08:00"
# ///
import marimo

# Version string recorded by marimo (presumably the release that generated
# this file).
__generated_with = "0.18.4"
# The notebook application; every cell below registers itself via @app.cell.
app = marimo.App(width="medium")
@app.cell
def _(load_data):
    # Build the joined table via load_data() and show it as the cell output.
    df = load_data()
    df
    return (df,)
@app.cell
def _(df):
    # The rows that feed the scatter plot; currently the full table, unfiltered.
    subset = df
    return (subset,)
@app.cell
def _(jscatter, mo, subset):
    # jscatter requires a pandas DataFrame, so convert before plotting.
    pdf = subset.to_pandas()

    # Scatter plot of the "x"/"y" columns, coloured by "type", with the
    # thumbnail image shown as the tooltip preview.
    scatter = jscatter.Scatter(x="x", y="y", data=pdf)
    scatter.color(by="type")
    scatter.legend(True)
    scatter.tooltip(True, preview="thumburl", preview_type="image")
    scatter.height(600)

    # Mirror the widget's selection into marimo state: whenever the widget's
    # `selection` trait changes, push its current value through the setter so
    # that readers of `selection()` see the new value.
    selection, set_selection = mo.state(scatter.widget.selection)
    scatter.widget.observe(
        lambda _: set_selection(scatter.widget.selection), names="selection"
    )

    # Last expression: the widget is the cell's output.
    scatter.widget
    return pdf, selection
@app.cell(hide_code=True)
def _(GalleryWidget, pdf, pl, selection):
    # Rows currently selected in the scatter plot; the selection is used as
    # positional row indices into the pandas frame.
    _subset = pdf.iloc[selection()]

    # Show a random sample (without replacement) of at most 100 selected rows.
    GalleryWidget(
        pl.from_pandas(_subset.sample(min(100, len(_subset)), replace=False))
    )
    return
@app.cell(hide_code=True)
def _():
    import polars as pl
    import marimo as mo
    import jscatter
    import anywidget
    import traitlets
    import urllib.request

    # Front-end module of the gallery widget.
    #
    # NOTE(review): the version pin on the flechette import was destroyed by
    # e-mail obfuscation in the copy this file was recovered from (it read
    # "@uwdata/[email protected]", which is not a loadable URL). The package
    # name is restored below but the import is left unpinned -- put the
    # original version back (".../flechette@<version>") once it is known.
    _GALLERY_WIDGET_ESM = """
    import * as flech from "https://esm.sh/@uwdata/flechette";

    function render({ model, el }) {
      // Decode the Arrow IPC payload. Respect the view's offset and length
      // rather than assuming it spans its whole underlying ArrayBuffer.
      let ipc = model.get("_ipc");
      let objects = flech.tableFromIPC(
        new Uint8Array(ipc.buffer, ipc.byteOffset, ipc.byteLength)
      );

      let container = document.createElement("div");
      container.className = "gallery";

      let paginationControls = document.createElement("div");
      paginationControls.className = "pagination-controls";

      let prevButton = document.createElement("button");
      prevButton.innerText = "← Previous";

      let pageIndicator = document.createElement("span");
      pageIndicator.className = "page-indicator";

      let nextButton = document.createElement("button");
      nextButton.innerText = "Next →";

      paginationControls.appendChild(prevButton);
      paginationControls.appendChild(pageIndicator);
      paginationControls.appendChild(nextButton);

      el.appendChild(container);
      el.appendChild(paginationControls);

      // Number of pages for the current page size; never less than one so an
      // empty table reads "Page 1 of 1" instead of "Page 1 of 0".
      function pageCount() {
        let pageSize = model.get("page_size");
        return Math.max(1, Math.ceil(objects.numRows / pageSize));
      }

      // Rebuild the thumbnails of the current page and refresh the controls.
      function update() {
        container.replaceChildren();

        let size = model.get("size");
        let page = model.get("page");
        let pageSize = model.get("page_size");
        let totalPages = pageCount();

        container.style.gridTemplateColumns = `repeat(auto-fill, minmax(${size}px, 1fr))`;

        let startIdx = page * pageSize;
        let endIdx = Math.min(startIdx + pageSize, objects.numRows);

        for (let i = startIdx; i < endIdx; i++) {
          let row = objects.get(i);

          let item = document.createElement("div");
          item.className = "gallery-item";

          let link = Object.assign(document.createElement("a"), {
            className: "thumb-link",
            href: `https://www.nga.gov/collection/art-object-page.${row.objectid}.html`,
            target: "_blank",
            rel: "noopener noreferrer",
          });
          link.style.width = `${size}px`;
          link.style.height = `${size}px`;

          let img = Object.assign(document.createElement("img"), {
            src: row.thumburl,
            // A missing title would otherwise become the literal text "null".
            alt: row.title ?? "",
          });
          link.appendChild(img);

          if (row.public) {
            let badge = Object.assign(document.createElement("img"), {
              src: "https://mirrors.creativecommons.org/presskit/icons/zero.svg",
              alt: "Public Domain",
              className: "public-domain-badge",
            });
            link.appendChild(badge);
          }

          item.appendChild(link);
          container.appendChild(item);
        }

        pageIndicator.innerText = `Page ${page + 1} of ${totalPages}`;
        prevButton.disabled = page <= 0;
        nextButton.disabled = page >= totalPages - 1;
      }

      update();

      prevButton.addEventListener("click", () => {
        let page = model.get("page");
        if (page > 0) {
          model.set("page", page - 1);
          model.save_changes();
        }
      });

      nextButton.addEventListener("click", () => {
        let page = model.get("page");
        if (page < pageCount() - 1) {
          model.set("page", page + 1);
          model.save_changes();
        }
      });

      let events = ["change:page", "change:size", "change:page_size"];
      for (let event of events) {
        model.on(event, update);
      }

      // Cleanup callback: detach the model listeners when this view goes away
      // so a re-rendered view does not leave stale handlers behind.
      return () => {
        for (let event of events) {
          model.off(event, update);
        }
      };
    }

    export default { render };
    """

    # Stylesheet for the gallery grid, thumbnails, badge and pagination bar.
    _GALLERY_WIDGET_STYLES = """
    .gallery {
      display: grid;
      gap: 8px;
      margin-bottom: 15px;
    }
    .gallery-item {
      position: relative;
      text-align: center;
    }
    .thumb-link {
      display: block;
      position: relative;
    }
    .thumb-link img:first-child {
      width: 100%;
      height: 100%;
      object-fit: cover;
      border-radius: 5px;
    }
    .public-domain-badge {
      position: absolute;
      bottom: 3px;
      right: 3px;
      width: 20px;
      height: 20px;
      opacity: 0.6;
    }
    .pagination-controls {
      display: flex;
      justify-content: center;
      align-items: center;
      margin-top: 10px;
      gap: 15px;
    }
    .pagination-controls button {
      padding: 5px 10px;
      background-color: var(--background);
      border: 1px solid #ccc;
      border-radius: 4px;
      cursor: pointer;
    }
    .pagination-controls button:disabled {
      background-color: var(--background);
      color: #999;
      cursor: not-allowed;
    }
    .page-indicator {
      font-size: 14px;
    }
    """

    class GalleryWidget(anywidget.AnyWidget):
        """Paginated grid of linked thumbnails for a table of art objects.

        The table is serialized to Arrow IPC and decoded by the front end,
        which reads the ``objectid``, ``thumburl``, ``title`` and ``public``
        fields of each row.
        """

        _esm = _GALLERY_WIDGET_ESM
        _css = _GALLERY_WIDGET_STYLES

        # Arrow IPC bytes of the table being displayed.
        _ipc = traitlets.Any().tag(sync=True)
        # Thumbnail edge length in pixels. The trait defaults for `size` and
        # `page_size` mirror the constructor's keyword defaults, which always
        # override them anyway.
        size = traitlets.Int(90).tag(sync=True)
        # Zero-based index of the page currently shown.
        page = traitlets.Int(0).tag(sync=True)
        # Number of thumbnails per page.
        page_size = traitlets.Int(20).tag(sync=True)

        def __init__(
            self, objects: pl.DataFrame, *, size: int = 90, page_size: int = 20
        ) -> None:
            """Create a gallery over ``objects``, starting on the first page.

            Args:
                objects: Rows to display.
                size: Thumbnail edge length in pixels.
                page_size: Number of thumbnails per page.
            """
            super().__init__(
                _ipc=objects.write_ipc(None).getvalue(),
                size=size,
                page=0,
                page_size=page_size,
            )

    @mo.persistent_cache
    def load_data() -> pl.DataFrame:
        """Download the source tables and join them into one frame.

        Returns:
            One row per object that has an artist, a published image and a
            t-SNE embedding row. ``thumburl`` is the first column, a boolean
            ``public`` column marks IDs found in the public-domain list, and
            rows are sorted by ``year`` descending with nulls last.
        """
        # Fetch public domain IDs from GitHub. The timeout keeps a stalled
        # connection from hanging the notebook indefinitely.
        with urllib.request.urlopen(
            "https://raw.githubusercontent.com/manzt/marimo-lunch-and-learn/main/notebooks/public_domain_ids.txt",
            timeout=30,
        ) as response:
            # One integer ID per line; blank or whitespace-only lines are
            # skipped.
            public_domain_ids = [
                int(line)
                for line in response.read().decode("utf-8").splitlines()
                if line.strip()
            ]

        # tsne embedding
        tsne = pl.read_parquet(
            "https://github.com/manzt/marimo-lunch-and-learn/raw/refs/heads/main/notebooks/tsne.parquet"
        )

        # rest of the public database dump
        url = "https://github.com/NationalGalleryOfArt/opendata/raw/refs/heads/main/data/"

        objects = pl.read_csv(url + "objects.csv", ignore_errors=True).select(
            pl.col("objectid"),
            pl.col("title"),
            pl.col("beginyear").alias("year"),
            pl.col("medium"),
            pl.col("visualbrowserclassification").alias("type"),
        )

        constituents = pl.read_csv(
            url + "constituents.csv", ignore_errors=True
        ).select(
            pl.col("constituentid"),
            pl.col("forwarddisplayname").alias("name"),
            pl.col("visualbrowsernationality").alias("nationality"),
        )

        published_images = pl.read_csv(url + "published_images.csv").select(
            pl.col("depictstmsobjectid").alias("objectid"),
            pl.col("uuid"),
            pl.col("iiifthumburl").alias("thumburl"),
        )

        # Keep a single artist per object: the one with the lowest
        # displayorder.
        objects_constituents = (
            pl.read_csv(url + "objects_constituents.csv", ignore_errors=True)
            .filter(pl.col("role").eq(pl.lit("artist")))
            .sort(by="displayorder")
            .group_by("objectid")
            .first()
            .select("objectid", "constituentid")
        )

        return (
            objects.join(objects_constituents, on="objectid")
            .join(constituents, on="constituentid")
            .join(published_images, on="objectid")
            .join(tsne, on="objectid")
            # Move thumburl to the front and drop the join key.
            .select(pl.col("thumburl"), pl.exclude("constituentid", "thumburl"))
            .with_columns(
                pl.col("objectid").is_in(public_domain_ids).alias("public")
            )
            .sort(by="year", descending=True, nulls_last=True)
        )

    return GalleryWidget, jscatter, load_data, mo, pl
@app.cell
def _():
    # Intentionally empty cell.
    return
if __name__ == "__main__":
    # Run the notebook app when the file is executed as a script.
    app.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment