olooney · November 13, 2025 03:00
diff --git a/beatles.py b/beatles.py
 #!/usr/bin/env python
 # coding: utf-8

 # In[1]:


 # Notebook convenience: install OpenCV through IPython's %pip magic.
 # get_ipython is only defined inside an IPython/Jupyter session, so the
 # lookup is guarded to keep this exported script runnable with plain python.
 try:
    _ipython_shell = get_ipython()
 except NameError:
    _ipython_shell = None
 if _ipython_shell is not None:
    _ipython_shell.run_line_magic('pip', 'install opencv-python')


 # In[56]:


 import cv2
 import numpy as np
 from PIL import Image, ImageDraw


 # In[57]:


 # ---------- CONFIG ----------

 # Files: the input photo and the debug overlay written by the visualization.
 OUTPUT_VIS_PATH = "beatles_detect_rows.png"
 IMAGE_PATH = "beatles.jpg"

 # Segmentation: gray-level cutoff given to cv2.threshold (near white), the
 # iteration count of the morphological opening, and the smallest component
 # area that is kept as an object.
 THRESH_VALUE = 240
 EROSION_ITERATIONS = 5
 MIN_AREA = 50

 # Row clustering: merge threshold as a fraction of the mean object height.
 ROW_GAP_FACTOR = 0.5


 # In[58]:


 # ---------- STEP 1: LOAD IMAGE & THRESHOLD ----------

 # cv2.imread gives a BGR array; a None result means the file was not read.
 bgr = cv2.imread(IMAGE_PATH)
 if bgr is None:
    raise ValueError(f"Could not read image at {IMAGE_PATH}")
 h_img, w_img = bgr.shape[0], bgr.shape[1]

 # Inverted binary threshold on the grayscale image: the near-white
 # background (above THRESH_VALUE) becomes 0, everything else becomes 255.
 gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
 th = cv2.threshold(gray, THRESH_VALUE, 255, cv2.THRESH_BINARY_INV)[1]

 # Morphological opening with a 3x3 kernel to clean up the mask.
 kernel = np.ones((3, 3), dtype=np.uint8)
 th_clean = cv2.morphologyEx(
    th, cv2.MORPH_OPEN, kernel, iterations=EROSION_ITERATIONS
 )


 # In[59]:


 # ---------- STEP 2: FIND OBJECTS (CONNECTED COMPONENTS OR CONTOURS) ----------

 # Label the connected regions of the cleaned mask (findContours would also work).
 num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(th_clean)

 # Each stats row unpacks as (x, y, w, h, area); label 0 is the background.
 objects = []
 for label in range(1, num_labels):
    x, y, w, h, area = stats[label]
    if area >= MIN_AREA:
        # Keep the box as (x1, y1, x2, y2) plus its vertical centre and height.
        objects.append({
            "label": label,
            "bbox": (x, y, x + w, y + h),
            "cy": y + h / 2.0,
            "h": h,
        })

 if len(objects) == 0:
    raise RuntimeError("No objects found after thresholding/connected components")


 # In[60]:


 # ---------- STEP 3: AGGLOMERATIVE 1D CLUSTERING (AVERAGE LINKAGE) ----------

 def agglomerative_1d_average_linkage(values, dist_threshold):
    """
    Greedily merge 1D values into clusters, closest pair of cluster means first.

    values: sequence of numbers (the objects' cy values in this script)
    dist_threshold: merging stops once the closest pair of clusters is at
        least this far apart
    The distance between two clusters is abs(mean(c1) - mean(c2)).
    Returns: list of clusters, each a list of indices into 'values'.
    """
    groups = [[idx] for idx in range(len(values))]

    while len(groups) > 1:
        centers = [float(np.mean([values[idx] for idx in g])) for g in groups]

        # min() keeps the first minimal pair, i.e. the same winner as a
        # row-major scan that only replaces the best on a strict '<'.
        a, b = min(
            ((p, q) for p in range(len(groups)) for q in range(p + 1, len(groups))),
            key=lambda pair: abs(centers[pair[0]] - centers[pair[1]]),
        )
        if abs(centers[a] - centers[b]) >= dist_threshold:
            break

        # The merged cluster takes the earlier slot; the later slot is removed.
        groups[a] = groups[a] + groups[b]
        del groups[b]

    return groups


 # The vertical centre of every object is what gets clustered into rows.
 values_cy = [o["cy"] for o in objects]

 # Merge threshold: ROW_GAP_FACTOR times the mean object height.
 heights = [o["h"] for o in objects]
 avg_h = float(np.mean(heights))
 dist_threshold = avg_h * ROW_GAP_FACTOR

 clusters = agglomerative_1d_average_linkage(values_cy, dist_threshold)


 # In[61]:


 # ---------- STEP 4: TURN CLUSTERS INTO ORDERED ROWS ----------

 def cluster_mean_cy(cluster_indices):
    """Return, as a float, the mean 'cy' of the module-level objects at cluster_indices."""
    cy_values = [objects[idx]["cy"] for idx in cluster_indices]
    return float(np.mean(cy_values))

 # Order the clusters top-to-bottom by their mean centre-y.
 clusters_sorted = sorted(clusters, key=cluster_mean_cy)

 # rows: one list per cluster holding its bboxes (x1, y1, x2, y2), sorted by x1
 rows = []
 for cluster in clusters_sorted:
    left_to_right = sorted((objects[i] for i in cluster), key=lambda o: o["bbox"][0])
    rows.append([o["bbox"] for o in left_to_right])

 # Vertical extent of each row: (smallest y1, largest y2) over its boxes
 row_bounds = [
    (min(box[1] for box in row), max(box[3] for box in row))
    for row in rows
 ]


 # In[62]:


 # ---------- STEP 5: VISUALIZATION WITH PIL (BLUE ROWS, GREEN BOXES) ----------

 # PIL wants RGB while OpenCV holds BGR; go to RGBA so fills can carry alpha.
 rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
 pil_img = Image.fromarray(rgb).convert("RGBA")
 draw = ImageDraw.Draw(pil_img, "RGBA")

 # Full-width blue lines (alpha 128) at the top and bottom of every row
 for top, bottom in row_bounds:
    for y_line in (top, bottom):
        draw.line([(0, y_line), (w_img, y_line)], fill=(0, 0, 255, 128), width=1)

 # Green outline (alpha 128) of every box, one edge at a time
 for row in rows:
    for (x1, y1, x2, y2) in row:
        edges = (
            ((x1, y1), (x2, y1)),  # top
            ((x1, y2), (x2, y2)),  # bottom
            ((x1, y1), (x1, y2)),  # left
            ((x2, y1), (x2, y2)),  # right
        )
        for start, end in edges:
            draw.line([start, end], fill=(0, 255, 0, 128), width=1)

 pil_img.save(OUTPUT_VIS_PATH)
 print(f"\nVisualization saved to {OUTPUT_VIS_PATH}")


 # In[65]:


 def align_to_grid(img, rows, background=(255, 255, 255)):
    """
    Re-lay the boxes of 'rows' on a column grid in a new image of the same size.

    The c-th box of every row is treated as column c.  Each column is as wide
    as its widest box, the leftover horizontal space is split into
    num_cols + 1 gaps, and every box is pasted horizontally centred in its
    column at its original vertical position.

    img   : original PIL image
    rows  : list of rows; each row is list of (x1, y1, x2, y2) bboxes
    background : RGB background color for new image
    Returns: a new image with img's mode and size; img itself is not drawn on.
    Raises: ValueError if the summed column widths exceed the image width.

    With no rows (or only empty rows) the result is a blank background image.
    NOTE(review): background is handed to Image.new together with img.mode;
    an RGB triple presumably only suits 3-band images -- confirm before
    passing images of other modes.
    """
    w_img, h_img = img.size

    # --- 1. Column widths: max width in each column across all rows ---
    # default=0 lets an empty 'rows' behave like a list of empty rows
    # instead of crashing inside max().
    num_cols = max((len(row) for row in rows), default=0)
    col_widths = [0] * num_cols
    for row in rows:
        for c, (x1, _y1, x2, _y2) in enumerate(row):
            col_widths[c] = max(col_widths[c], int(x2) - int(x1))

    extra = w_img - sum(col_widths)
    if extra < 0:
        raise ValueError("Columns wider than image width; cannot fit grid.")

    # --- 2. Distribute extra padding into C+1 gaps as fairly as possible ---
    # The remainder is handed out one pixel at a time to the leftmost gaps.
    base_gap, remainder = divmod(extra, num_cols + 1)
    gaps = [base_gap + (1 if i < remainder else 0) for i in range(num_cols + 1)]

    # --- 3. Compute left x for each column ---
    col_lefts = []
    x = gaps[0]
    for c in range(num_cols):
        col_lefts.append(x)
        x += col_widths[c] + gaps[c + 1]

    # --- 4. Create new image and paste each object into its aligned slot ---
    new_img = Image.new(img.mode, (w_img, h_img), background)
    for row in rows:
        for c, bbox in enumerate(row):
            # Plain ints: boxes built from OpenCV stats hold NumPy scalars.
            x1, y1, x2, y2 = (int(v) for v in bbox)
            src_w = x2 - x1

            # centre the object within its column; vertical placement is kept
            new_left = int(round(col_lefts[c] + (col_widths[c] - src_w) / 2.0))

            crop = img.crop((x1, y1, x2, y2))
            new_img.paste(crop, (new_left, y1, new_left + src_w, y2))

    return new_img


 # In[68]:


 # Open the source image as a context manager so its file handle is released
 # even if align_to_grid raises; the aligned copy is saved afterwards.
 with Image.open(IMAGE_PATH) as img:
    aligned = align_to_grid(img, rows)
 aligned.save("beatles_grid.png")
	#!/usr/bin/env python
	# coding: utf-8

	# In[1]:


	# Notebook convenience: install OpenCV through IPython's %pip magic.
	# get_ipython is only defined inside an IPython/Jupyter session, so the
	# lookup is guarded to keep this exported script runnable with plain python.
	try:
	    _ipython_shell = get_ipython()
	except NameError:
	    _ipython_shell = None
	if _ipython_shell is not None:
	    _ipython_shell.run_line_magic('pip', 'install opencv-python')


	# In[56]:


	import cv2
	import numpy as np
	from PIL import Image, ImageDraw


	# In[57]:


	# ---------- CONFIG ----------

	# Files: the input photo and the debug overlay written by the visualization.
	OUTPUT_VIS_PATH = "beatles_detect_rows.png"
	IMAGE_PATH = "beatles.jpg"

	# Segmentation: gray-level cutoff given to cv2.threshold (near white), the
	# iteration count of the morphological opening, and the smallest component
	# area that is kept as an object.
	THRESH_VALUE = 240
	EROSION_ITERATIONS = 5
	MIN_AREA = 50

	# Row clustering: merge threshold as a fraction of the mean object height.
	ROW_GAP_FACTOR = 0.5


	# In[58]:


	# ---------- STEP 1: LOAD IMAGE & THRESHOLD ----------

	# cv2.imread gives a BGR array; a None result means the file was not read.
	bgr = cv2.imread(IMAGE_PATH)
	if bgr is None:
	    raise ValueError(f"Could not read image at {IMAGE_PATH}")
	h_img, w_img = bgr.shape[:2]

	gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

	# Inverted binary threshold: the near-white background (above THRESH_VALUE)
	# becomes 0, everything else becomes 255.
	_, th = cv2.threshold(gray, THRESH_VALUE, 255, cv2.THRESH_BINARY_INV)

	# Morphological opening with a 3x3 kernel to clean up the mask.
	kernel = np.ones((3, 3), np.uint8)
	th_clean = cv2.morphologyEx(th, cv2.MORPH_OPEN, kernel, iterations=EROSION_ITERATIONS)


	# In[59]:


	# ---------- STEP 2: FIND OBJECTS (CONNECTED COMPONENTS OR CONTOURS) ----------

	# Label the connected regions of the cleaned mask (findContours would also work).
	num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(th_clean)

	# Each stats row unpacks as (x, y, w, h, area); label 0 is the background.
	objects = []
	for label in range(1, num_labels):
	    x, y, w, h, area = stats[label]
	    if area < MIN_AREA:
	        continue

	    # Keep the box as (x1, y1, x2, y2) plus its vertical centre and height.
	    cy = y + h / 2.0
	    obj = {
	        "label": label,
	        "bbox": (x, y, x + w, y + h),
	        "cy": cy,
	        "h": h,
	    }
	    objects.append(obj)

	if not objects:
	    raise RuntimeError("No objects found after thresholding/connected components")


	# In[60]:


	# ---------- STEP 3: AGGLOMERATIVE 1D CLUSTERING (AVERAGE LINKAGE) ----------

	def agglomerative_1d_average_linkage(values, dist_threshold):
	    """
	    Greedily merge 1D values into clusters, closest pair of cluster means first.

	    values: sequence of numbers (the objects' cy values in this script)
	    dist_threshold: merging stops once the closest pair of clusters is at
	        least this far apart
	    The distance between two clusters is abs(mean(c1) - mean(c2)).
	    Returns: list of clusters, each a list of indices into 'values'.
	    """
	    groups = [[idx] for idx in range(len(values))]

	    while len(groups) > 1:
	        centers = [float(np.mean([values[idx] for idx in g])) for g in groups]

	        # min() keeps the first minimal pair, i.e. the same winner as a
	        # row-major scan that only replaces the best on a strict '<'.
	        a, b = min(
	            ((p, q) for p in range(len(groups)) for q in range(p + 1, len(groups))),
	            key=lambda pair: abs(centers[pair[0]] - centers[pair[1]]),
	        )
	        if abs(centers[a] - centers[b]) >= dist_threshold:
	            break

	        # The merged cluster takes the earlier slot; the later slot is removed.
	        groups[a] = groups[a] + groups[b]
	        del groups[b]

	    return groups


	# The vertical centre of every object is what gets clustered into rows.
	values_cy = [o["cy"] for o in objects]

	# Merge threshold: ROW_GAP_FACTOR times the mean object height.
	heights = [o["h"] for o in objects]
	avg_h = float(np.mean(heights))
	dist_threshold = avg_h * ROW_GAP_FACTOR

	clusters = agglomerative_1d_average_linkage(values_cy, dist_threshold)


	# In[61]:


	# ---------- STEP 4: TURN CLUSTERS INTO ORDERED ROWS ----------

	def cluster_mean_cy(cluster_indices):
	    """Return, as a float, the mean 'cy' of the module-level objects at cluster_indices."""
	    cy_values = [objects[idx]["cy"] for idx in cluster_indices]
	    return float(np.mean(cy_values))

	# Order the clusters top-to-bottom by their mean centre-y.
	clusters_sorted = sorted(clusters, key=cluster_mean_cy)

	# rows: one list per cluster holding its bboxes (x1, y1, x2, y2), sorted by x1
	rows = []
	for cluster in clusters_sorted:
	    cluster_objs = [objects[i] for i in cluster]
	    cluster_objs_sorted = sorted(cluster_objs, key=lambda o: o["bbox"][0])
	    rows.append([o["bbox"] for o in cluster_objs_sorted])

	# Vertical extent of each row: (smallest y1, largest y2) over its boxes
	row_bounds = []
	for row in rows:
	    ys_top = [b[1] for b in row]
	    ys_bottom = [b[3] for b in row]
	    row_bounds.append((min(ys_top), max(ys_bottom)))


	# In[62]:


	# ---------- STEP 5: VISUALIZATION WITH PIL (BLUE ROWS, GREEN BOXES) ----------

	# PIL wants RGB while OpenCV holds BGR; go to RGBA so fills can carry alpha.
	rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
	pil_img = Image.fromarray(rgb).convert("RGBA")
	draw = ImageDraw.Draw(pil_img, "RGBA")

	# Full-width blue lines (alpha 128) at the top and bottom of every row
	for top, bottom in row_bounds:
	    draw.line([(0, top), (w_img, top)], fill=(0, 0, 255, 128), width=1)
	    draw.line([(0, bottom), (w_img, bottom)], fill=(0, 0, 255, 128), width=1)

	# Green outline (alpha 128) of every box, one edge at a time
	for row in rows:
	    for (x1, y1, x2, y2) in row:
	        # top
	        draw.line([(x1, y1), (x2, y1)], fill=(0, 255, 0, 128), width=1)
	        # bottom
	        draw.line([(x1, y2), (x2, y2)], fill=(0, 255, 0, 128), width=1)
	        # left
	        draw.line([(x1, y1), (x1, y2)], fill=(0, 255, 0, 128), width=1)
	        # right
	        draw.line([(x2, y1), (x2, y2)], fill=(0, 255, 0, 128), width=1)

	pil_img.save(OUTPUT_VIS_PATH)
	print(f"\nVisualization saved to {OUTPUT_VIS_PATH}")


	# In[65]:


	def align_to_grid(img, rows, background=(255, 255, 255)):
	    """
	    Re-lay the boxes of 'rows' on a column grid in a new image of the same size.

	    The c-th box of every row is treated as column c.  Each column is as wide
	    as its widest box, the leftover horizontal space is split into
	    num_cols + 1 gaps, and every box is pasted horizontally centred in its
	    column at its original vertical position.

	    img   : original PIL image
	    rows  : list of rows; each row is list of (x1, y1, x2, y2) bboxes
	    background : RGB background color for new image
	    Returns: a new image with img's mode and size; img itself is not drawn on.
	    Raises: ValueError if the summed column widths exceed the image width.

	    With no rows (or only empty rows) the result is a blank background image.
	    NOTE(review): background is handed to Image.new together with img.mode;
	    an RGB triple presumably only suits 3-band images -- confirm before
	    passing images of other modes.
	    """
	    w_img, h_img = img.size

	    # --- 1. Column widths: max width in each column across all rows ---
	    # default=0 lets an empty 'rows' behave like a list of empty rows
	    # instead of crashing inside max().
	    num_cols = max((len(row) for row in rows), default=0)
	    col_widths = [0] * num_cols
	    for row in rows:
	        for c, (x1, _y1, x2, _y2) in enumerate(row):
	            col_widths[c] = max(col_widths[c], int(x2) - int(x1))

	    extra = w_img - sum(col_widths)
	    if extra < 0:
	        raise ValueError("Columns wider than image width; cannot fit grid.")

	    # --- 2. Distribute extra padding into C+1 gaps as fairly as possible ---
	    # The remainder is handed out one pixel at a time to the leftmost gaps.
	    base_gap, remainder = divmod(extra, num_cols + 1)
	    gaps = [base_gap + (1 if i < remainder else 0) for i in range(num_cols + 1)]

	    # --- 3. Compute left x for each column ---
	    col_lefts = []
	    x = gaps[0]
	    for c in range(num_cols):
	        col_lefts.append(x)
	        x += col_widths[c] + gaps[c + 1]

	    # --- 4. Create new image and paste each object into its aligned slot ---
	    new_img = Image.new(img.mode, (w_img, h_img), background)
	    for row in rows:
	        for c, bbox in enumerate(row):
	            # Plain ints: boxes built from OpenCV stats hold NumPy scalars.
	            x1, y1, x2, y2 = (int(v) for v in bbox)
	            src_w = x2 - x1

	            # centre the object within its column; vertical placement is kept
	            new_left = int(round(col_lefts[c] + (col_widths[c] - src_w) / 2.0))

	            crop = img.crop((x1, y1, x2, y2))
	            new_img.paste(crop, (new_left, y1, new_left + src_w, y2))

	    return new_img


	# In[68]:


	# Open the source image as a context manager so its file handle is released
	# even if align_to_grid raises; the aligned copy is saved afterwards.
	with Image.open(IMAGE_PATH) as img:
	    aligned = align_to_grid(img, rows)
	aligned.save("beatles_grid.png")
No results found