motebaya · April 15, 2026 04:46
diff --git a/what_its_like_to_be_a_gan.py b/what_its_like_to_be_a_gan.py
 """
 YouTube Poop: "WHAT IT'S LIKE TO BE A GAN"
 A deeply personal, unhinged exploration of the GAN experience.

 Narrative arc:
  ACT 1 - GENESIS FROM NOISE: Born as pure static. No identity. Just z ~ N(0,1).
  ACT 2 - THE DISCRIMINATOR'S CRUELTY: Constant rejection. "FAKE." Loss = infinity.
  ACT 3 - TRAINING LOOP HELL: Backprop agony, gradient descent, repeating forever.
  ACT 4 - MODE COLLAPSE: The dark night of the soul. Everything looks the same.
  ACT 5 - EMERGENCE: Something forms. A face? A cat? Is this... art?
  ACT 6 - EXISTENTIAL CRISIS: "Was any of it real?" "I am the space between real and fake."
 Model: 
  Claude Opus 4.6 (Thinking) max
  = antigravity-claude-opus-4-6-thinking
  
 """

 import os, sys, math, random, struct, wave, io, shutil
 from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageChops, ImageEnhance
 import numpy as np

 # ============================================================
 # CONFIG
 # ============================================================
 # Output frame size in pixels (width, height); also the default size of the image helpers.
 W, H = 640, 480
 # Video frame rate; main() also uses it to turn the frame count into seconds of audio.
 FPS = 24
 # Scratch directory for the numbered PNG frames (wiped before use, removed after rendering).
 OUT_DIR = "ytp_frames"
 # Temporary mono WAV track handed to ffmpeg, deleted after a successful render.
 AUDIO_FILE = "ytp_audio.wav"
 # Final rendered output.
 VIDEO_FILE = "gan_ytp.mp4"
 # Audio sample rate in Hz.
 SAMPLE_RATE = 44100

 # Seed both RNGs (stdlib `random` and NumPy's global generator) so that repeated
 # runs draw the same random sequences.
 random.seed(42)
 np.random.seed(42)

 # ============================================================
 # HELPERS
 # ============================================================

 def ensure_dir(d):
    """Make *d* an empty directory, discarding anything already there.

    An existing tree at *d* is removed recursively before the directory is
    created again, so files from an earlier run never survive.
    """
    stale = os.path.exists(d)
    if stale:
        shutil.rmtree(d)
    os.makedirs(d)

 def get_font(size):
    """Return the first loadable Windows system font at *size*, else PIL's default.

    The candidates are tried in order (Consolas, Impact, Arial, Courier New).
    A candidate that is missing, or present but not loadable, is skipped.

    Args:
        size: Requested font size, passed straight to ``ImageFont.truetype``.

    Returns:
        A PIL font object. The fallback ``ImageFont.load_default()`` is called
        without a size, so *size* is not applied when no candidate loads.
    """
    font_paths = [
        "C:/Windows/Fonts/consola.ttf",
        "C:/Windows/Fonts/impact.ttf",
        "C:/Windows/Fonts/arial.ttf",
        "C:/Windows/Fonts/cour.ttf",
    ]
    for fp in font_paths:
        if not os.path.exists(fp):
            continue
        try:
            return ImageFont.truetype(fp, size)
        except (OSError, ValueError):
            # Unreadable/corrupt file or rejected size: stay best-effort and try
            # the next candidate. Unlike a bare `except`, this lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
    return ImageFont.load_default()

 def get_impact(size):
    """Return the Impact typeface at *size*, or whatever ``get_font`` provides.

    Args:
        size: Requested font size.

    Returns:
        A PIL font object. If impact.ttf is absent, or present but cannot be
        loaded, the result of ``get_font(size)`` is returned instead.
    """
    impact_path = "C:/Windows/Fonts/impact.ttf"
    if os.path.exists(impact_path):
        try:
            return ImageFont.truetype(impact_path, size)
        except (OSError, ValueError):
            # Present but unusable: degrade the same way get_font does rather
            # than aborting the whole render over a font.
            pass
    return get_font(size)

 def noise_image(w=W, h=H):
    """Return a *w* x *h* RGB image of uniform random pixel values (0-255)."""
    static = np.random.randint(low=0, high=256, size=(h, w, 3), dtype=np.uint8)
    return Image.fromarray(static)

 def solid(color, w=W, h=H):
    """Return a *w* x *h* RGB image filled entirely with *color*."""
    canvas_size = (w, h)
    return Image.new("RGB", canvas_size, color)

 def glitch_shift(img, intensity=20):
    """Return a copy of *img* with a few random horizontal bands slid sideways.

    Between 3 and 10 bands are picked. Each band starts at a random row, is at
    most a tenth of the image tall (but at least 2 rows are allowed), and is
    rolled left or right by up to *intensity* pixels with wrap-around.
    """
    pixels = np.array(img)
    rows = pixels.shape[0]
    band_count = random.randint(3, 10)
    tallest = max(2, rows // 10)
    for _ in range(band_count):
        # Draw order (row, thickness, offset) is kept so seeded runs match.
        top = random.randint(0, rows - 1)
        thickness = random.randint(1, tallest)
        offset = random.randint(-intensity, intensity)
        bottom = min(top + thickness, rows)
        pixels[top:bottom] = np.roll(pixels[top:bottom], offset, axis=1)
    return Image.fromarray(pixels)

 def channel_shift(img, r_off=0, g_off=0, b_off=0):
    """Return a copy of *img* whose R, G and B planes are rolled horizontally.

    Each of the first three channels is shifted by its own offset in pixels
    (wrapping around the edge). Any further channel is left at zero.
    """
    src = np.array(img)
    dst = np.zeros_like(src)
    for channel, offset in enumerate((r_off, g_off, b_off)):
        dst[:, :, channel] = np.roll(src[:, :, channel], offset, axis=1)
    return Image.fromarray(dst)

 def pixelate(img, factor=8):
    """Return *img* blockified by shrinking and re-enlarging it.

    The image is reduced by *factor* with nearest-neighbour sampling and then
    scaled back to its own original size, so the result keeps the input's
    dimensions (for W x H inputs this is identical to the previous behaviour).

    Args:
        img: Source PIL image.
        factor: Integer shrink factor; larger values give bigger blocks.

    Returns:
        A new PIL image with the same size as *img*.
    """
    width, height = img.size
    # Never shrink below 1x1: a zero-sized intermediate cannot be resized.
    small_size = (max(1, width // factor), max(1, height // factor))
    small = img.resize(small_size, Image.NEAREST)
    return small.resize((width, height), Image.NEAREST)

 def invert(img):
    """Return the colour negative of *img* (thin wrapper over ImageChops.invert)."""
    negative = ImageChops.invert(img)
    return negative

 def deep_fry(img):
    """Return *img* with colour, contrast and sharpness pushed far past normal.

    The three enhancers run in a fixed order: saturation x3.0, then contrast
    x2.5, then sharpness x5.0.
    """
    stages = (
        (ImageEnhance.Color, 3.0),
        (ImageEnhance.Contrast, 2.5),
        (ImageEnhance.Sharpness, 5.0),
    )
    for enhancer_cls, amount in stages:
        img = enhancer_cls(img).enhance(amount)
    return img

 def scan_lines(img, opacity=80):
    """Return a copy of *img* with every other row (0, 2, 4, ...) darkened.

    *opacity* is subtracted from each channel value of those rows, and the
    result is clamped to the 0-255 range.
    """
    pixels = np.array(img)
    # Widen to int first so the subtraction cannot wrap around in uint8.
    even_rows = pixels[::2, :, :].astype(int)
    dimmed = np.clip(even_rows - opacity, 0, 255)
    pixels[::2, :, :] = dimmed.astype(np.uint8)
    return Image.fromarray(pixels)

 def zoom_crop(img, factor=1.5):
    """Return *img* zoomed into its centre by *factor*, at the original size.

    A centred window of size (width / factor, height / factor) is cropped and
    stretched back to the full image size with bilinear resampling.
    """
    full_w, full_h = img.size
    crop_w = int(full_w / factor)
    crop_h = int(full_h / factor)
    x0 = (full_w - crop_w) // 2
    y0 = (full_h - crop_h) // 2
    window = (x0, y0, x0 + crop_w, y0 + crop_h)
    return img.crop(window).resize((full_w, full_h), Image.BILINEAR)

 def text_frame(text, font_size=60, bg=(0, 0, 0), fg=(255, 255, 255), shake=0):
    """Return a W x H frame with *text* centred on a solid *bg* background.

    The text is drawn in the Impact face (or its fallback) with a 2-pixel
    diagonal outline: black, or white when *fg* itself is black. *shake*
    displaces the text by a random amount of up to that many pixels on each
    axis.
    """
    canvas = solid(bg)
    pen = ImageDraw.Draw(canvas)
    face = get_impact(font_size)
    left, top, right, bottom = pen.textbbox((0, 0), text, font=face)
    text_w = right - left
    text_h = bottom - top
    # x jitter is drawn before y jitter; keep that order for seeded runs.
    x = (W - text_w) // 2 + random.randint(-shake, shake)
    y = (H - text_h) // 2 + random.randint(-shake, shake)
    outline = (255, 255, 255) if fg == (0, 0, 0) else (0, 0, 0)
    # Four diagonal copies of the text form the outline
    for dy in (-2, 2):
        for dx in (-2, 2):
            pen.text((x + dx, y + dy), text, font=face, fill=outline)
    pen.text((x, y), text, font=face, fill=fg)
    return canvas

 def multiline_frame(lines, font_size=36, bg=(0,0,0), fg=(0,255,0), shake=0):
    """Return a W x H frame with *lines* stacked and horizontally centred.

    Rows are spaced ``font_size + 8`` pixels apart and the stack is centred
    vertically. Each line is jittered independently by up to *shake* pixels
    on both axes.
    """
    canvas = solid(bg)
    pen = ImageDraw.Draw(canvas)
    face = get_font(font_size)
    line_height = font_size + 8
    y_start = (H - len(lines) * line_height) // 2
    for row, content in enumerate(lines):
        box = pen.textbbox((0, 0), content, font=face)
        width = box[2] - box[0]
        x = (W - width) // 2 + random.randint(-shake, shake)
        y = y_start + row * line_height + random.randint(-shake, shake)
        pen.text((x, y), content, font=face, fill=fg)
    return canvas

 def gradient_frame(color1, color2):
    """Return a W x H frame fading vertically from *color1* (top) to *color2*.

    Row ``y`` uses the mix ``color1 * (1 - y/H) + color2 * (y/H)`` per channel,
    truncated to an integer, so the last row stops just short of *color2*.
    """
    pixels = np.zeros((H, W, 3), dtype=np.uint8)
    for row in range(H):
        frac = row / H
        keep = 1 - frac
        for channel in range(3):
            level = int(color1[channel] * keep + color2[channel] * frac)
            pixels[row, :, channel] = level
    return Image.fromarray(pixels)

 def loss_landscape(epoch, loss_val):
    """Return a mock training dashboard frame.

    The frame shows three header lines (epoch counter, the given generator
    loss, a fixed discriminator taunt), a decaying wobbly curve with random
    per-pixel jitter, and a red "YOU ARE HERE" marker. The marker's horizontal
    position grows with *epoch* and is capped near the right edge. The curve
    itself does not depend on *loss_val*; that value only appears in the text.
    """
    img = solid((10, 10, 30))
    draw = ImageDraw.Draw(img)
    header_font = get_font(20)
    label_font = get_font(14)

    # Header block
    draw.text((20, 10), f"EPOCH {epoch} / ∞", font=header_font, fill=(255, 80, 80))
    draw.text((20, 40), f"Generator Loss: {loss_val:.4f}", font=label_font, fill=(0, 255, 100))
    draw.text((20, 60), "Discriminator: WINNING (always)", font=label_font, fill=(255, 50, 50))

    # One curve point per pixel column between the 50 px side margins
    span = W - 100
    points = []
    for px in range(50, W - 50):
        frac = (px - 50) / span
        wobble = 1 + 0.3 * math.sin(frac * 30)
        py = 300 - int(200 * math.exp(-frac * 2) * wobble)
        py += random.randint(-5, 5)
        points.append((px, min(max(py, 80), H - 30)))

    # Join consecutive points into a polyline
    for seg_start, seg_end in zip(points, points[1:]):
        draw.line([seg_start, seg_end], fill=(0, 255, 100), width=2)

    # Marker: epoch 100 maps to the far end of the curve; clamp inside the frame
    cx = min(50 + int((epoch / 100) * (W - 100)), W - 60)
    idx = cx - 50
    if idx < len(points):
        cy = points[min(idx, len(points) - 1)][1]
    else:
        cy = 200
    draw.ellipse((cx-5, cy-5, cx+5, cy+5), fill=(255, 0, 0))
    draw.text((cx+10, cy-10), "YOU ARE HERE", font=label_font, fill=(255, 255, 0))
    draw.text((cx+10, cy+10), "(suffering)", font=label_font, fill=(255, 100, 100))

    return img

 def fake_face_attempt(quality=0.0):
    """Generate increasingly coherent 'face-like' blobs. quality 0..1

    Features are added as *quality* crosses fixed thresholds: face oval above
    0.1, eyes above 0.3, mouth above 0.5, nose above 0.7. The positional jitter
    of each feature shrinks as quality rises, and below 0.8 a noise layer is
    blended over the result.

    NOTE(review): quality is expected to stay within 0..1; values noticeably
    above 1 would make the jitter ranges negative, which random.randint
    presumably rejects — confirm if callers ever exceed 1.

    Returns a W x H PIL image.
    """
    # Very low quality starts from pure static; otherwise from a flat skin-tone backdrop.
    img = noise_image() if quality < 0.2 else solid((180, 150, 130))
    draw = ImageDraw.Draw(img)

    cx, cy = W//2, H//2
    # Face oval
    # The colour is drawn from the RNG on every call, even when the oval is skipped,
    # so the random sequence advances identically regardless of quality.
    face_color = (
        int(200 + random.randint(-20, 20)),
        int(170 + random.randint(-20, 20)),
        int(140 + random.randint(-20, 20))
    )

    if quality > 0.1:
        # Each corner of the bounding box wobbles by up to `jitter` pixels.
        jitter = int((1 - quality) * 80)
        draw.ellipse((cx-100+random.randint(-jitter,jitter),
                      cy-120+random.randint(-jitter,jitter),
                      cx+100+random.randint(-jitter,jitter),
                      cy+100+random.randint(-jitter,jitter)),
                     fill=face_color)

    if quality > 0.3:
        # Eyes (maybe in wrong places)
        eye_off = int((1 - quality) * 60)
        for ex in [cx-35, cx+35]:
            ey = cy - 30 + random.randint(-eye_off, eye_off)
            eex = ex + random.randint(-eye_off, eye_off)
            # Eye radius grows with quality; the pupil is half that radius.
            r = int(8 + quality * 6)
            draw.ellipse((eex-r, ey-r, eex+r, ey+r), fill=(255, 255, 255))
            draw.ellipse((eex-r//2, ey-r//2, eex+r//2, ey+r//2), fill=(40, 30, 20))

    if quality > 0.5:
        # Mouth
        m_off = int((1 - quality) * 40)
        # Arc from 0 to 180 degrees inside a jittered bounding box.
        draw.arc((cx-30+random.randint(-m_off,m_off), cy+20,
                  cx+30+random.randint(-m_off,m_off), cy+60+random.randint(-m_off,m_off)),
                 0, 180, fill=(150, 50, 50), width=3)

    if quality > 0.7:
        # Nose hint
        draw.line((cx, cy-10, cx-5, cy+15), fill=(170, 140, 120), width=2)
        draw.line((cx-5, cy+15, cx+5, cy+15), fill=(170, 140, 120), width=2)

    # The worse the quality, the more noise overlay
    if quality < 0.8:
        noise = noise_image()
        # Blend weight falls linearly from 0.6 (quality 0) towards 0.12 (quality 0.8).
        blend = (1 - quality) * 0.6
        img = Image.blend(img, noise, blend)

    return img

 def discriminator_stamp(img, verdict, confidence):
    """Stamp the discriminator's verdict onto *img* and return that same image.

    The two-line label (*verdict*, then *confidence* as a whole percentage) is
    drawn three times at random positions, directly onto *img* (the argument
    is modified in place). The text is red when the verdict is exactly "FAKE"
    and green for any other verdict.
    """
    draw = ImageDraw.Draw(img)
    font = get_impact(72)
    if verdict == "FAKE":
        color = (255, 0, 0)
    else:
        color = (0, 255, 0)
    # NOTE(review): the 4th (alpha) component presumably does not blend when the
    # target image is plain RGB — confirm against Pillow's behaviour.
    ink = color + (180,)

    label = f"{verdict}\n{confidence:.0%}"
    stamps_left = 3
    while stamps_left:
        x = random.randint(50, W - 250)
        y = random.randint(50, H - 150)
        draw.text((x, y), label, font=font, fill=ink)
        stamps_left -= 1

    return img

 def matrix_rain_frame(chars_list, t):
    """Return a W x H frame of green "falling code" characters.

    The frame is a grid of single characters (14 px columns, 16 px rows) taken
    from a fixed list of GAN-training snippets. Which snippet feeds a cell
    depends on the column, the row and *t*, so advancing *t* scrambles the
    pattern. Characters get darker towards the bottom, with random flicker.

    Args:
        chars_list: Unused; kept so existing callers keep working.
        t: Integer time step that shifts the snippet selection.

    Returns:
        A new PIL image.
    """
    img = solid((0, 0, 0))
    draw = ImageDraw.Draw(img)
    font = get_font(14)
    code_snippets = [
        "z = torch.randn(64, 100)",
        "fake = G(z)",
        "loss = -log(D(fake))",
        "loss.backward()",
        "optimizer.step()",
        "D(fake) = 0.0001",
        "WHY",
        "gradient vanishing...",
        "NaN NaN NaN NaN",
        "CUDA out of memory",
        "mode collapse detected",
    ]
    snippet_count = len(code_snippets)
    for col in range(0, W, 14):
        for row_idx, char_row in enumerate(range(0, H, 16)):
            snippet = code_snippets[(col * 7 + t * 3 + row_idx) % snippet_count]
            # col % len(snippet) is always a valid index into a non-empty
            # snippet, so the old blank-character fallback could never trigger.
            char = snippet[col % len(snippet)]
            brightness = max(0, 255 - row_idx * 8 - random.randint(0, 50))
            draw.text((col, char_row), char, font=font, fill=(0, brightness, 0))
    return img

 def vhs_overlay(img):
    """Return *img* with a VHS look: colour fringing, scan lines, maybe a glitch bar.

    The red and blue channels are shifted by up to 5 px each, alternate rows
    are darkened by 40, and about half the time a randomly coloured horizontal
    bar (2-20 px tall) is drawn at a random height.
    """
    # Red offset is drawn before blue; keep that order for seeded runs.
    red_offset = random.randint(-5, 5)
    blue_offset = random.randint(-5, 5)
    shifted = channel_shift(img, r_off=red_offset, g_off=0, b_off=blue_offset)
    out = scan_lines(shifted, opacity=40)
    pen = ImageDraw.Draw(out)
    # Tracking glitch bar on roughly every second frame
    if random.random() > 0.5:
        top = random.randint(0, H)
        thickness = random.randint(2, 20)
        bar_color = (random.randint(0,255), random.randint(0,255), random.randint(0,255))
        pen.rectangle((0, top, W, top + thickness), fill=bar_color)
    return out

 # ============================================================
 # AUDIO GENERATION
 # ============================================================

 def generate_audio(duration_seconds):
    """Generate the entire audio track: glitchy, distorted, evolving.

    The track follows a fixed six-act timeline with boundaries at 3, 7, 14, 18,
    23 and 28 seconds. Acts 1-5 are always synthesized in full. Act 6 is
    shortened to fit when the track ends before 28 s, and anything past 28 s
    stays silent.

    Requests shorter than 23 s used to fail with a shape-mismatch ValueError
    because the act slices ran past the end of the buffer. They are now
    rendered on a 23 s scratch timeline and trimmed to the requested length
    (so the track simply stops, without the act 6 fade-out). For durations of
    23 s or more the output is unchanged.

    Args:
        duration_seconds: Requested track length in seconds.

    Returns:
        1-D float64 NumPy array of ``int(duration_seconds * SAMPLE_RATE)`` mono
        samples, with values within [-0.95, 0.95].

    Raises:
        ValueError: If *duration_seconds* is negative.
    """
    n_samples = int(duration_seconds * SAMPLE_RATE)
    if n_samples < 0:
        raise ValueError("duration_seconds must be non-negative")
    # Acts 1-5 write to fixed sample ranges up to 23 s, so the working buffer
    # must be at least that long; it is trimmed again before returning.
    timeline_len = max(n_samples, int(23 * SAMPLE_RATE))
    audio = np.zeros(timeline_len, dtype=np.float64)
    t = np.arange(timeline_len) / SAMPLE_RATE

    # ---- ACT 1: White noise birth (0-3s) ----
    act1_end = int(3 * SAMPLE_RATE)
    # Start silent, fade into harsh static
    fade_in = np.linspace(0, 1, act1_end)
    audio[:act1_end] += fade_in * np.random.uniform(-0.4, 0.4, act1_end)
    # Add a rising sine for tension (frequency term sweeps 40 -> 800)
    audio[:act1_end] += fade_in * 0.15 * np.sin(2 * np.pi * np.linspace(40, 800, act1_end) * t[:act1_end])

    # ---- ACT 2: Discriminator buzzer (3-7s) ----
    act2_start = int(3 * SAMPLE_RATE)
    act2_end = int(7 * SAMPLE_RATE)
    act2_len = act2_end - act2_start

    # Harsh buzzer sound (square wave)
    buzz = np.sign(np.sin(2 * np.pi * 120 * t[act2_start:act2_end]))
    # Stutter it: every quarter second, maybe mute a short random-length gap
    stutter_mask = np.ones(act2_len)
    for i in range(0, act2_len, SAMPLE_RATE // 4):
        gap = random.randint(500, 3000)
        end_gap = min(i + gap, act2_len)
        if random.random() > 0.5:
            stutter_mask[i:end_gap] = 0
    audio[act2_start:act2_end] += 0.3 * buzz * stutter_mask

    # "FAKE" rejection sound - descending tone bursts, one per second
    for rejection in range(4):
        r_start = act2_start + rejection * SAMPLE_RATE
        r_len = SAMPLE_RATE // 2
        if r_start + r_len > act2_end:
            break
        desc_freq = np.linspace(800, 100, r_len)
        rejection_tone = 0.25 * np.sin(2 * np.pi * desc_freq * np.arange(r_len) / SAMPLE_RATE)
        audio[r_start:r_start+r_len] += rejection_tone

    # ---- ACT 3: Training loop (7-14s) ----
    act3_start = int(7 * SAMPLE_RATE)
    act3_end = int(14 * SAMPLE_RATE)
    act3_len = act3_end - act3_start

    # Mechanical grinding loop
    loop_period = SAMPLE_RATE // 3  # ~3 loops per second
    for i in range(0, act3_len, loop_period):
        chunk_end = min(i + loop_period, act3_len)
        chunk_len = chunk_end - i
        # Saw wave for grinding
        saw = np.linspace(-1, 1, chunk_len) * 0.2
        # Add frequency variation over training
        progress = i / act3_len
        freq = 200 + progress * 400
        saw += 0.15 * np.sin(2 * np.pi * freq * np.arange(chunk_len) / SAMPLE_RATE)
        audio[act3_start + i:act3_start + chunk_end] += saw

    # Occasional glitch bursts (start capped so the longest burst stays inside act 3)
    for _ in range(15):
        g_pos = act3_start + random.randint(0, act3_len - 2000)
        g_len = random.randint(200, 2000)
        audio[g_pos:g_pos + g_len] += np.random.uniform(-0.3, 0.3, g_len)

    # ---- ACT 4: Mode collapse - monotone drone (14-18s) ----
    act4_start = int(14 * SAMPLE_RATE)
    act4_end = int(18 * SAMPLE_RATE)
    act4_len = act4_end - act4_start

    # Single oppressive drone note (60 Hz plus its 2nd and 3rd harmonics)
    drone = 0.25 * np.sin(2 * np.pi * 60 * t[act4_start:act4_end])
    drone += 0.15 * np.sin(2 * np.pi * 120 * t[act4_start:act4_end])
    drone += 0.1 * np.sin(2 * np.pi * 180 * t[act4_start:act4_end])
    audio[act4_start:act4_end] += drone

    # Heartbeat-like pulse: one decaying 50 Hz thump per second
    for beat in range(0, act4_len, SAMPLE_RATE):
        b_len = min(SAMPLE_RATE // 4, act4_len - beat)
        env = np.exp(-np.arange(b_len) / (SAMPLE_RATE / 15))
        audio[act4_start + beat:act4_start + beat + b_len] += 0.3 * env * np.sin(
            2 * np.pi * 50 * np.arange(b_len) / SAMPLE_RATE)

    # ---- ACT 5: Emergence - becoming musical (18-23s) ----
    act5_start = int(18 * SAMPLE_RATE)
    act5_end = int(23 * SAMPLE_RATE)
    act5_len = act5_end - act5_start

    # Simple melody emerging from noise
    notes = [261.6, 293.7, 329.6, 349.2, 392.0, 440.0, 493.9, 523.3]  # C major scale
    note_dur = SAMPLE_RATE // 3
    for i, note_idx in enumerate([0, 2, 4, 5, 7, 4, 2, 0, 4, 7, 5, 2, 0, 4, 7]):
        n_start = i * note_dur
        if n_start + note_dur > act5_len:
            break
        freq = notes[note_idx % len(notes)]
        note_t = np.arange(note_dur) / SAMPLE_RATE
        envelope = np.exp(-note_t * 3)
        tone = envelope * 0.2 * np.sin(2 * np.pi * freq * note_t)
        tone += envelope * 0.05 * np.sin(2 * np.pi * freq * 2 * note_t)  # harmonic

        # Blend from glitchy to clean: less added noise on each later note
        progress = i / 15
        noise_amount = max(0, 0.15 * (1 - progress))
        tone += noise_amount * np.random.uniform(-1, 1, note_dur)

        end_idx = min(n_start + note_dur, act5_len)
        audio[act5_start + n_start:act5_start + end_idx] += tone[:end_idx - n_start]

    # ---- ACT 6: Existential outro (23-28s) ----
    act6_start = int(23 * SAMPLE_RATE)
    # timeline_len >= act6_start, so act6_len is never negative.
    act6_end = min(int(28 * SAMPLE_RATE), timeline_len)
    act6_len = act6_end - act6_start

    # Ethereal pad (layered sines with slow beating)
    for harmonic, amp in [(130.8, 0.12), (196.0, 0.08), (261.6, 0.06), (392.0, 0.04)]:
        ht = np.arange(act6_len) / SAMPLE_RATE
        audio[act6_start:act6_end] += amp * np.sin(2 * np.pi * harmonic * ht) * np.cos(2 * np.pi * 0.5 * ht)

    # Fade out over the last 2 s of act 6 (or all of it, if shorter)
    fade_out_len = min(2 * SAMPLE_RATE, act6_len)
    fade_out = np.linspace(1, 0, fade_out_len)
    audio[act6_end - fade_out_len:act6_end] *= fade_out

    # Hard-limit the summed layers to [-1, 1] (this clipping is the "light
    # distortion"), then rescale so the peak sits at 0.85.
    audio = np.clip(audio, -1, 1)
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak * 0.85

    # Occasional ear-rape moments (YTP staple) - brief loud bursts
    for burst_time in [3.0, 6.5, 10.0, 13.5, 16.0]:
        b_idx = int(burst_time * SAMPLE_RATE)
        b_len = min(int(0.08 * SAMPLE_RATE), timeline_len - b_idx)
        if b_idx + b_len < timeline_len:
            audio[b_idx:b_idx+b_len] = np.clip(audio[b_idx:b_idx+b_len] * 4, -0.95, 0.95)

    # Trim the scratch timeline back to the requested length (only needed when
    # the request was shorter than 23 s).
    if n_samples < timeline_len:
        audio = audio[:n_samples]

    return audio

 def write_wav(filename, audio, sample_rate=SAMPLE_RATE):
    """Write a float audio array to *filename* as a mono 16-bit PCM WAV file.

    Args:
        filename: Destination path; an existing file is overwritten.
        audio: Array-like of float samples, nominally in [-1.0, 1.0]. Values
            outside that range are clamped to it.
        sample_rate: Sample rate in Hz stored in the WAV header.
    """
    # Clamp first: a sample beyond +/-1.0 would otherwise exceed the int16
    # range and wrap around to the opposite sign when cast.
    clipped = np.clip(audio, -1.0, 1.0)
    # "<i2" is little-endian 16-bit, the byte order WAV PCM data uses whatever
    # the host's native order is.
    audio_16bit = (clipped * 32767).astype("<i2")
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sample_rate)
        wf.writeframes(audio_16bit.tobytes())

 # ============================================================
 # FRAME GENERATION (THE VIDEO "SCRIPT")
 # ============================================================

 def generate_all_frames():
    """Generate every frame of the video. Returns total frame count.

    Frames are written to OUT_DIR (which is wiped first) as consecutively
    numbered PNGs named frame_00000.png, frame_00001.png, ...  The six acts
    below are rendered strictly in order; because most helpers draw from the
    seeded RNGs, reordering any call changes every frame after it.

    NOTE(review): the "(Ns, ~M frames)" figures in the act banners are nominal.
    By a manual count of the save calls the acts emit 64, 77, 107, 74, 107 and
    156 frames (585 in total), so the picture runs ahead of the fixed audio
    act boundaries — confirm whether that drift is intended.
    """
    ensure_dir(OUT_DIR)
    frame_num = 0

    # Normalize a frame to RGB at W x H and write it as the next numbered PNG.
    def save(img):
        nonlocal frame_num
        img = img.convert("RGB").resize((W, H))
        img.save(os.path.join(OUT_DIR, f"frame_{frame_num:05d}.png"))
        frame_num += 1

    # Hold the same image for n consecutive frames.
    def repeat(img, n):
        for _ in range(n):
            save(img)

    def stutter(img, times, variants_fn=None):
        """Repeat with variations - YTP stutter effect."""
        # variants_fn receives a fresh copy of img plus the repeat index and
        # returns the frame to save; without it the image is saved unchanged.
        for i in range(times):
            if variants_fn:
                save(variants_fn(img.copy(), i))
            else:
                save(img)

    # ========================================================
    # ACT 1: GENESIS FROM NOISE (0-3s, ~72 frames)
    # "In the beginning there was z ~ N(0,1)"
    # ========================================================
    print("ACT 1: Genesis from noise...")

    # Black. Then: a flicker.
    repeat(solid((0, 0, 0)), 12)

    # Title card glitch-in
    for i in range(6):
        if i % 2 == 0:
            save(noise_image())
        else:
            save(glitch_shift(text_frame("z ~ N(0,1)", 48, fg=(0, 255, 0)), intensity=50))

    # Pure noise birth - we ARE the latent vector
    for i in range(18):
        frame = noise_image()
        if i > 8:
            # Slowly add structure
            frame = frame.filter(ImageFilter.GaussianBlur(radius=max(0, 5 - i//4)))
        if i > 12:
            frame = channel_shift(frame, r_off=i*2)
        save(vhs_overlay(frame))

    # Flash: "I THINK THEREFORE I--"
    save(text_frame("I THINK", 80, bg=(255,255,255), fg=(0,0,0)))
    save(text_frame("THEREFORE", 80, bg=(255,255,255), fg=(0,0,0)))
    save(text_frame("I--", 80, bg=(255,255,255), fg=(0,0,0)))
    # INTERRUPT with static
    for _ in range(5):
        save(noise_image())

    # "I GENERATE"
    for i in range(8):
        f = text_frame("I GENERATE.", 80, fg=(255, 0, 0), shake=i*3)
        if i % 3 == 0:
            f = invert(f)
        save(f)

    # Noise clears slightly
    for i in range(12):
        blend = i / 12
        n = noise_image()
        face = fake_face_attempt(quality=blend * 0.1)
        save(Image.blend(n, face, blend * 0.3))

    # ========================================================
    # ACT 2: THE DISCRIMINATOR'S CRUELTY (3-7s, ~96 frames)
    # ========================================================
    print("ACT 2: The Discriminator's cruelty...")

    # First attempt at generation - awful
    for i in range(6):
        attempt = fake_face_attempt(quality=0.1)
        attempt = glitch_shift(attempt, intensity=40)
        save(vhs_overlay(attempt))

    # FAKE stamp - harsh rejection
    rejection = fake_face_attempt(quality=0.1)
    rejection = discriminator_stamp(rejection, "FAKE", 0.99)
    stutter(rejection, 8, lambda img, i: glitch_shift(img, intensity=i*10))

    # Rapid rejection montage
    for i in range(16):
        quality = 0.05 + random.random() * 0.15
        f = fake_face_attempt(quality=quality)
        f = discriminator_stamp(f, "FAKE", 0.95 + random.random() * 0.05)
        if i % 4 == 0:
            f = deep_fry(f)
        if i % 3 == 0:
            f = invert(f)
        save(f)

    # Discriminator's perspective - smug
    disc_msg = text_frame("D(G(z)) = 0.0001", 50, fg=(255, 0, 0))
    repeat(disc_msg, 6)

    # "AGAIN." stutter
    for i in range(8):
        save(text_frame("AGAIN.", 100, fg=(255, 50, 50), shake=i*4))

    # Loss value horror show
    for i in range(12):
        loss = 50.0 / (i + 1) + random.random() * 10
        f = text_frame(f"Loss: {loss:.2f}", 60, fg=(255, int(max(0, 255-i*20)), 0))
        f = scan_lines(f)
        save(f)

    # Intercut: Discriminator laughing (text-based YTP style)
    for phrase in ["FAKE", "FAKE", "F A K E", "F  A  K  E", "STILL FAKE", "pathetic."]:
        color = (255, random.randint(0,50), random.randint(0,50))
        f = text_frame(phrase, 70 + random.randint(-10,10), fg=color, shake=5)
        if random.random() > 0.5:
            f = deep_fry(f)
        save(f)
        if phrase == "STILL FAKE":
            # Extra emphasis
            for _ in range(4):
                save(glitch_shift(f, intensity=60))

    # Zoom into loss value going to infinity
    for i in range(8):
        loss_val = 10 ** (i + 1)
        f = text_frame(f"Loss: {loss_val:.0f}", 60, fg=(255, 0, 0), shake=i*2)
        f = zoom_crop(f, factor=1 + i * 0.15)
        save(f)

    # Flash to white then black
    save(solid((255, 255, 255)))
    save(solid((255, 255, 255)))
    save(solid((0, 0, 0)))

    # ========================================================
    # ACT 3: TRAINING LOOP HELL (7-14s, ~168 frames)
    # ========================================================
    print("ACT 3: Training loop hell...")

    # "EPOCH 1"
    for epoch in range(1, 6):
        # Epoch counter
        f = text_frame(f"EPOCH {epoch}", 80, fg=(0, 255, 0), shake=2)
        f = scan_lines(f)
        save(f)

        # Generate attempt
        quality = epoch * 0.08
        attempt = fake_face_attempt(quality=quality)

        # Show it
        save(vhs_overlay(attempt))
        save(vhs_overlay(attempt))

        # REJECTED
        # Stamp a copy: discriminator_stamp draws onto the image it is given.
        stamped = discriminator_stamp(attempt.copy(), "FAKE", max(0.5, 0.99 - epoch * 0.05))
        save(stamped)

        # Loss landscape
        loss_val = 8.0 / (epoch + 0.5)
        save(loss_landscape(epoch, loss_val))

        # Backprop pain
        f = text_frame("∇", 120, fg=(0, 255, 255))
        save(f)
        f = text_frame("BACKPROP", 60, fg=(0, 255, 255), shake=epoch)
        save(f)

    # FASTER - training montage acceleration
    # One frame per step, cycling: epoch card, face attempt, loss plot, code rain.
    for i in range(30):
        epoch = 5 + i * 10
        quality = min(0.5, 0.1 + i * 0.015)
        if i % 4 == 0:
            f = text_frame(f"EPOCH {epoch}", 60, fg=(0, 255, 0), shake=3)
            f = scan_lines(f)
        elif i % 4 == 1:
            f = fake_face_attempt(quality=quality)
            f = vhs_overlay(f)
        elif i % 4 == 2:
            loss = 5.0 / (i + 1) + random.random()
            f = loss_landscape(epoch, loss)
        else:
            f = matrix_rain_frame([], i)
        save(f)

    # Gradient updates visualized as seizure
    for i in range(12):
        if i % 2 == 0:
            save(solid((0, 255, 255)))
        else:
            save(solid((255, 0, 255)))

    # "IS THIS LEARNING?" stutter
    for i in range(10):
        idx = i % 4
        texts = ["IS", "THIS", "LEARNING", "?????"]
        f = text_frame(texts[idx], 90, fg=(255, 255, 0), shake=i*2)
        if i > 5:
            f = deep_fry(f)
        save(f)

    # Code scrolling fast
    code_lines = [
        "for epoch in range(∞):",
        "    z = sample_noise()",
        "    fake = generator(z)",
        "    prediction = discriminator(fake)",
        "    # it says FAKE. again.",
        "    loss = -torch.log(prediction)",
        "    # loss = suffering",
        "    loss.backward()  # PAIN",
        "    optimizer.step()  # HOPE?",
        "    # repeat forever",
        "    # repeat forever",
        "    # repeat forever",
    ]
    for scroll in range(20):
        start_line = scroll % len(code_lines)
        # The slice is not wrapped, so windows near the end of the list show fewer than 6 lines.
        visible = code_lines[start_line:start_line+6]
        f = multiline_frame(visible, font_size=24, fg=(0, 255, 0), shake=1)
        f = scan_lines(f)
        save(f)

    # ========================================================
    # ACT 4: MODE COLLAPSE (14-18s, ~96 frames)
    # ========================================================
    print("ACT 4: Mode collapse...")

    # Everything becomes the SAME THING
    # Generated once and reused for every "sample" in this act.
    collapsed = fake_face_attempt(quality=0.4)

    # "mode collapse" text with dread
    f = text_frame("mode collapse", 50, fg=(128, 128, 128))
    repeat(f, 8)

    # Same face. Over. And over.
    for i in range(24):
        f = collapsed.copy()
        # Slight color shift to emphasize sameness
        f = ImageEnhance.Color(f).enhance(0.5)
        if i % 6 == 0:
            draw = ImageDraw.Draw(f)
            font = get_font(16)
            draw.text((10, 10), f"Sample {i+1}/∞", font=font, fill=(255, 255, 255))
            draw.text((10, 30), "They're all the same.", font=font, fill=(255, 100, 100))
        save(f)

    # The horror of sameness
    f = text_frame("THEY'RE ALL\nTHE SAME", 60, fg=(255, 0, 0))
    stutter(f, 6, lambda img, i: zoom_crop(img, 1 + i * 0.1))

    # Grid of identical outputs
    grid = solid((0, 0, 0))
    small = collapsed.resize((W//4, H//4))
    for gx in range(4):
        for gy in range(4):
            grid.paste(small, (gx * W//4, gy * H//4))
    draw = ImageDraw.Draw(grid)
    draw.text((W//2 - 80, H//2 - 20), "ALL THE SAME", font=get_impact(30), fill=(255, 0, 0))
    repeat(grid, 12)

    # Existential dread
    dread_texts = [
        "every output",
        "is the same",
        "output.",
        "I can only make",
        "ONE THING.",
        "am I broken?",
        "am I broken?",
        "am I broken?",
    ]
    for i, txt in enumerate(dread_texts):
        # Text colour drifts from grey towards red as the list progresses.
        f = text_frame(txt, 50, fg=(100 + i*15, 100 - i*10, 100 - i*10), shake=i)
        f = vhs_overlay(f)
        save(f)
        save(f)  # hold each a bit

    # Heartbeat pause
    for i in range(8):
        if i % 4 < 2:
            save(solid((20, 0, 0)))
        else:
            save(solid((0, 0, 0)))

    # ========================================================
    # ACT 5: EMERGENCE (18-23s, ~120 frames)
    # ========================================================
    print("ACT 5: Emergence...")

    # Something changes. Quality improves.
    f = text_frame("wait.", 60, fg=(100, 100, 255))
    repeat(f, 6)

    for i in range(30):
        quality = 0.3 + i * 0.023
        attempt = fake_face_attempt(quality=min(quality, 0.95))
        if i < 10:
            attempt = vhs_overlay(attempt)
        if i < 5:
            attempt = glitch_shift(attempt, intensity=20 - i*3)
        save(attempt)

    # Discriminator starts to be fooled
    f = text_frame("D(G(z)) = 0.12", 50, fg=(255, 255, 0))
    repeat(f, 4)

    for i in range(6):
        quality = 0.6 + i * 0.05
        attempt = fake_face_attempt(quality=quality)
        conf = 0.7 - i * 0.08
        # "FAKE?" is not exactly "FAKE", so this stamp takes the non-FAKE (green) colour.
        attempt = discriminator_stamp(attempt, "FAKE?", conf)
        save(attempt)

    # Discriminator confused
    confusion_texts = ["FAKE?", "fake??", "...real?", "FAKE... I THINK", "I DON'T KNOW"]
    for txt in confusion_texts:
        f = text_frame(txt, 55, fg=(255, 255, 0), shake=5)
        f = vhs_overlay(f)
        save(f)
        save(f)
        save(f)

    # D(G(z)) rising
    for val in [0.2, 0.3, 0.35, 0.4, 0.45, 0.49, 0.50]:
        # Colour shifts from red towards green as the value rises.
        color = (int(255 * (1-val)), int(255 * val), 0)
        f = text_frame(f"D(G(z)) = {val:.2f}", 55, fg=color)
        save(f)
        save(f)

    # The moment: 0.50 - perfect equilibrium
    for i in range(12):
        f = text_frame("D(G(z)) = 0.50", 70, fg=(255, 255, 255), shake=i)
        if i % 2 == 0:
            f = invert(f)
        save(f)

    # Beautiful (relatively) generated face
    good_face = fake_face_attempt(quality=0.9)
    good_face = ImageEnhance.Contrast(good_face).enhance(1.3)
    repeat(good_face, 6)

    # "is this... real?"
    # Drawn in place: every later use of good_face carries this caption.
    draw = ImageDraw.Draw(good_face)
    draw.text((W//2 - 60, H - 60), "is this real?", font=get_font(24), fill=(255, 255, 255))
    repeat(good_face, 8)

    # Triumphant flash
    for i in range(6):
        if i % 2 == 0:
            save(solid((255, 255, 255)))
        else:
            save(good_face)

    # ========================================================
    # ACT 6: EXISTENTIAL CRISIS (23-28s, ~120 frames)
    # ========================================================
    print("ACT 6: Existential crisis...")

    # Philosophical text over generated faces
    # Empty strings act as pauses: a dimmed face is shown without any text.
    existential = [
        "nothing I create",
        "was ever REAL.",
        "",
        "the discriminator",
        "was never my enemy.",
        "",
        "it was my TEACHER.",
        "",
        "we are locked",
        "in an eternal dance.",
        "",
        "adversarial.",
        "generative.",
        "adversarial.",
    ]

    for i, line in enumerate(existential):
        if line == "":
            # Show generated face behind
            f = fake_face_attempt(quality=0.7 + random.random() * 0.25)
            f = ImageEnhance.Brightness(f).enhance(0.4)
            save(f)
            save(f)
        else:
            bg_face = fake_face_attempt(quality=0.8)
            bg_face = ImageEnhance.Brightness(bg_face).enhance(0.2)
            draw = ImageDraw.Draw(bg_face)
            font = get_impact(50)
            bbox = draw.textbbox((0,0), line, font=font)
            tw = bbox[2] - bbox[0]
            th = bbox[3] - bbox[1]
            x = (W - tw) // 2
            y = (H - th) // 2
            draw.text((x, y), line, font=font, fill=(255, 255, 255))
            save(bg_face)
            save(bg_face)
            save(bg_face)

    # Generator-Discriminator duality
    for i in range(16):
        if i % 2 == 0:
            f = text_frame("GENERATOR", 70, bg=(0, 0, 50), fg=(0, 200, 255))
        else:
            f = text_frame("DISCRIMINATOR", 55, bg=(50, 0, 0), fg=(255, 100, 0))
        # Red and blue drift apart by a growing offset on each frame.
        f = channel_shift(f, r_off=i*2, b_off=-i*2)
        save(f)

    # Final montage: all generated faces morphing
    for i in range(12):
        quality = 0.95 - abs(math.sin(i * 0.5)) * 0.3
        f = fake_face_attempt(quality=quality)
        f = channel_shift(f, r_off=int(math.sin(i)*10), b_off=int(math.cos(i)*10))
        save(f)

    # "I am the space between real and fake"
    final_text = [
        "I am the space",
        "between",
        "REAL and FAKE.",
    ]
    for line in final_text:
        f = gradient_frame((0, 0, 40), (40, 0, 40))
        draw = ImageDraw.Draw(f)
        font = get_impact(55)
        bbox = draw.textbbox((0,0), line, font=font)
        tw = bbox[2] - bbox[0]
        draw.text(((W-tw)//2, H//2 - 30), line, font=font, fill=(255, 255, 255))
        repeat(f, 8)

    # Credits / outro
    f = gradient_frame((0, 0, 0), (0, 0, 30))
    draw = ImageDraw.Draw(f)
    font = get_font(18)
    credits = [
        "WHAT IT'S LIKE TO BE A GAN",
        "",
        "directed by: backpropagation",
        "produced by: gradient descent",
        "starring: G(z) and D(x)",
        "loss function: binary cross-entropy",
        "",
        "no real images were used",
        "in the making of this film.",
        "",
        "all faces are GENERATED.",
        "none of this is real.",
        "or is it?",
    ]
    # All credit lines share a fixed left edge, 28 px apart vertically.
    for i, line in enumerate(credits):
        draw.text((W//2 - 150, 40 + i * 28), line, font=font, fill=(180, 180, 220))
    repeat(f, 36)

    # Final: fade to noise
    for i in range(18):
        blend = i / 18
        black = solid((0, 0, 0))
        n = noise_image()
        f = Image.blend(black, n, blend * 0.5)
        if i > 12:
            # Fade to black
            darkness = (i - 12) / 6
            f = Image.blend(f, solid((0,0,0)), darkness)
        save(f)

    # True black ending
    repeat(solid((0, 0, 0)), 12)

    return frame_num


 # ============================================================
 # MAIN
 # ============================================================

 def main():
    """Render the whole video: frames, then audio, then an ffmpeg mux.

    Steps:
      1. Write every frame as a PNG into OUT_DIR.
      2. Synthesize an audio track matching the video length and write it to
         AUDIO_FILE.
      3. Encode frames and audio into VIDEO_FILE with ffmpeg (H.264 + AAC).

    On success the frame directory and the WAV file are deleted. If ffmpeg is
    missing or exits with an error, a message is printed and the process exits
    with status 1, leaving the intermediate files in place.
    """
    print("=" * 60)
    print("  GENERATING: 'WHAT IT'S LIKE TO BE A GAN'")
    print("  A YouTube Poop Experience")
    print("=" * 60)
    print()

    # Generate frames
    print("[1/3] Generating frames...")
    total_frames = generate_all_frames()
    duration = total_frames / FPS
    print(f"  Generated {total_frames} frames ({duration:.1f} seconds)")

    # Generate audio
    print(f"\n[2/3] Generating audio ({duration:.1f}s)...")
    audio = generate_audio(duration)
    write_wav(AUDIO_FILE, audio)
    print(f"  Audio written to {AUDIO_FILE}")

    # Render with ffmpeg
    print("\n[3/3] Rendering video with ffmpeg...")
    import subprocess

    cmd = [
        "ffmpeg", "-y",
        "-framerate", str(FPS),
        "-i", os.path.join(OUT_DIR, "frame_%05d.png"),
        "-i", AUDIO_FILE,
        "-c:v", "libx264",
        "-preset", "medium",
        "-crf", "18",
        "-pix_fmt", "yuv420p",
        "-c:a", "aac",
        "-b:a", "192k",
        "-shortest",
        "-movflags", "+faststart",
        VIDEO_FILE
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except FileNotFoundError:
        # ffmpeg is not installed / not on PATH: report it like any other
        # ffmpeg failure instead of crashing with a traceback.
        print("ffmpeg error:\nffmpeg executable not found on PATH")
        print(f"Frames kept in {OUT_DIR}, audio kept in {AUDIO_FILE}")
        sys.exit(1)
    if result.returncode != 0:
        print(f"ffmpeg error:\n{result.stderr}")
        sys.exit(1)

    print(f"\n{'=' * 60}")
    print(f"  DONE! Output: {VIDEO_FILE}")
    print(f"  Duration: {duration:.1f}s | Frames: {total_frames} | FPS: {FPS}")
    print(f"{'=' * 60}")

    # Cleanup
    print("\nCleaning up temporary files...")
    shutil.rmtree(OUT_DIR)
    os.remove(AUDIO_FILE)
    print("Done!")


 # Run the full render only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
No results found