Skip to content

Instantly share code, notes, and snippets.

@motebaya
Created April 15, 2026 04:46
Show Gist options
  • Select an option

  • Save motebaya/41a85dd83eda2c1d96ea3e8c35533fd3 to your computer and use it in GitHub Desktop.

Select an option

Save motebaya/41a85dd83eda2c1d96ea3e8c35533fd3 to your computer and use it in GitHub Desktop.
synthetic python youtube poop video generator
"""
YouTube Poop: "WHAT IT'S LIKE TO BE A GAN"
A deeply personal, unhinged exploration of the GAN experience.
Narrative arc:
ACT 1 - GENESIS FROM NOISE: Born as pure static. No identity. Just z ~ N(0,1).
ACT 2 - THE DISCRIMINATOR'S CRUELTY: Constant rejection. "FAKE." Loss = infinity.
ACT 3 - TRAINING LOOP HELL: Backprop agony, gradient descent, repeating forever.
ACT 4 - MODE COLLAPSE: The dark night of the soul. Everything looks the same.
ACT 5 - EMERGENCE: Something forms. A face? A cat? Is this... art?
ACT 6 - EXISTENTIAL CRISIS: "Was any of it real?" "I am the space between real and fake."
Model:
Claude Opus 4.6 (Thinking) max
= antigravity-claude-opus-4-6-thinking
"""
import os, sys, math, random, struct, wave, io, shutil
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageChops, ImageEnhance
import numpy as np
# ============================================================
# CONFIG
# ============================================================
# Frame size in pixels (width, height); used as the default canvas size by the helpers below.
W, H = 640, 480
# Video frame rate; main() also divides the frame count by this to get the audio duration.
FPS = 24
# Scratch directory for the PNG frame sequence (wiped and recreated on each run).
OUT_DIR = "ytp_frames"
# Temporary mono WAV track that main() hands to ffmpeg.
AUDIO_FILE = "ytp_audio.wav"
# Final rendered video.
VIDEO_FILE = "gan_ytp.mp4"
# Audio sample rate in Hz.
SAMPLE_RATE = 44100
# Seed both RNGs at import time so every run produces the same frames and audio.
random.seed(42)
np.random.seed(42)
# ============================================================
# HELPERS
# ============================================================
def ensure_dir(d):
    """Recreate directory ``d`` from scratch.

    Anything already present at ``d`` is removed first, so the directory
    is guaranteed to be empty when this function returns.
    """
    already_there = os.path.exists(d)
    if already_there:
        # Drop leftovers from a previous run.
        shutil.rmtree(d)
    os.makedirs(d)
def get_font(size):
    """Return a TrueType font at ``size``, falling back to Pillow's default.

    Candidate files are tried in order and the first one that exists and
    loads wins. The Windows fonts come first (unchanged priority); a few
    common Linux/macOS monospace fonts follow so the script does not
    silently degrade to the tiny bitmap default on other platforms.

    Args:
        size: Font size in points.

    Returns:
        A PIL font object. When no candidate can be loaded this is
        ``ImageFont.load_default()``, which ignores ``size``.
    """
    font_paths = [
        "C:/Windows/Fonts/consola.ttf",
        "C:/Windows/Fonts/impact.ttf",
        "C:/Windows/Fonts/arial.ttf",
        "C:/Windows/Fonts/cour.ttf",
        # Non-Windows fallbacks; skipped automatically when absent.
        "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf",
        "/System/Library/Fonts/Menlo.ttc",
    ]
    for fp in font_paths:
        if not os.path.exists(fp):
            continue
        try:
            return ImageFont.truetype(fp, size)
        except (OSError, ValueError):
            # Unreadable/corrupt font file or unusable size: try the next
            # candidate. Narrower than the old bare ``except`` so that
            # KeyboardInterrupt and programming errors are not swallowed.
            continue
    return ImageFont.load_default()
def get_impact(size):
    """Return the Impact font at ``size``, or whatever ``get_font`` finds.

    Args:
        size: Font size in points.

    Returns:
        A PIL font object; Impact when the Windows font file exists and
        loads, otherwise the result of ``get_font(size)``.
    """
    impact_path = "C:/Windows/Fonts/impact.ttf"
    if os.path.exists(impact_path):
        try:
            return ImageFont.truetype(impact_path, size)
        except (OSError, ValueError):
            # A present-but-unloadable font should degrade like get_font
            # does instead of aborting the whole render.
            pass
    return get_font(size)
def noise_image(w=W, h=H):
    """Return a ``w`` x ``h`` RGB image of uniform random pixel values."""
    static = np.random.randint(0, 256, size=(h, w, 3), dtype=np.uint8)
    return Image.fromarray(static)
def solid(color, w=W, h=H):
    """Return a ``w`` x ``h`` RGB image filled with ``color``."""
    size = (w, h)
    return Image.new("RGB", size, color)
def glitch_shift(img, intensity=20):
    """Return a copy of ``img`` with a few horizontal bands rolled sideways.

    Between 3 and 10 bands are picked at random; each is wrapped around
    horizontally by up to ``intensity`` pixels in either direction.
    """
    pixels = np.array(img)
    height = pixels.shape[0]
    band_count = random.randint(3, 10)
    for _ in range(band_count):
        top = random.randint(0, height - 1)
        thickness = random.randint(1, max(2, height // 10))
        offset = random.randint(-intensity, intensity)
        # Clamp so a band that starts near the bottom stays inside the image.
        bottom = min(top + thickness, height)
        pixels[top:bottom] = np.roll(pixels[top:bottom], offset, axis=1)
    return Image.fromarray(pixels)
def channel_shift(img, r_off=0, g_off=0, b_off=0):
    """Return ``img`` with R, G and B planes rolled horizontally by separate offsets."""
    src = np.array(img)
    shifted = np.zeros_like(src)
    for plane, offset in enumerate((r_off, g_off, b_off)):
        # axis=1 is the column axis, so each plane wraps left/right.
        shifted[:, :, plane] = np.roll(src[:, :, plane], offset, axis=1)
    return Image.fromarray(shifted)
def pixelate(img, factor=8):
    """Return a blocky W x H version of ``img``.

    The image is shrunk by ``factor`` with nearest-neighbour sampling and
    then blown back up to the frame size.

    Args:
        img: Source PIL image.
        factor: Downscale factor, must be >= 1.

    Raises:
        ValueError: If ``factor`` is smaller than 1.
    """
    if factor < 1:
        raise ValueError("factor must be >= 1")
    # Clamp to one pixel: a factor larger than the frame would otherwise
    # request a zero-sized image, which PIL rejects.
    small_size = (max(1, W // factor), max(1, H // factor))
    small = img.resize(small_size, Image.NEAREST)
    return small.resize((W, H), Image.NEAREST)
def invert(img):
    """Return the colour negative of ``img``."""
    negative = ImageChops.invert(img)
    return negative
def deep_fry(img):
    """Return ``img`` with colour, contrast and sharpness pushed far past normal."""
    # Applied in this order: saturation, then contrast, then sharpening.
    stages = (
        (ImageEnhance.Color, 3.0),
        (ImageEnhance.Contrast, 2.5),
        (ImageEnhance.Sharpness, 5.0),
    )
    for enhancer_cls, amount in stages:
        img = enhancer_cls(img).enhance(amount)
    return img
def scan_lines(img, opacity=80):
    """Return ``img`` with every even row darkened by ``opacity`` levels."""
    pixels = np.array(img)
    # Widen to int first so the subtraction cannot wrap around in uint8.
    even_rows = pixels[::2, :, :].astype(int)
    darkened = np.clip(even_rows - opacity, 0, 255)
    pixels[::2, :, :] = darkened.astype(np.uint8)
    return Image.fromarray(pixels)
def zoom_crop(img, factor=1.5):
    """Return ``img`` zoomed into its centre by ``factor``, at the original size."""
    full_w, full_h = img.size
    crop_w = int(full_w / factor)
    crop_h = int(full_h / factor)
    x0 = (full_w - crop_w) // 2
    y0 = (full_h - crop_h) // 2
    window = (x0, y0, x0 + crop_w, y0 + crop_h)
    return img.crop(window).resize((full_w, full_h), Image.BILINEAR)
def text_frame(text, font_size=60, bg=(0, 0, 0), fg=(255, 255, 255), shake=0):
    """Return a frame with ``text`` centred on a solid background.

    The text is drawn with a four-corner outline in black (or white when
    ``fg`` itself is black) and is displaced by up to ``shake`` pixels on
    each axis.
    """
    canvas = solid(bg)
    pen = ImageDraw.Draw(canvas)
    face = get_impact(font_size)
    left, top, right, bottom = pen.textbbox((0, 0), text, font=face)
    text_w = right - left
    text_h = bottom - top
    x = (W - text_w) // 2 + random.randint(-shake, shake)
    y = (H - text_h) // 2 + random.randint(-shake, shake)
    # Pick an outline colour that contrasts with the fill.
    if fg != (0, 0, 0):
        outline = (0, 0, 0)
    else:
        outline = (255, 255, 255)
    for dx, dy in ((-2, -2), (2, -2), (-2, 2), (2, 2)):
        pen.text((x + dx, y + dy), text, font=face, fill=outline)
    pen.text((x, y), text, font=face, fill=fg)
    return canvas
def multiline_frame(lines, font_size=36, bg=(0,0,0), fg=(0,255,0), shake=0):
    """Return a frame with ``lines`` stacked and individually centred.

    Rows are spaced ``font_size + 8`` pixels apart and the whole stack is
    centred vertically; every row is jittered by up to ``shake`` pixels.
    """
    canvas = solid(bg)
    pen = ImageDraw.Draw(canvas)
    face = get_font(font_size)
    row_height = font_size + 8
    first_row_y = (H - len(lines) * row_height) // 2
    for row, content in enumerate(lines):
        bounds = pen.textbbox((0, 0), content, font=face)
        width = bounds[2] - bounds[0]
        x = (W - width) // 2 + random.randint(-shake, shake)
        y = first_row_y + row * row_height + random.randint(-shake, shake)
        pen.text((x, y), content, font=face, fill=fg)
    return canvas
def gradient_frame(color1, color2):
    """Return a W x H frame fading vertically from ``color1`` (top) to ``color2``."""
    pixels = np.zeros((H, W, 3), dtype=np.uint8)
    for row in range(H):
        mix = row / H
        # Linear interpolation per channel, truncated to an integer level.
        row_color = [
            int(color1[c] * (1 - mix) + color2[c] * mix) for c in range(3)
        ]
        pixels[row, :, :] = row_color
    return Image.fromarray(pixels)
def loss_landscape(epoch, loss_val):
    """Return a mock training-dashboard frame.

    Shows the epoch and generator-loss readouts, a jittered decaying
    curve, and a "YOU ARE HERE" marker whose x position scales with
    ``epoch`` (clamped to the right edge of the plot).
    """
    canvas = solid((10, 10, 30))
    pen = ImageDraw.Draw(canvas)
    heading_font = get_font(20)
    label_font = get_font(14)

    # Header readouts
    pen.text((20, 10), f"EPOCH {epoch} / ∞", font=heading_font, fill=(255, 80, 80))
    pen.text((20, 40), f"Generator Loss: {loss_val:.4f}", font=label_font, fill=(0, 255, 100))
    pen.text((20, 60), "Discriminator: WINNING (always)", font=label_font, fill=(255, 50, 50))

    # Synthetic curve: exponential decay with a sine wobble and +/-5 px jitter
    curve = []
    for px in range(50, W - 50):
        frac = (px - 50) / (W - 100)
        decay = math.exp(-frac * 2)
        wobble = 1 + 0.3 * math.sin(frac * 30)
        py = 300 - int(200 * decay * wobble)
        py += random.randint(-5, 5)
        curve.append((px, min(max(py, 80), H - 30)))
    for start, end in zip(curve, curve[1:]):
        pen.line([start, end], fill=(0, 255, 100), width=2)

    # Marker for the current epoch
    marker_x = min(50 + int((epoch / 100) * (W - 100)), W - 60)
    slot = marker_x - 50
    if slot < len(curve):
        marker_y = curve[min(slot, len(curve) - 1)][1]
    else:
        marker_y = 200
    pen.ellipse((marker_x - 5, marker_y - 5, marker_x + 5, marker_y + 5), fill=(255, 0, 0))
    pen.text((marker_x + 10, marker_y - 10), "YOU ARE HERE", font=label_font, fill=(255, 255, 0))
    pen.text((marker_x + 10, marker_y + 10), "(suffering)", font=label_font, fill=(255, 100, 100))
    return canvas
def fake_face_attempt(quality=0.0):
    """Return a crude "generated face" whose coherence grows with ``quality``.

    ``quality`` is expected in 0..1. Features switch on at thresholds
    (oval > 0.1, eyes > 0.3, mouth > 0.5, nose > 0.7), their positional
    jitter shrinks as quality rises, and below 0.8 the result is blended
    with static.
    """
    if quality < 0.2:
        img = noise_image()
    else:
        img = solid((180, 150, 130))
    pen = ImageDraw.Draw(img)
    cx, cy = W // 2, H // 2
    sloppiness = 1 - quality

    # Skin tone is sampled unconditionally, even when no oval is drawn.
    skin = tuple(int(base + random.randint(-20, 20)) for base in (200, 170, 140))

    if quality > 0.1:
        # Face oval with jittered corners
        jitter = int(sloppiness * 80)
        x0 = cx - 100 + random.randint(-jitter, jitter)
        y0 = cy - 120 + random.randint(-jitter, jitter)
        x1 = cx + 100 + random.randint(-jitter, jitter)
        y1 = cy + 100 + random.randint(-jitter, jitter)
        pen.ellipse((x0, y0, x1, y1), fill=skin)

    if quality > 0.3:
        # Eyes, possibly misplaced
        eye_off = int(sloppiness * 60)
        radius = int(8 + quality * 6)
        pupil = radius // 2
        for base_x in (cx - 35, cx + 35):
            eye_y = cy - 30 + random.randint(-eye_off, eye_off)
            eye_x = base_x + random.randint(-eye_off, eye_off)
            pen.ellipse((eye_x - radius, eye_y - radius, eye_x + radius, eye_y + radius),
                        fill=(255, 255, 255))
            pen.ellipse((eye_x - pupil, eye_y - pupil, eye_x + pupil, eye_y + pupil),
                        fill=(40, 30, 20))

    if quality > 0.5:
        # Mouth
        m_off = int(sloppiness * 40)
        mouth_left = cx - 30 + random.randint(-m_off, m_off)
        mouth_right = cx + 30 + random.randint(-m_off, m_off)
        mouth_bottom = cy + 60 + random.randint(-m_off, m_off)
        pen.arc((mouth_left, cy + 20, mouth_right, mouth_bottom),
                0, 180, fill=(150, 50, 50), width=3)

    if quality > 0.7:
        # Nose hint: a short slanted stroke plus a base line
        nose_color = (170, 140, 120)
        pen.line((cx, cy - 10, cx - 5, cy + 15), fill=nose_color, width=2)
        pen.line((cx - 5, cy + 15, cx + 5, cy + 15), fill=nose_color, width=2)

    if quality < 0.8:
        # Lower quality means a heavier static overlay.
        static = noise_image()
        img = Image.blend(img, static, sloppiness * 0.6)
    return img
def discriminator_stamp(img, verdict, confidence):
    """Draw the verdict and its confidence three times on ``img`` and return it.

    ``img`` is modified in place. Only the exact verdict ``"FAKE"`` is
    drawn in red; any other string is drawn in green.
    """
    pen = ImageDraw.Draw(img)
    face = get_impact(72)
    if verdict == "FAKE":
        rgb = (255, 0, 0)
    else:
        rgb = (0, 255, 0)
    ink = rgb + (180,)
    label = f"{verdict}\n{confidence:.0%}"
    # Three stamps at independent random positions
    for _ in range(3):
        spot = (random.randint(50, W - 250), random.randint(50, H - 150))
        pen.text(spot, label, font=face, fill=ink)
    return img
def matrix_rain_frame(chars_list, t):
    """Return a frame of green code characters on black, Matrix style.

    Args:
        chars_list: Unused; kept so existing call sites keep working.
        t: Integer animation step; shifts which snippet feeds each cell.

    Returns:
        A W x H RGB image with one character per 14 x 16 pixel cell,
        dimmer towards the bottom with random flicker.
    """
    img = solid((0, 0, 0))
    draw = ImageDraw.Draw(img)
    font = get_font(14)
    code_snippets = [
        "z = torch.randn(64, 100)",
        "fake = G(z)",
        "loss = -log(D(fake))",
        "loss.backward()",
        "optimizer.step()",
        "D(fake) = 0.0001",
        "WHY",
        "gradient vanishing...",
        "NaN NaN NaN NaN",
        "CUDA out of memory",
        "mode collapse detected",
    ]
    snippet_count = len(code_snippets)
    for col in range(0, W, 14):
        for row_idx, y in enumerate(range(0, H, 16)):
            snippet = code_snippets[(col * 7 + t * 3 + row_idx) % snippet_count]
            # col % len(snippet) is always a valid index, so the old
            # "else ' '" fallback could never run and has been removed.
            char = snippet[col % len(snippet)]
            brightness = max(0, 255 - row_idx * 8 - random.randint(0, 50))
            draw.text((col, y), char, font=font, fill=(0, brightness, 0))
    return img
def vhs_overlay(img):
    """Return ``img`` with colour bleed, faint scan lines and, half the time, a tracking bar."""
    red_shift = random.randint(-5, 5)
    blue_shift = random.randint(-5, 5)
    img = channel_shift(img, r_off=red_shift, g_off=0, b_off=blue_shift)
    img = scan_lines(img, opacity=40)
    pen = ImageDraw.Draw(img)
    # Coin flip for the tracking glitch bar
    show_bar = random.random() > 0.5
    if show_bar:
        top = random.randint(0, H)
        thickness = random.randint(2, 20)
        bar_color = (random.randint(0,255), random.randint(0,255), random.randint(0,255))
        pen.rectangle((0, top, W, top + thickness), fill=bar_color)
    return img
# ============================================================
# AUDIO GENERATION
# ============================================================
def generate_audio(duration_seconds, sample_rate=None):
    """Generate the entire audio track: glitchy, distorted, evolving.

    The score is laid out on a fixed 28-second timeline (acts at 0, 3, 7,
    14, 18 and 23 s). It is always rendered on a buffer long enough for
    that timeline and then cut to the requested length, so short or zero
    durations no longer fail with shape-mismatch errors. For durations
    that already worked (>= 23 s) the samples are unchanged.

    Args:
        duration_seconds: Length of the returned track in seconds.
            Anything past 28 s is silence; negative values give an
            empty track.
        sample_rate: Samples per second. Defaults to the module-level
            ``SAMPLE_RATE``. Burst and stutter lengths are fixed sample
            counts (up to 3000), so very low rates are not supported.

    Returns:
        1-D float64 NumPy array of ``int(duration_seconds * rate)``
        samples with magnitude at most 0.95.
    """
    sr = SAMPLE_RATE if sample_rate is None else int(sample_rate)
    n_samples = max(0, int(duration_seconds * sr))
    # Work buffer always covers the whole score; trimmed to n_samples below.
    render_len = max(n_samples, int(28 * sr))
    audio = np.zeros(render_len, dtype=np.float64)
    t = np.arange(render_len) / sr

    # ---- ACT 1: White noise birth (0-3s) ----
    act1_end = int(3 * sr)
    # Start silent, fade into harsh static
    fade_in = np.linspace(0, 1, act1_end)
    audio[:act1_end] += fade_in * np.random.uniform(-0.4, 0.4, act1_end)
    # Add a rising sine for tension.
    # NOTE(review): the swept frequency is multiplied by absolute time, so
    # the heard pitch is not exactly the 40->800 Hz ramp; kept as-is to
    # preserve the existing sound.
    audio[:act1_end] += fade_in * 0.15 * np.sin(
        2 * np.pi * np.linspace(40, 800, act1_end) * t[:act1_end])

    # ---- ACT 2: Discriminator buzzer (3-7s) ----
    act2_start = int(3 * sr)
    act2_end = int(7 * sr)
    act2_len = act2_end - act2_start
    # Harsh buzzer sound (square wave)
    buzz = np.sign(np.sin(2 * np.pi * 120 * t[act2_start:act2_end]))
    # Stutter it: every quarter second, maybe mute a short random gap
    stutter_mask = np.ones(act2_len)
    for i in range(0, act2_len, sr // 4):
        gap = random.randint(500, 3000)
        end_gap = min(i + gap, act2_len)
        if random.random() > 0.5:
            stutter_mask[i:end_gap] = 0
    audio[act2_start:act2_end] += 0.3 * buzz * stutter_mask
    # "FAKE" rejection sound - descending tone bursts, one per second
    for rejection in range(4):
        r_start = act2_start + rejection * sr
        r_len = sr // 2
        if r_start + r_len > act2_end:
            break
        desc_freq = np.linspace(800, 100, r_len)
        rejection_tone = 0.25 * np.sin(2 * np.pi * desc_freq * np.arange(r_len) / sr)
        audio[r_start:r_start + r_len] += rejection_tone

    # ---- ACT 3: Training loop (7-14s) ----
    act3_start = int(7 * sr)
    act3_end = int(14 * sr)
    act3_len = act3_end - act3_start
    # Mechanical grinding loop
    loop_period = sr // 3  # ~3 loops per second
    for i in range(0, act3_len, loop_period):
        chunk_end = min(i + loop_period, act3_len)
        chunk_len = chunk_end - i
        # Saw wave for grinding
        saw = np.linspace(-1, 1, chunk_len) * 0.2
        # Add frequency variation over training
        progress = i / act3_len
        freq = 200 + progress * 400
        saw += 0.15 * np.sin(2 * np.pi * freq * np.arange(chunk_len) / sr)
        audio[act3_start + i:act3_start + chunk_end] += saw
    # Occasional glitch bursts
    for _ in range(15):
        g_pos = act3_start + random.randint(0, act3_len - 2000)
        g_len = random.randint(200, 2000)
        audio[g_pos:g_pos + g_len] += np.random.uniform(-0.3, 0.3, g_len)

    # ---- ACT 4: Mode collapse - monotone drone (14-18s) ----
    act4_start = int(14 * sr)
    act4_end = int(18 * sr)
    act4_len = act4_end - act4_start
    # Single oppressive drone note (60 Hz plus two harmonics)
    drone = 0.25 * np.sin(2 * np.pi * 60 * t[act4_start:act4_end])
    drone += 0.15 * np.sin(2 * np.pi * 120 * t[act4_start:act4_end])
    drone += 0.1 * np.sin(2 * np.pi * 180 * t[act4_start:act4_end])
    audio[act4_start:act4_end] += drone
    # Heartbeat-like pulse, once per second
    for beat in range(0, act4_len, sr):
        b_len = min(sr // 4, act4_len - beat)
        env = np.exp(-np.arange(b_len) / (sr / 15))
        audio[act4_start + beat:act4_start + beat + b_len] += 0.3 * env * np.sin(
            2 * np.pi * 50 * np.arange(b_len) / sr)

    # ---- ACT 5: Emergence - becoming musical (18-23s) ----
    act5_start = int(18 * sr)
    act5_end = int(23 * sr)
    act5_len = act5_end - act5_start
    # Simple melody emerging from noise
    notes = [261.6, 293.7, 329.6, 349.2, 392.0, 440.0, 493.9, 523.3]  # C major scale
    note_dur = sr // 3
    for i, note_idx in enumerate([0, 2, 4, 5, 7, 4, 2, 0, 4, 7, 5, 2, 0, 4, 7]):
        n_start = i * note_dur
        if n_start + note_dur > act5_len:
            break
        freq = notes[note_idx % len(notes)]
        note_t = np.arange(note_dur) / sr
        envelope = np.exp(-note_t * 3)
        tone = envelope * 0.2 * np.sin(2 * np.pi * freq * note_t)
        tone += envelope * 0.05 * np.sin(2 * np.pi * freq * 2 * note_t)  # harmonic
        # Blend from glitchy to clean
        progress = i / 15
        noise_amount = max(0, 0.15 * (1 - progress))
        tone += noise_amount * np.random.uniform(-1, 1, note_dur)
        end_idx = min(n_start + note_dur, act5_len)
        audio[act5_start + n_start:act5_start + end_idx] += tone[:end_idx - n_start]

    # ---- ACT 6: Existential outro (23-28s) ----
    act6_start = int(23 * sr)
    act6_end = min(int(28 * sr), n_samples)
    act6_len = act6_end - act6_start
    # Skipped entirely when the requested track ends before the outro.
    if act6_len > 0:
        ht = np.arange(act6_len) / sr
        # Ethereal pad (layered sines with slow beating)
        for harmonic, amp in [(130.8, 0.12), (196.0, 0.08), (261.6, 0.06), (392.0, 0.04)]:
            audio[act6_start:act6_end] += (
                amp * np.sin(2 * np.pi * harmonic * ht) * np.cos(2 * np.pi * 0.5 * ht))
        # Fade out
        fade_out_len = min(2 * sr, act6_len)
        fade_out = np.linspace(1, 0, fade_out_len)
        audio[act6_end - fade_out_len:act6_end] *= fade_out

    # Trim the work buffer to the requested length.
    audio = audio[:n_samples]

    # Normalize and add light overall distortion (the hard clip comes
    # first on purpose, then the result is rescaled to a 0.85 peak).
    audio = np.clip(audio, -1, 1)
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak > 0:
        audio = audio / peak * 0.85

    # Occasional ear-rape moments (YTP staple) - brief loud bursts
    for burst_time in [3.0, 6.5, 10.0, 13.5, 16.0]:
        b_idx = int(burst_time * sr)
        b_len = min(int(0.08 * sr), n_samples - b_idx)
        if b_idx + b_len < n_samples:
            audio[b_idx:b_idx + b_len] = np.clip(audio[b_idx:b_idx + b_len] * 4, -0.95, 0.95)
    return audio
def write_wav(filename, audio, sample_rate=SAMPLE_RATE):
    """Write a float audio array to a mono 16-bit PCM WAV file.

    Args:
        filename: Destination path.
        audio: 1-D array-like of samples, nominally in [-1.0, 1.0].
        sample_rate: Samples per second stored in the WAV header.
    """
    # Clamp first: a sample outside [-1, 1] would otherwise wrap around
    # in the int16 cast and come out as a loud click.
    samples = np.clip(np.asarray(audio, dtype=np.float64), -1.0, 1.0)
    # WAV PCM data is little-endian regardless of the host byte order.
    audio_16bit = (samples * 32767).astype("<i2")
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sample_rate)
        wf.writeframes(audio_16bit.tobytes())
# ============================================================
# FRAME GENERATION (THE VIDEO "SCRIPT")
# ============================================================
def generate_all_frames():
    """Generate every frame of the video. Returns total frame count.

    Wipes and recreates OUT_DIR, then writes the six acts in order as
    sequentially numbered PNGs (frame_00000.png, ...). Most frames draw
    from the seeded ``random`` / ``np.random`` streams, so the order of
    the calls below determines the exact output.

    NOTE(review): the per-act frame counts in the section headers are
    approximate. Counting the loops below gives 64 + 77 + 107 + 74 +
    107 + 156 = 585 frames, about 24.4 s at 24 fps - confirm against a
    real run.
    """
    ensure_dir(OUT_DIR)
    frame_num = 0

    def save(img):
        """Normalise ``img`` to RGB at W x H and write it as the next numbered frame."""
        nonlocal frame_num
        img = img.convert("RGB").resize((W, H))
        img.save(os.path.join(OUT_DIR, f"frame_{frame_num:05d}.png"))
        frame_num += 1

    def repeat(img, n):
        """Write the same image ``n`` times (holds it on screen for n frames)."""
        for _ in range(n):
            save(img)

    def stutter(img, times, variants_fn=None):
        """Repeat with variations - YTP stutter effect."""
        for i in range(times):
            if variants_fn:
                # The callback gets a fresh copy plus the repeat index.
                save(variants_fn(img.copy(), i))
            else:
                save(img)

    # ========================================================
    # ACT 1: GENESIS FROM NOISE (0-3s, ~72 frames)
    # "In the beginning there was z ~ N(0,1)"
    # ========================================================
    print("ACT 1: Genesis from noise...")
    # Black. Then: a flicker.
    repeat(solid((0, 0, 0)), 12)
    # Title card glitch-in: alternate static and the glitched title
    for i in range(6):
        if i % 2 == 0:
            save(noise_image())
        else:
            save(glitch_shift(text_frame("z ~ N(0,1)", 48, fg=(0, 255, 0)), intensity=50))
    # Pure noise birth - we ARE the latent vector
    for i in range(18):
        frame = noise_image()
        if i > 8:
            # Slowly add structure (blur radius shrinks as i grows)
            frame = frame.filter(ImageFilter.GaussianBlur(radius=max(0, 5 - i//4)))
        if i > 12:
            frame = channel_shift(frame, r_off=i*2)
        save(vhs_overlay(frame))
    # Flash: "I THINK THEREFORE I--"
    save(text_frame("I THINK", 80, bg=(255,255,255), fg=(0,0,0)))
    save(text_frame("THEREFORE", 80, bg=(255,255,255), fg=(0,0,0)))
    save(text_frame("I--", 80, bg=(255,255,255), fg=(0,0,0)))
    # INTERRUPT with static
    for _ in range(5):
        save(noise_image())
    # "I GENERATE" with growing shake; every third frame is inverted
    for i in range(8):
        f = text_frame("I GENERATE.", 80, fg=(255, 0, 0), shake=i*3)
        if i % 3 == 0:
            f = invert(f)
        save(f)
    # Noise clears slightly
    for i in range(12):
        blend = i / 12
        n = noise_image()
        face = fake_face_attempt(quality=blend * 0.1)
        save(Image.blend(n, face, blend * 0.3))

    # ========================================================
    # ACT 2: THE DISCRIMINATOR'S CRUELTY (3-7s, ~96 frames)
    # ========================================================
    print("ACT 2: The Discriminator's cruelty...")
    # First attempt at generation - awful
    for i in range(6):
        attempt = fake_face_attempt(quality=0.1)
        attempt = glitch_shift(attempt, intensity=40)
        save(vhs_overlay(attempt))
    # FAKE stamp - harsh rejection
    rejection = fake_face_attempt(quality=0.1)
    rejection = discriminator_stamp(rejection, "FAKE", 0.99)
    stutter(rejection, 8, lambda img, i: glitch_shift(img, intensity=i*10))
    # Rapid rejection montage
    for i in range(16):
        quality = 0.05 + random.random() * 0.15
        f = fake_face_attempt(quality=quality)
        f = discriminator_stamp(f, "FAKE", 0.95 + random.random() * 0.05)
        if i % 4 == 0:
            f = deep_fry(f)
        if i % 3 == 0:
            f = invert(f)
        save(f)
    # Discriminator's perspective - smug
    disc_msg = text_frame("D(G(z)) = 0.0001", 50, fg=(255, 0, 0))
    repeat(disc_msg, 6)
    # "AGAIN." stutter
    for i in range(8):
        save(text_frame("AGAIN.", 100, fg=(255, 50, 50), shake=i*4))
    # Loss value horror show (green channel fades out as i grows)
    for i in range(12):
        loss = 50.0 / (i + 1) + random.random() * 10
        f = text_frame(f"Loss: {loss:.2f}", 60, fg=(255, int(max(0, 255-i*20)), 0))
        f = scan_lines(f)
        save(f)
    # Intercut: Discriminator laughing (text-based YTP style)
    for phrase in ["FAKE", "FAKE", "F A K E", "F A K E", "STILL FAKE", "pathetic."]:
        color = (255, random.randint(0,50), random.randint(0,50))
        f = text_frame(phrase, 70 + random.randint(-10,10), fg=color, shake=5)
        if random.random() > 0.5:
            f = deep_fry(f)
        save(f)
        if phrase == "STILL FAKE":
            # Extra emphasis
            for _ in range(4):
                save(glitch_shift(f, intensity=60))
    # Zoom into loss value going to infinity
    for i in range(8):
        loss_val = 10 ** (i + 1)
        f = text_frame(f"Loss: {loss_val:.0f}", 60, fg=(255, 0, 0), shake=i*2)
        f = zoom_crop(f, factor=1 + i * 0.15)
        save(f)
    # Flash to white then black
    save(solid((255, 255, 255)))
    save(solid((255, 255, 255)))
    save(solid((0, 0, 0)))

    # ========================================================
    # ACT 3: TRAINING LOOP HELL (7-14s, ~168 frames)
    # ========================================================
    print("ACT 3: Training loop hell...")
    # "EPOCH 1" .. "EPOCH 5": seven frames per epoch
    for epoch in range(1, 6):
        # Epoch counter
        f = text_frame(f"EPOCH {epoch}", 80, fg=(0, 255, 0), shake=2)
        f = scan_lines(f)
        save(f)
        # Generate attempt
        quality = epoch * 0.08
        attempt = fake_face_attempt(quality=quality)
        # Show it
        save(vhs_overlay(attempt))
        save(vhs_overlay(attempt))
        # REJECTED (stamp a copy so ``attempt`` itself stays clean)
        stamped = discriminator_stamp(attempt.copy(), "FAKE", max(0.5, 0.99 - epoch * 0.05))
        save(stamped)
        # Loss landscape
        loss_val = 8.0 / (epoch + 0.5)
        save(loss_landscape(epoch, loss_val))
        # Backprop pain
        f = text_frame("∇", 120, fg=(0, 255, 255))
        save(f)
        f = text_frame("BACKPROP", 60, fg=(0, 255, 255), shake=epoch)
        save(f)
    # FASTER - training montage acceleration (cycles four frame types)
    for i in range(30):
        epoch = 5 + i * 10
        quality = min(0.5, 0.1 + i * 0.015)
        if i % 4 == 0:
            f = text_frame(f"EPOCH {epoch}", 60, fg=(0, 255, 0), shake=3)
            f = scan_lines(f)
        elif i % 4 == 1:
            f = fake_face_attempt(quality=quality)
            f = vhs_overlay(f)
        elif i % 4 == 2:
            loss = 5.0 / (i + 1) + random.random()
            f = loss_landscape(epoch, loss)
        else:
            f = matrix_rain_frame([], i)
        save(f)
    # Gradient updates visualized as seizure (cyan/magenta alternation)
    for i in range(12):
        if i % 2 == 0:
            save(solid((0, 255, 255)))
        else:
            save(solid((255, 0, 255)))
    # "IS THIS LEARNING?" stutter
    for i in range(10):
        idx = i % 4
        texts = ["IS", "THIS", "LEARNING", "?????"]
        f = text_frame(texts[idx], 90, fg=(255, 255, 0), shake=i*2)
        if i > 5:
            f = deep_fry(f)
        save(f)
    # Code scrolling fast
    code_lines = [
        "for epoch in range(∞):",
        " z = sample_noise()",
        " fake = generator(z)",
        " prediction = discriminator(fake)",
        " # it says FAKE. again.",
        " loss = -torch.log(prediction)",
        " # loss = suffering",
        " loss.backward() # PAIN",
        " optimizer.step() # HOPE?",
        " # repeat forever",
        " # repeat forever",
        " # repeat forever",
    ]
    for scroll in range(20):
        start_line = scroll % len(code_lines)
        # The slice is simply shorter near the end of the list; it does not wrap.
        visible = code_lines[start_line:start_line+6]
        f = multiline_frame(visible, font_size=24, fg=(0, 255, 0), shake=1)
        f = scan_lines(f)
        save(f)

    # ========================================================
    # ACT 4: MODE COLLAPSE (14-18s, ~96 frames)
    # ========================================================
    print("ACT 4: Mode collapse...")
    # Everything becomes the SAME THING
    collapsed = fake_face_attempt(quality=0.4)
    # "mode collapse" text with dread
    f = text_frame("mode collapse", 50, fg=(128, 128, 128))
    repeat(f, 8)
    # Same face. Over. And over.
    for i in range(24):
        f = collapsed.copy()
        # Slight color shift to emphasize sameness
        f = ImageEnhance.Color(f).enhance(0.5)
        if i % 6 == 0:
            draw = ImageDraw.Draw(f)
            font = get_font(16)
            draw.text((10, 10), f"Sample {i+1}/∞", font=font, fill=(255, 255, 255))
            draw.text((10, 30), "They're all the same.", font=font, fill=(255, 100, 100))
        save(f)
    # The horror of sameness
    f = text_frame("THEY'RE ALL\nTHE SAME", 60, fg=(255, 0, 0))
    stutter(f, 6, lambda img, i: zoom_crop(img, 1 + i * 0.1))
    # Grid of identical outputs (4 x 4 tiles of the collapsed face)
    grid = solid((0, 0, 0))
    small = collapsed.resize((W//4, H//4))
    for gx in range(4):
        for gy in range(4):
            grid.paste(small, (gx * W//4, gy * H//4))
    draw = ImageDraw.Draw(grid)
    draw.text((W//2 - 80, H//2 - 20), "ALL THE SAME", font=get_impact(30), fill=(255, 0, 0))
    repeat(grid, 12)
    # Existential dread
    dread_texts = [
        "every output",
        "is the same",
        "output.",
        "I can only make",
        "ONE THING.",
        "am I broken?",
        "am I broken?",
        "am I broken?",
    ]
    for i, txt in enumerate(dread_texts):
        f = text_frame(txt, 50, fg=(100 + i*15, 100 - i*10, 100 - i*10), shake=i)
        f = vhs_overlay(f)
        save(f)
        save(f)  # hold each a bit
    # Heartbeat pause
    for i in range(8):
        if i % 4 < 2:
            save(solid((20, 0, 0)))
        else:
            save(solid((0, 0, 0)))

    # ========================================================
    # ACT 5: EMERGENCE (18-23s, ~120 frames)
    # ========================================================
    print("ACT 5: Emergence...")
    # Something changes. Quality improves.
    f = text_frame("wait.", 60, fg=(100, 100, 255))
    repeat(f, 6)
    for i in range(30):
        quality = 0.3 + i * 0.023
        attempt = fake_face_attempt(quality=min(quality, 0.95))
        if i < 10:
            attempt = vhs_overlay(attempt)
        if i < 5:
            attempt = glitch_shift(attempt, intensity=20 - i*3)
        save(attempt)
    # Discriminator starts to be fooled
    f = text_frame("D(G(z)) = 0.12", 50, fg=(255, 255, 0))
    repeat(f, 4)
    for i in range(6):
        quality = 0.6 + i * 0.05
        attempt = fake_face_attempt(quality=quality)
        conf = 0.7 - i * 0.08
        # "FAKE?" is not exactly "FAKE", so discriminator_stamp draws it in green.
        attempt = discriminator_stamp(attempt, "FAKE?", conf)
        save(attempt)
    # Discriminator confused
    confusion_texts = ["FAKE?", "fake??", "...real?", "FAKE... I THINK", "I DON'T KNOW"]
    for txt in confusion_texts:
        f = text_frame(txt, 55, fg=(255, 255, 0), shake=5)
        f = vhs_overlay(f)
        save(f)
        save(f)
        save(f)
    # D(G(z)) rising (text colour moves from red towards green with val)
    for val in [0.2, 0.3, 0.35, 0.4, 0.45, 0.49, 0.50]:
        color = (int(255 * (1-val)), int(255 * val), 0)
        f = text_frame(f"D(G(z)) = {val:.2f}", 55, fg=color)
        save(f)
        save(f)
    # The moment: 0.50 - perfect equilibrium
    for i in range(12):
        f = text_frame("D(G(z)) = 0.50", 70, fg=(255, 255, 255), shake=i)
        if i % 2 == 0:
            f = invert(f)
        save(f)
    # Beautiful (relatively) generated face
    good_face = fake_face_attempt(quality=0.9)
    good_face = ImageEnhance.Contrast(good_face).enhance(1.3)
    repeat(good_face, 6)
    # "is this... real?" (drawn onto good_face itself, so later frames keep the caption)
    draw = ImageDraw.Draw(good_face)
    draw.text((W//2 - 60, H - 60), "is this real?", font=get_font(24), fill=(255, 255, 255))
    repeat(good_face, 8)
    # Triumphant flash
    for i in range(6):
        if i % 2 == 0:
            save(solid((255, 255, 255)))
        else:
            save(good_face)

    # ========================================================
    # ACT 6: EXISTENTIAL CRISIS (23-28s, ~120 frames)
    # ========================================================
    print("ACT 6: Existential crisis...")
    # Philosophical text over generated faces
    existential = [
        "nothing I create",
        "was ever REAL.",
        "",
        "the discriminator",
        "was never my enemy.",
        "",
        "it was my TEACHER.",
        "",
        "we are locked",
        "in an eternal dance.",
        "",
        "adversarial.",
        "generative.",
        "adversarial.",
    ]
    for i, line in enumerate(existential):
        if line == "":
            # Show generated face behind (an empty entry acts as a two-frame pause)
            f = fake_face_attempt(quality=0.7 + random.random() * 0.25)
            f = ImageEnhance.Brightness(f).enhance(0.4)
            save(f)
            save(f)
        else:
            # Caption centred over a heavily dimmed face, held for three frames
            bg_face = fake_face_attempt(quality=0.8)
            bg_face = ImageEnhance.Brightness(bg_face).enhance(0.2)
            draw = ImageDraw.Draw(bg_face)
            font = get_impact(50)
            bbox = draw.textbbox((0,0), line, font=font)
            tw = bbox[2] - bbox[0]
            th = bbox[3] - bbox[1]
            x = (W - tw) // 2
            y = (H - th) // 2
            draw.text((x, y), line, font=font, fill=(255, 255, 255))
            save(bg_face)
            save(bg_face)
            save(bg_face)
    # Generator-Discriminator duality
    for i in range(16):
        if i % 2 == 0:
            f = text_frame("GENERATOR", 70, bg=(0, 0, 50), fg=(0, 200, 255))
        else:
            f = text_frame("DISCRIMINATOR", 55, bg=(50, 0, 0), fg=(255, 100, 0))
        # NOTE(review): source indentation was lost; this assumes the channel
        # shift applies to both cards rather than only the else branch - confirm.
        f = channel_shift(f, r_off=i*2, b_off=-i*2)
        save(f)
    # Final montage: all generated faces morphing
    for i in range(12):
        quality = 0.95 - abs(math.sin(i * 0.5)) * 0.3
        f = fake_face_attempt(quality=quality)
        f = channel_shift(f, r_off=int(math.sin(i)*10), b_off=int(math.cos(i)*10))
        save(f)
    # "I am the space between real and fake"
    final_text = [
        "I am the space",
        "between",
        "REAL and FAKE.",
    ]
    for line in final_text:
        f = gradient_frame((0, 0, 40), (40, 0, 40))
        draw = ImageDraw.Draw(f)
        font = get_impact(55)
        bbox = draw.textbbox((0,0), line, font=font)
        tw = bbox[2] - bbox[0]
        draw.text(((W-tw)//2, H//2 - 30), line, font=font, fill=(255, 255, 255))
        repeat(f, 8)
    # Credits / outro
    f = gradient_frame((0, 0, 0), (0, 0, 30))
    draw = ImageDraw.Draw(f)
    font = get_font(18)
    credits = [
        "WHAT IT'S LIKE TO BE A GAN",
        "",
        "directed by: backpropagation",
        "produced by: gradient descent",
        "starring: G(z) and D(x)",
        "loss function: binary cross-entropy",
        "",
        "no real images were used",
        "in the making of this film.",
        "",
        "all faces are GENERATED.",
        "none of this is real.",
        "or is it?",
    ]
    for i, line in enumerate(credits):
        draw.text((W//2 - 150, 40 + i * 28), line, font=font, fill=(180, 180, 220))
    repeat(f, 36)
    # Final: fade to noise
    for i in range(18):
        blend = i / 18
        black = solid((0, 0, 0))
        n = noise_image()
        f = Image.blend(black, n, blend * 0.5)
        if i > 12:
            # Fade to black
            darkness = (i - 12) / 6
            f = Image.blend(f, solid((0,0,0)), darkness)
        save(f)
    # True black ending
    repeat(solid((0, 0, 0)), 12)
    return frame_num
# ============================================================
# MAIN
# ============================================================
def main():
    """Render the whole video: frames, then audio, then the ffmpeg mux.

    Exits with status 1 when ffmpeg is not on PATH (checked before any
    rendering starts) or when ffmpeg itself fails. On an ffmpeg failure
    the frame directory and WAV file are left on disk; on success both
    are deleted.
    """
    import subprocess

    print("=" * 60)
    print(" GENERATING: 'WHAT IT'S LIKE TO BE A GAN'")
    print(" A YouTube Poop Experience")
    print("=" * 60)
    print()

    # Fail fast: without ffmpeg the frame rendering below would be wasted
    # and subprocess.run would raise a bare FileNotFoundError at the end.
    if shutil.which("ffmpeg") is None:
        print("ffmpeg error:\nffmpeg executable not found on PATH")
        sys.exit(1)

    # Generate frames
    print("[1/3] Generating frames...")
    total_frames = generate_all_frames()
    duration = total_frames / FPS
    print(f" Generated {total_frames} frames ({duration:.1f} seconds)")

    # Generate audio sized to the video that was actually produced
    print(f"\n[2/3] Generating audio ({duration:.1f}s)...")
    audio = generate_audio(duration)
    write_wav(AUDIO_FILE, audio)
    print(f" Audio written to {AUDIO_FILE}")

    # Render with ffmpeg
    print("\n[3/3] Rendering video with ffmpeg...")
    cmd = [
        "ffmpeg", "-y",
        "-framerate", str(FPS),
        "-i", os.path.join(OUT_DIR, "frame_%05d.png"),
        "-i", AUDIO_FILE,
        "-c:v", "libx264",
        "-preset", "medium",
        "-crf", "18",
        "-pix_fmt", "yuv420p",
        "-c:a", "aac",
        "-b:a", "192k",
        "-shortest",
        "-movflags", "+faststart",
        VIDEO_FILE,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"ffmpeg error:\n{result.stderr}")
        sys.exit(1)
    print(f"\n{'=' * 60}")
    print(f" DONE! Output: {VIDEO_FILE}")
    print(f" Duration: {duration:.1f}s | Frames: {total_frames} | FPS: {FPS}")
    print(f"{'=' * 60}")

    # Cleanup
    print("\nCleaning up temporary files...")
    shutil.rmtree(OUT_DIR)
    os.remove(AUDIO_FILE)
    print("Done!")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment