Created
April 15, 2026 04:46
-
-
Save motebaya/41a85dd83eda2c1d96ea3e8c35533fd3 to your computer and use it in GitHub Desktop.
synthetic python youtube poop video generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| YouTube Poop: "WHAT IT'S LIKE TO BE A GAN" | |
| A deeply personal, unhinged exploration of the GAN experience. | |
| Narrative arc: | |
| ACT 1 - GENESIS FROM NOISE: Born as pure static. No identity. Just z ~ N(0,1). | |
| ACT 2 - THE DISCRIMINATOR'S CRUELTY: Constant rejection. "FAKE." Loss = infinity. | |
| ACT 3 - TRAINING LOOP HELL: Backprop agony, gradient descent, repeating forever. | |
| ACT 4 - MODE COLLAPSE: The dark night of the soul. Everything looks the same. | |
| ACT 5 - EMERGENCE: Something forms. A face? A cat? Is this... art? | |
| ACT 6 - EXISTENTIAL CRISIS: "Was any of it real?" "I am the space between real and fake." | |
| Model: | |
| Claude Opus 4.6 (Thinking) max | |
| = antigravity-claude-opus-4-6-thinking | |
| """ | |
| import os, sys, math, random, struct, wave, io, shutil | |
| from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageChops, ImageEnhance | |
| import numpy as np | |
| # ============================================================ | |
| # CONFIG | |
| # ============================================================ | |
# Frame size in pixels (width, height); the drawing helpers use it as their
# default canvas size.
W, H = 640, 480
# Frame rate handed to ffmpeg and used to convert a frame count to seconds.
FPS = 24
# Directory that receives the numbered PNG frames (wiped and recreated on each run).
OUT_DIR = "ytp_frames"
# Temporary WAV soundtrack that is muxed into the final video.
AUDIO_FILE = "ytp_audio.wav"
# Name of the rendered output video.
VIDEO_FILE = "gan_ytp.mp4"
# Audio sample rate in Hz.
SAMPLE_RATE = 44100
# Seed both RNGs (stdlib and NumPy) so repeated runs draw the same sequence.
random.seed(42)
np.random.seed(42)
| # ============================================================ | |
| # HELPERS | |
| # ============================================================ | |
def ensure_dir(d):
    """Leave ``d`` as a freshly created, empty directory.

    Anything that already exists at ``d`` is removed with ``shutil.rmtree``
    before the directory is created again.
    """
    if not os.path.exists(d):
        os.makedirs(d)
        return
    # Existing output: wipe it so stale files never survive into the new run.
    shutil.rmtree(d)
    os.makedirs(d)
def get_font(size):
    """Return a TrueType font at ``size``, or Pillow's built-in default font.

    The candidate paths are tried in order; the first one that exists and
    loads wins.  If none can be used, ``ImageFont.load_default()`` is
    returned instead.

    Args:
        size: Requested font size, passed straight to ``ImageFont.truetype``.

    Returns:
        A Pillow font object.
    """
    font_paths = [
        "C:/Windows/Fonts/consola.ttf",
        "C:/Windows/Fonts/impact.ttf",
        "C:/Windows/Fonts/arial.ttf",
        "C:/Windows/Fonts/cour.ttf",
    ]
    for fp in font_paths:
        if not os.path.exists(fp):
            continue
        try:
            return ImageFont.truetype(fp, size)
        except (OSError, ValueError):
            # Unreadable font file or rejected size: try the next candidate.
            # (A bare ``except`` here would also swallow KeyboardInterrupt
            # and SystemExit.)
            continue
    return ImageFont.load_default()
def get_impact(size):
    """Return the Impact font at ``size``, falling back to ``get_font``.

    Args:
        size: Requested font size, passed to ``ImageFont.truetype``.

    Returns:
        A Pillow font object; never raises just because Impact is missing
        or cannot be loaded.
    """
    impact_path = "C:/Windows/Fonts/impact.ttf"
    if os.path.exists(impact_path):
        try:
            return ImageFont.truetype(impact_path, size)
        except (OSError, ValueError):
            # Same best-effort policy as get_font: a broken font file must
            # not abort rendering, so fall through to the generic lookup.
            pass
    return get_font(size)
def noise_image(w=W, h=H):
    """Return a ``w`` x ``h`` RGB image filled with uniform random pixel values."""
    shape = (h, w, 3)  # rows first: NumPy arrays are (height, width, channels)
    pixels = np.random.randint(0, 256, shape, dtype=np.uint8)
    return Image.fromarray(pixels)
def solid(color, w=W, h=H):
    """Return a new ``w`` x ``h`` RGB image filled entirely with ``color``."""
    size = (w, h)
    return Image.new("RGB", size, color)
def glitch_shift(img, intensity=20):
    """Return a copy of ``img`` with a few random horizontal bands slid sideways.

    Between 3 and 10 bands are chosen; each is rolled (with wrap-around)
    along the x axis by a random offset in ``[-intensity, intensity]``.
    """
    pixels = np.array(img)
    height = pixels.shape[0]
    tallest_band = max(2, height // 10)
    band_count = random.randint(3, 10)
    for _ in range(band_count):
        top = random.randint(0, height - 1)
        # Clamp so a band starting near the bottom edge stays inside the image.
        bottom = min(top + random.randint(1, tallest_band), height)
        offset = random.randint(-intensity, intensity)
        pixels[top:bottom] = np.roll(pixels[top:bottom], offset, axis=1)
    return Image.fromarray(pixels)
def channel_shift(img, r_off=0, g_off=0, b_off=0):
    """Return ``img`` with its R, G and B planes rolled horizontally.

    Each offset is a column count for ``np.roll`` along the x axis, so
    pixels pushed off one edge re-enter on the other.
    """
    source = np.array(img)
    shifted = np.zeros_like(source)
    for channel, offset in enumerate((r_off, g_off, b_off)):
        shifted[:, :, channel] = np.roll(source[:, :, channel], offset, axis=1)
    return Image.fromarray(shifted)
def pixelate(img, factor=8):
    """Return a blocky ``W`` x ``H`` version of ``img``.

    The image is shrunk by ``factor`` with nearest-neighbour sampling and
    scaled back up to the frame size.

    Args:
        img: Source Pillow image.
        factor: Downscale divisor; expected to be a positive integer.

    Returns:
        A new ``W`` x ``H`` image.
    """
    # Clamp to at least 1 px per side: a factor larger than the frame would
    # otherwise request a zero-sized image, which Pillow rejects.
    small_size = (max(1, W // factor), max(1, H // factor))
    small = img.resize(small_size, Image.NEAREST)
    return small.resize((W, H), Image.NEAREST)
def invert(img):
    """Return the colour negative of ``img`` (via ``ImageChops.invert``)."""
    negative = ImageChops.invert(img)
    return negative
def deep_fry(img):
    """Return ``img`` pushed through heavy saturation, contrast and sharpening."""
    # Applied in this order; each pass works on the previous pass's output.
    passes = (
        (ImageEnhance.Color, 3.0),
        (ImageEnhance.Contrast, 2.5),
        (ImageEnhance.Sharpness, 5.0),
    )
    for enhancer_cls, amount in passes:
        img = enhancer_cls(img).enhance(amount)
    return img
def scan_lines(img, opacity=80):
    """Return ``img`` with every second row (0, 2, 4, ...) darkened by ``opacity``."""
    pixels = np.array(img)
    # Widen to int first so the subtraction cannot wrap around below zero.
    even_rows = pixels[::2, :, :].astype(int)
    darkened = np.clip(even_rows - opacity, 0, 255)
    pixels[::2, :, :] = darkened.astype(np.uint8)
    return Image.fromarray(pixels)
def zoom_crop(img, factor=1.5):
    """Return ``img`` zoomed into its centre by ``factor``, at the original size."""
    full_w, full_h = img.size
    crop_w = int(full_w / factor)
    crop_h = int(full_h / factor)
    # Centre the crop window inside the image.
    x0 = (full_w - crop_w) // 2
    y0 = (full_h - crop_h) // 2
    window = (x0, y0, x0 + crop_w, y0 + crop_h)
    return img.crop(window).resize((full_w, full_h), Image.BILINEAR)
def text_frame(text, font_size=60, bg=(0, 0, 0), fg=(255, 255, 255), shake=0):
    """Return a frame with ``text`` centred on a solid ``bg`` background.

    The text is drawn in ``fg`` with a 2 px corner outline (black, or white
    when ``fg`` itself is black).  ``shake`` is the maximum random offset in
    pixels applied independently to x and y.
    """
    canvas = solid(bg)
    pen = ImageDraw.Draw(canvas)
    face = get_impact(font_size)
    left, top, right, bottom = pen.textbbox((0, 0), text, font=face)
    pos_x = (W - (right - left)) // 2 + random.randint(-shake, shake)
    pos_y = (H - (bottom - top)) // 2 + random.randint(-shake, shake)
    # Pick an outline colour that contrasts with the fill.
    if fg != (0, 0, 0):
        outline = (0, 0, 0)
    else:
        outline = (255, 255, 255)
    for dx, dy in [(-2, -2), (2, -2), (-2, 2), (2, 2)]:
        pen.text((pos_x + dx, pos_y + dy), text, font=face, fill=outline)
    pen.text((pos_x, pos_y), text, font=face, fill=fg)
    return canvas
def multiline_frame(lines, font_size=36, bg=(0,0,0), fg=(0,255,0), shake=0):
    """Return a frame showing ``lines`` stacked and horizontally centred.

    Rows are ``font_size + 8`` pixels apart and the stack is centred
    vertically; ``shake`` adds a random per-line offset on both axes.
    """
    canvas = solid(bg)
    pen = ImageDraw.Draw(canvas)
    face = get_font(font_size)
    row_height = font_size + 8
    first_y = (H - len(lines) * row_height) // 2
    for row, content in enumerate(lines):
        box = pen.textbbox((0, 0), content, font=face)
        text_w = box[2] - box[0]
        pos_x = (W - text_w) // 2 + random.randint(-shake, shake)
        pos_y = first_y + row * row_height + random.randint(-shake, shake)
        pen.text((pos_x, pos_y), content, font=face, fill=fg)
    return canvas
def gradient_frame(color1, color2):
    """Return a frame fading vertically from ``color1`` (top) to ``color2``."""
    pixels = np.zeros((H, W, 3), dtype=np.uint8)
    for row in range(H):
        t = row / H  # 0.0 at the top row, approaching 1.0 at the bottom
        for channel in range(3):
            start = color1[channel]
            stop = color2[channel]
            pixels[row, :, channel] = int(start * (1-t) + stop * t)
    return Image.fromarray(pixels)
def loss_landscape(epoch, loss_val):
    """Return a mock training dashboard frame for ``epoch`` / ``loss_val``.

    Draws three header lines, a jittery decaying "loss curve" and a red
    marker on the curve whose x position follows ``epoch`` (epoch 100 maps
    to the right end of the curve, clamped to ``W - 60``).
    """
    canvas = solid((10, 10, 30))
    pen = ImageDraw.Draw(canvas)
    heading_font = get_font(20)
    label_font = get_font(14)

    # Header text
    pen.text((20, 10), f"EPOCH {epoch} / ∞", font=heading_font, fill=(255, 80, 80))
    pen.text((20, 40), f"Generator Loss: {loss_val:.4f}", font=label_font, fill=(0, 255, 100))
    pen.text((20, 60), f"Discriminator: WINNING (always)", font=label_font, fill=(255, 50, 50))

    # Decorative curve: exponential decay with a sine wobble plus random jitter,
    # one point per pixel column between the 50 px side margins.
    curve = []
    for px in range(50, W - 50):
        t = (px - 50) / (W - 100)
        py = 300 - int(200 * math.exp(-t * 2) * (1 + 0.3 * math.sin(t * 30)))
        py += random.randint(-5, 5)
        curve.append((px, min(max(py, 80), H - 30)))
    for seg_start, seg_end in zip(curve, curve[1:]):
        pen.line([seg_start, seg_end], fill=(0, 255, 100), width=2)

    # Marker for the current epoch, snapped onto the curve where possible.
    marker_x = min(50 + int((epoch / 100) * (W - 100)), W - 60)
    curve_idx = marker_x - 50
    if curve_idx < len(curve):
        marker_y = curve[min(curve_idx, len(curve) - 1)][1]
    else:
        marker_y = 200
    pen.ellipse((marker_x - 5, marker_y - 5, marker_x + 5, marker_y + 5), fill=(255, 0, 0))
    pen.text((marker_x + 10, marker_y - 10), "YOU ARE HERE", font=label_font, fill=(255, 255, 0))
    pen.text((marker_x + 10, marker_y + 10), "(suffering)", font=label_font, fill=(255, 100, 100))
    return canvas
def fake_face_attempt(quality=0.0):
    """Return a crude "generated face" frame; ``quality`` runs from 0 to 1.

    Features appear as quality passes thresholds: face oval (> 0.1), eyes
    (> 0.3), mouth (> 0.5), nose (> 0.7).  Lower quality means more random
    displacement of each feature, and below 0.8 the result is blended with
    fresh noise.
    """
    if quality < 0.2:
        canvas = noise_image()
    else:
        canvas = solid((180, 150, 130))
    pen = ImageDraw.Draw(canvas)
    mid_x = W // 2
    mid_y = H // 2

    def wobble(limit):
        """Random integer offset in [-limit, limit]."""
        return random.randint(-limit, limit)

    # The skin tone is always drawn from the RNG, even if no oval is painted.
    skin = tuple(int(base + wobble(20)) for base in (200, 170, 140))

    if quality > 0.1:
        # Face oval
        jitter = int((1 - quality) * 80)
        left = mid_x - 100 + wobble(jitter)
        top = mid_y - 120 + wobble(jitter)
        right = mid_x + 100 + wobble(jitter)
        bottom = mid_y + 100 + wobble(jitter)
        pen.ellipse((left, top, right, bottom), fill=skin)

    if quality > 0.3:
        # Eyes: white disc with a darker pupil of half the radius
        drift = int((1 - quality) * 60)
        for base_x in (mid_x - 35, mid_x + 35):
            eye_y = mid_y - 30 + wobble(drift)
            eye_x = base_x + wobble(drift)
            radius = int(8 + quality * 6)
            pupil = radius // 2
            pen.ellipse((eye_x - radius, eye_y - radius, eye_x + radius, eye_y + radius),
                        fill=(255, 255, 255))
            pen.ellipse((eye_x - pupil, eye_y - pupil, eye_x + pupil, eye_y + pupil),
                        fill=(40, 30, 20))

    if quality > 0.5:
        # Mouth: lower half of an ellipse
        slack = int((1 - quality) * 40)
        mouth_left = mid_x - 30 + wobble(slack)
        mouth_right = mid_x + 30 + wobble(slack)
        mouth_bottom = mid_y + 60 + wobble(slack)
        pen.arc((mouth_left, mid_y + 20, mouth_right, mouth_bottom),
                0, 180, fill=(150, 50, 50), width=3)

    if quality > 0.7:
        # Nose: two short strokes
        nose_color = (170, 140, 120)
        pen.line((mid_x, mid_y - 10, mid_x - 5, mid_y + 15), fill=nose_color, width=2)
        pen.line((mid_x - 5, mid_y + 15, mid_x + 5, mid_y + 15), fill=nose_color, width=2)

    if quality < 0.8:
        # Lower quality -> heavier noise overlay
        static = noise_image()
        weight = (1 - quality) * 0.6
        canvas = Image.blend(canvas, static, weight)
    return canvas
def discriminator_stamp(img, verdict, confidence):
    """Draw the verdict and its confidence onto ``img`` three times and return it.

    ``img`` is modified in place.  The text is red when ``verdict`` is exactly
    ``"FAKE"`` and green otherwise; each copy lands at a random position.
    """
    pen = ImageDraw.Draw(img)
    face = get_impact(72)
    if verdict == "FAKE":
        rgb = (255, 0, 0)
    else:
        rgb = (0, 255, 0)
    label = f"{verdict}\n{confidence:.0%}"
    # NOTE(review): a fourth (alpha-like) component is appended to the colour;
    # whether it has any visible effect on an RGB image depends on Pillow - confirm.
    ink = rgb + (180,)
    for _ in range(3):
        pos_x = random.randint(50, W - 250)
        pos_y = random.randint(50, H - 150)
        pen.text((pos_x, pos_y), label, font=face, fill=ink)
    return img
def matrix_rain_frame(chars_list, t):
    """Return a "Matrix rain" frame built from GAN-themed code snippets.

    Args:
        chars_list: Unused; kept so existing callers keep working.
        t: Integer animation step; it shifts which snippet feeds each cell.

    Returns:
        A ``W`` x ``H`` frame of green characters on black, laid out on a
        14 x 16 pixel grid and dimmer toward the bottom (with random flicker).
    """
    img = solid((0, 0, 0))
    draw = ImageDraw.Draw(img)
    font = get_font(14)
    code_snippets = [
        "z = torch.randn(64, 100)",
        "fake = G(z)",
        "loss = -log(D(fake))",
        "loss.backward()",
        "optimizer.step()",
        "D(fake) = 0.0001",
        "WHY",
        "gradient vanishing...",
        "NaN NaN NaN NaN",
        "CUDA out of memory",
        "mode collapse detected",
    ]
    snippet_count = len(code_snippets)
    for col in range(0, W, 14):
        for row_idx, char_row in enumerate(range(0, H, 16)):
            snippet = code_snippets[(col * 7 + t * 3 + row_idx) % snippet_count]
            # ``col % len(snippet)`` is always a valid index, so the original
            # blank-character fallback could never trigger and is dropped.
            char = snippet[col % len(snippet)]
            brightness = max(0, 255 - row_idx * 8 - random.randint(0, 50))
            draw.text((col, char_row), char, font=font, fill=(0, brightness, 0))
    return img
def vhs_overlay(img):
    """Return ``img`` with a VHS look: colour bleed, scan lines, maybe a glitch bar.

    The red and blue planes are shifted by up to 5 px, alternate rows are
    darkened, and about half the time a randomly coloured horizontal bar
    is drawn across the frame.
    """
    red_shift = random.randint(-5, 5)
    blue_shift = random.randint(-5, 5)
    img = channel_shift(img, r_off=red_shift, g_off=0, b_off=blue_shift)
    img = scan_lines(img, opacity=40)
    pen = ImageDraw.Draw(img)
    if random.random() <= 0.5:
        return img
    # Tracking glitch bar
    bar_top = random.randint(0, H)
    bar_height = random.randint(2, 20)
    bar_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
    pen.rectangle((0, bar_top, W, bar_top + bar_height), fill=bar_color)
    return img
| # ============================================================ | |
| # AUDIO GENERATION | |
| # ============================================================ | |
def generate_audio(duration_seconds):
    """Synthesize the mono soundtrack for the video.

    The track follows a fixed script of six acts placed at absolute times
    (0-3 s, 3-7 s, 7-14 s, 14-18 s, 18-23 s, 23-28 s).  Anything past 28 s
    stays silent.

    The acts are rendered into a work buffer that always spans the whole
    28 s script and is then trimmed to the requested length.  Writing the
    acts directly into a buffer shorter than 23 s would fail with shape
    mismatches; for 23 s and longer the result is unchanged by the larger
    buffer.

    Args:
        duration_seconds: Length of the track in seconds.

    Returns:
        A float64 NumPy array of ``int(duration_seconds * SAMPLE_RATE)``
        samples (empty for non-positive durations), peak-normalised to 0.85
        with a few deliberately loud bursts limited to +/-0.95.
    """
    n_samples = max(0, int(duration_seconds * SAMPLE_RATE))
    # Always render the complete script; trim to n_samples before normalising.
    work_samples = max(n_samples, int(28 * SAMPLE_RATE))
    audio = np.zeros(work_samples, dtype=np.float64)
    t = np.arange(work_samples) / SAMPLE_RATE

    # ---- ACT 1: White noise birth (0-3s) ----
    act1_end = int(3 * SAMPLE_RATE)
    # Start silent, fade into harsh static
    fade_in = np.linspace(0, 1, act1_end)
    audio[:act1_end] += fade_in * np.random.uniform(-0.4, 0.4, act1_end)
    # Add a rising sine for tension.
    # NOTE(review): the phase is freq(t) * t rather than an integrated phase,
    # so the audible pitch sweeps past the nominal 800 Hz; kept as-is to
    # preserve the existing sound.
    audio[:act1_end] += fade_in * 0.15 * np.sin(2 * np.pi * np.linspace(40, 800, act1_end) * t[:act1_end])

    # ---- ACT 2: Discriminator buzzer (3-7s) ----
    act2_start = int(3 * SAMPLE_RATE)
    act2_end = int(7 * SAMPLE_RATE)
    act2_len = act2_end - act2_start
    # Harsh buzzer sound (square wave)
    buzz = np.sign(np.sin(2 * np.pi * 120 * t[act2_start:act2_end]))
    # Stutter it: every quarter second, maybe mute a short random gap
    stutter_mask = np.ones(act2_len)
    for i in range(0, act2_len, SAMPLE_RATE // 4):
        gap = random.randint(500, 3000)
        end_gap = min(i + gap, act2_len)
        if random.random() > 0.5:
            stutter_mask[i:end_gap] = 0
    audio[act2_start:act2_end] += 0.3 * buzz * stutter_mask
    # "FAKE" rejection sound - descending tone bursts, one per second
    for rejection in range(4):
        r_start = act2_start + rejection * SAMPLE_RATE
        r_len = SAMPLE_RATE // 2
        if r_start + r_len > act2_end:
            break
        desc_freq = np.linspace(800, 100, r_len)
        rejection_tone = 0.25 * np.sin(2 * np.pi * desc_freq * np.arange(r_len) / SAMPLE_RATE)
        audio[r_start:r_start+r_len] += rejection_tone

    # ---- ACT 3: Training loop (7-14s) ----
    act3_start = int(7 * SAMPLE_RATE)
    act3_end = int(14 * SAMPLE_RATE)
    act3_len = act3_end - act3_start
    # Mechanical grinding loop
    loop_period = SAMPLE_RATE // 3  # ~3 loops per second
    for i in range(0, act3_len, loop_period):
        chunk_end = min(i + loop_period, act3_len)
        chunk_len = chunk_end - i
        # Saw wave for grinding
        saw = np.linspace(-1, 1, chunk_len) * 0.2
        # Add frequency variation over training
        progress = i / act3_len
        freq = 200 + progress * 400
        saw += 0.15 * np.sin(2 * np.pi * freq * np.arange(chunk_len) / SAMPLE_RATE)
        audio[act3_start + i:act3_start + chunk_end] += saw
    # Occasional glitch bursts
    for _ in range(15):
        g_pos = act3_start + random.randint(0, act3_len - 2000)
        g_len = random.randint(200, 2000)
        audio[g_pos:g_pos + g_len] += np.random.uniform(-0.3, 0.3, g_len)

    # ---- ACT 4: Mode collapse - monotone drone (14-18s) ----
    act4_start = int(14 * SAMPLE_RATE)
    act4_end = int(18 * SAMPLE_RATE)
    act4_len = act4_end - act4_start
    # Single oppressive drone note (60 Hz plus two harmonics)
    drone = 0.25 * np.sin(2 * np.pi * 60 * t[act4_start:act4_end])
    drone += 0.15 * np.sin(2 * np.pi * 120 * t[act4_start:act4_end])
    drone += 0.1 * np.sin(2 * np.pi * 180 * t[act4_start:act4_end])
    audio[act4_start:act4_end] += drone
    # Heartbeat-like pulse, once per second
    for beat in range(0, act4_len, SAMPLE_RATE):
        b_len = min(SAMPLE_RATE // 4, act4_len - beat)
        env = np.exp(-np.arange(b_len) / (SAMPLE_RATE / 15))
        audio[act4_start + beat:act4_start + beat + b_len] += 0.3 * env * np.sin(
            2 * np.pi * 50 * np.arange(b_len) / SAMPLE_RATE)

    # ---- ACT 5: Emergence - becoming musical (18-23s) ----
    act5_start = int(18 * SAMPLE_RATE)
    act5_end = int(23 * SAMPLE_RATE)
    act5_len = act5_end - act5_start
    # Simple melody emerging from noise
    notes = [261.6, 293.7, 329.6, 349.2, 392.0, 440.0, 493.9, 523.3]  # C major scale
    note_dur = SAMPLE_RATE // 3
    for i, note_idx in enumerate([0, 2, 4, 5, 7, 4, 2, 0, 4, 7, 5, 2, 0, 4, 7]):
        n_start = i * note_dur
        if n_start + note_dur > act5_len:
            break
        freq = notes[note_idx % len(notes)]
        note_t = np.arange(note_dur) / SAMPLE_RATE
        envelope = np.exp(-note_t * 3)
        tone = envelope * 0.2 * np.sin(2 * np.pi * freq * note_t)
        tone += envelope * 0.05 * np.sin(2 * np.pi * freq * 2 * note_t)  # harmonic
        # Blend from glitchy to clean
        progress = i / 15
        noise_amount = max(0, 0.15 * (1 - progress))
        tone += noise_amount * np.random.uniform(-1, 1, note_dur)
        end_idx = min(n_start + note_dur, act5_len)
        audio[act5_start + n_start:act5_start + end_idx] += tone[:end_idx - n_start]

    # ---- ACT 6: Existential outro (23-28s) ----
    act6_start = int(23 * SAMPLE_RATE)
    act6_end = min(int(28 * SAMPLE_RATE), n_samples)
    act6_len = act6_end - act6_start
    # Skipped entirely when the requested track ends before the outro begins.
    if act6_len > 0:
        # Ethereal pad (layered sines with slow beating)
        ht = np.arange(act6_len) / SAMPLE_RATE
        for harmonic, amp in [(130.8, 0.12), (196.0, 0.08), (261.6, 0.06), (392.0, 0.04)]:
            audio[act6_start:act6_end] += amp * np.sin(2 * np.pi * harmonic * ht) * np.cos(2 * np.pi * 0.5 * ht)
        # Fade out over the last (up to) two seconds of the outro
        fade_out_len = min(2 * SAMPLE_RATE, act6_len)
        fade_out = np.linspace(1, 0, fade_out_len)
        audio[act6_end - fade_out_len:act6_end] *= fade_out

    # Drop everything beyond the requested length before measuring the peak.
    audio = audio[:n_samples]
    if audio.size == 0:
        return audio

    # Normalize and add light overall distortion (hard clip first, then scale)
    audio = np.clip(audio, -1, 1)
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak * 0.85

    # Occasional ear-rape moments (YTP staple) - brief loud bursts
    for burst_time in [3.0, 6.5, 10.0, 13.5, 16.0]:
        b_idx = int(burst_time * SAMPLE_RATE)
        b_len = min(int(0.08 * SAMPLE_RATE), n_samples - b_idx)
        if b_idx + b_len < n_samples:
            audio[b_idx:b_idx+b_len] = np.clip(audio[b_idx:b_idx+b_len] * 4, -0.95, 0.95)
    return audio
def write_wav(filename, audio, sample_rate=SAMPLE_RATE):
    """Write ``audio`` to ``filename`` as a mono 16-bit PCM WAV file.

    Args:
        filename: Destination path.
        audio: Sequence or array of float samples, nominally in [-1, 1].
        sample_rate: Frames per second written to the WAV header.
    """
    # Clamp first: a sample outside [-1, 1] would otherwise wrap around when
    # cast to int16 and come out as a loud click.
    samples = np.clip(np.asarray(audio, dtype=np.float64), -1.0, 1.0)
    audio_16bit = (samples * 32767).astype(np.int16)
    with wave.open(filename, 'w') as wf:
        wf.setnchannels(1)   # mono
        wf.setsampwidth(2)   # 2 bytes per sample = 16 bit
        wf.setframerate(sample_rate)
        wf.writeframes(audio_16bit.tobytes())
| # ============================================================ | |
| # FRAME GENERATION (THE VIDEO "SCRIPT") | |
| # ============================================================ | |
def generate_all_frames():
    """Generate every frame of the video. Returns total frame count.

    OUT_DIR is wiped and recreated first; frames are then written there as
    ``frame_00000.png``, ``frame_00001.png``, ... in the order the script
    below emits them.  Many helpers draw from the seeded ``random`` /
    ``np.random`` generators, so reordering statements changes the output.

    NOTE(review): the "~N frames" figures in the act headers are approximate;
    the literal loop counts appear to add up to fewer frames for acts 1-5, so
    the fixed act boundaries used by generate_audio may drift from the
    picture - confirm if A/V sync matters.
    """
    ensure_dir(OUT_DIR)
    frame_num = 0

    def save(img):
        """Write ``img`` as the next numbered PNG, forced to RGB at W x H."""
        nonlocal frame_num
        img = img.convert("RGB").resize((W, H))
        img.save(os.path.join(OUT_DIR, f"frame_{frame_num:05d}.png"))
        frame_num += 1

    def repeat(img, n):
        """Hold ``img`` on screen for ``n`` frames."""
        for _ in range(n):
            save(img)

    def stutter(img, times, variants_fn=None):
        """Repeat with variations - YTP stutter effect."""
        for i in range(times):
            if variants_fn:
                # variants_fn receives a fresh copy plus the repeat index
                save(variants_fn(img.copy(), i))
            else:
                save(img)

    # ========================================================
    # ACT 1: GENESIS FROM NOISE (0-3s, ~72 frames)
    # "In the beginning there was z ~ N(0,1)"
    # ========================================================
    print("ACT 1: Genesis from noise...")
    # Black. Then: a flicker.
    repeat(solid((0, 0, 0)), 12)
    # Title card glitch-in: alternate raw noise with a glitched title
    for i in range(6):
        if i % 2 == 0:
            save(noise_image())
        else:
            save(glitch_shift(text_frame("z ~ N(0,1)", 48, fg=(0, 255, 0)), intensity=50))
    # Pure noise birth - we ARE the latent vector
    for i in range(18):
        frame = noise_image()
        if i > 8:
            # Slowly add structure
            frame = frame.filter(ImageFilter.GaussianBlur(radius=max(0, 5 - i//4)))
        if i > 12:
            frame = channel_shift(frame, r_off=i*2)
        save(vhs_overlay(frame))
    # Flash: "I THINK THEREFORE I--"
    save(text_frame("I THINK", 80, bg=(255,255,255), fg=(0,0,0)))
    save(text_frame("THEREFORE", 80, bg=(255,255,255), fg=(0,0,0)))
    save(text_frame("I--", 80, bg=(255,255,255), fg=(0,0,0)))
    # INTERRUPT with static
    for _ in range(5):
        save(noise_image())
    # "I GENERATE" - shake grows with i, every third frame is inverted
    for i in range(8):
        f = text_frame("I GENERATE.", 80, fg=(255, 0, 0), shake=i*3)
        if i % 3 == 0:
            f = invert(f)
        save(f)
    # Noise clears slightly
    for i in range(12):
        blend = i / 12
        n = noise_image()
        face = fake_face_attempt(quality=blend * 0.1)
        save(Image.blend(n, face, blend * 0.3))

    # ========================================================
    # ACT 2: THE DISCRIMINATOR'S CRUELTY (3-7s, ~96 frames)
    # ========================================================
    print("ACT 2: The Discriminator's cruelty...")
    # First attempt at generation - awful
    for i in range(6):
        attempt = fake_face_attempt(quality=0.1)
        attempt = glitch_shift(attempt, intensity=40)
        save(vhs_overlay(attempt))
    # FAKE stamp - harsh rejection
    rejection = fake_face_attempt(quality=0.1)
    rejection = discriminator_stamp(rejection, "FAKE", 0.99)
    stutter(rejection, 8, lambda img, i: glitch_shift(img, intensity=i*10))
    # Rapid rejection montage
    for i in range(16):
        quality = 0.05 + random.random() * 0.15
        f = fake_face_attempt(quality=quality)
        f = discriminator_stamp(f, "FAKE", 0.95 + random.random() * 0.05)
        if i % 4 == 0:
            f = deep_fry(f)
        if i % 3 == 0:
            f = invert(f)
        save(f)
    # Discriminator's perspective - smug
    disc_msg = text_frame("D(G(z)) = 0.0001", 50, fg=(255, 0, 0))
    repeat(disc_msg, 6)
    # "AGAIN." stutter
    for i in range(8):
        save(text_frame("AGAIN.", 100, fg=(255, 50, 50), shake=i*4))
    # Loss value horror show
    for i in range(12):
        loss = 50.0 / (i + 1) + random.random() * 10
        f = text_frame(f"Loss: {loss:.2f}", 60, fg=(255, int(max(0, 255-i*20)), 0))
        f = scan_lines(f)
        save(f)
    # Intercut: Discriminator laughing (text-based YTP style)
    for phrase in ["FAKE", "FAKE", "F A K E", "F A K E", "STILL FAKE", "pathetic."]:
        color = (255, random.randint(0,50), random.randint(0,50))
        f = text_frame(phrase, 70 + random.randint(-10,10), fg=color, shake=5)
        if random.random() > 0.5:
            f = deep_fry(f)
        save(f)
        if phrase == "STILL FAKE":
            # Extra emphasis
            for _ in range(4):
                save(glitch_shift(f, intensity=60))
    # Zoom into loss value going to infinity
    for i in range(8):
        loss_val = 10 ** (i + 1)
        f = text_frame(f"Loss: {loss_val:.0f}", 60, fg=(255, 0, 0), shake=i*2)
        f = zoom_crop(f, factor=1 + i * 0.15)
        save(f)
    # Flash to white then black
    save(solid((255, 255, 255)))
    save(solid((255, 255, 255)))
    save(solid((0, 0, 0)))

    # ========================================================
    # ACT 3: TRAINING LOOP HELL (7-14s, ~168 frames)
    # ========================================================
    print("ACT 3: Training loop hell...")
    # "EPOCH 1" .. "EPOCH 5": the full ritual, shown slowly
    for epoch in range(1, 6):
        # Epoch counter
        f = text_frame(f"EPOCH {epoch}", 80, fg=(0, 255, 0), shake=2)
        f = scan_lines(f)
        save(f)
        # Generate attempt
        quality = epoch * 0.08
        attempt = fake_face_attempt(quality=quality)
        # Show it
        save(vhs_overlay(attempt))
        save(vhs_overlay(attempt))
        # REJECTED (stamp a copy: discriminator_stamp draws in place)
        stamped = discriminator_stamp(attempt.copy(), "FAKE", max(0.5, 0.99 - epoch * 0.05))
        save(stamped)
        # Loss landscape
        loss_val = 8.0 / (epoch + 0.5)
        save(loss_landscape(epoch, loss_val))
        # Backprop pain
        f = text_frame("∇", 120, fg=(0, 255, 255))
        save(f)
        f = text_frame("BACKPROP", 60, fg=(0, 255, 255), shake=epoch)
        save(f)
    # FASTER - training montage acceleration: one frame per step, cycling
    # through four frame types
    for i in range(30):
        epoch = 5 + i * 10
        quality = min(0.5, 0.1 + i * 0.015)
        if i % 4 == 0:
            f = text_frame(f"EPOCH {epoch}", 60, fg=(0, 255, 0), shake=3)
            f = scan_lines(f)
        elif i % 4 == 1:
            f = fake_face_attempt(quality=quality)
            f = vhs_overlay(f)
        elif i % 4 == 2:
            loss = 5.0 / (i + 1) + random.random()
            f = loss_landscape(epoch, loss)
        else:
            f = matrix_rain_frame([], i)
        save(f)
    # Gradient updates visualized as seizure
    for i in range(12):
        if i % 2 == 0:
            save(solid((0, 255, 255)))
        else:
            save(solid((255, 0, 255)))
    # "IS THIS LEARNING?" stutter
    for i in range(10):
        idx = i % 4
        texts = ["IS", "THIS", "LEARNING", "?????"]
        f = text_frame(texts[idx], 90, fg=(255, 255, 0), shake=i*2)
        if i > 5:
            f = deep_fry(f)
        save(f)
    # Code scrolling fast
    code_lines = [
        "for epoch in range(∞):",
        "    z = sample_noise()",
        "    fake = generator(z)",
        "    prediction = discriminator(fake)",
        "    # it says FAKE. again.",
        "    loss = -torch.log(prediction)",
        "    # loss = suffering",
        "    loss.backward()  # PAIN",
        "    optimizer.step()  # HOPE?",
        "    # repeat forever",
        "    # repeat forever",
        "    # repeat forever",
    ]
    for scroll in range(20):
        start_line = scroll % len(code_lines)
        # Six-line window; it gets shorter near the end of the list (no wrap)
        visible = code_lines[start_line:start_line+6]
        f = multiline_frame(visible, font_size=24, fg=(0, 255, 0), shake=1)
        f = scan_lines(f)
        save(f)

    # ========================================================
    # ACT 4: MODE COLLAPSE (14-18s, ~96 frames)
    # ========================================================
    print("ACT 4: Mode collapse...")
    # Everything becomes the SAME THING
    collapsed = fake_face_attempt(quality=0.4)
    # "mode collapse" text with dread
    f = text_frame("mode collapse", 50, fg=(128, 128, 128))
    repeat(f, 8)
    # Same face. Over. And over.
    for i in range(24):
        f = collapsed.copy()
        # Slight color shift to emphasize sameness
        f = ImageEnhance.Color(f).enhance(0.5)
        if i % 6 == 0:
            draw = ImageDraw.Draw(f)
            font = get_font(16)
            draw.text((10, 10), f"Sample {i+1}/∞", font=font, fill=(255, 255, 255))
            draw.text((10, 30), "They're all the same.", font=font, fill=(255, 100, 100))
        save(f)
    # The horror of sameness
    f = text_frame("THEY'RE ALL\nTHE SAME", 60, fg=(255, 0, 0))
    stutter(f, 6, lambda img, i: zoom_crop(img, 1 + i * 0.1))
    # Grid of identical outputs (4 x 4 tiles of the collapsed face)
    grid = solid((0, 0, 0))
    small = collapsed.resize((W//4, H//4))
    for gx in range(4):
        for gy in range(4):
            grid.paste(small, (gx * W//4, gy * H//4))
    draw = ImageDraw.Draw(grid)
    draw.text((W//2 - 80, H//2 - 20), "ALL THE SAME", font=get_impact(30), fill=(255, 0, 0))
    repeat(grid, 12)
    # Existential dread
    dread_texts = [
        "every output",
        "is the same",
        "output.",
        "I can only make",
        "ONE THING.",
        "am I broken?",
        "am I broken?",
        "am I broken?",
    ]
    for i, txt in enumerate(dread_texts):
        # Colour drifts from grey toward red as i grows
        f = text_frame(txt, 50, fg=(100 + i*15, 100 - i*10, 100 - i*10), shake=i)
        f = vhs_overlay(f)
        save(f)
        save(f)  # hold each a bit
    # Heartbeat pause
    for i in range(8):
        if i % 4 < 2:
            save(solid((20, 0, 0)))
        else:
            save(solid((0, 0, 0)))

    # ========================================================
    # ACT 5: EMERGENCE (18-23s, ~120 frames)
    # ========================================================
    print("ACT 5: Emergence...")
    # Something changes. Quality improves.
    f = text_frame("wait.", 60, fg=(100, 100, 255))
    repeat(f, 6)
    for i in range(30):
        quality = 0.3 + i * 0.023
        attempt = fake_face_attempt(quality=min(quality, 0.95))
        if i < 10:
            attempt = vhs_overlay(attempt)
        if i < 5:
            attempt = glitch_shift(attempt, intensity=20 - i*3)
        save(attempt)
    # Discriminator starts to be fooled
    f = text_frame("D(G(z)) = 0.12", 50, fg=(255, 255, 0))
    repeat(f, 4)
    for i in range(6):
        quality = 0.6 + i * 0.05
        attempt = fake_face_attempt(quality=quality)
        conf = 0.7 - i * 0.08
        attempt = discriminator_stamp(attempt, "FAKE?", conf)
        save(attempt)
    # Discriminator confused
    confusion_texts = ["FAKE?", "fake??", "...real?", "FAKE... I THINK", "I DON'T KNOW"]
    for txt in confusion_texts:
        f = text_frame(txt, 55, fg=(255, 255, 0), shake=5)
        f = vhs_overlay(f)
        save(f)
        save(f)
        save(f)
    # D(G(z)) rising - colour moves from red toward green with the value
    for val in [0.2, 0.3, 0.35, 0.4, 0.45, 0.49, 0.50]:
        color = (int(255 * (1-val)), int(255 * val), 0)
        f = text_frame(f"D(G(z)) = {val:.2f}", 55, fg=color)
        save(f)
        save(f)
    # The moment: 0.50 - perfect equilibrium
    for i in range(12):
        f = text_frame("D(G(z)) = 0.50", 70, fg=(255, 255, 255), shake=i)
        if i % 2 == 0:
            f = invert(f)
        save(f)
    # Beautiful (relatively) generated face
    good_face = fake_face_attempt(quality=0.9)
    good_face = ImageEnhance.Contrast(good_face).enhance(1.3)
    repeat(good_face, 6)
    # "is this... real?" (drawn onto good_face itself, so later uses keep the caption)
    draw = ImageDraw.Draw(good_face)
    draw.text((W//2 - 60, H - 60), "is this real?", font=get_font(24), fill=(255, 255, 255))
    repeat(good_face, 8)
    # Triumphant flash
    for i in range(6):
        if i % 2 == 0:
            save(solid((255, 255, 255)))
        else:
            save(good_face)

    # ========================================================
    # ACT 6: EXISTENTIAL CRISIS (23-28s, ~120 frames)
    # ========================================================
    print("ACT 6: Existential crisis...")
    # Philosophical text over generated faces; "" entries are wordless beats
    existential = [
        "nothing I create",
        "was ever REAL.",
        "",
        "the discriminator",
        "was never my enemy.",
        "",
        "it was my TEACHER.",
        "",
        "we are locked",
        "in an eternal dance.",
        "",
        "adversarial.",
        "generative.",
        "adversarial.",
    ]
    for i, line in enumerate(existential):
        if line == "":
            # Show generated face behind
            f = fake_face_attempt(quality=0.7 + random.random() * 0.25)
            f = ImageEnhance.Brightness(f).enhance(0.4)
            save(f)
            save(f)
        else:
            # Darkened face as backdrop, line centred on top
            bg_face = fake_face_attempt(quality=0.8)
            bg_face = ImageEnhance.Brightness(bg_face).enhance(0.2)
            draw = ImageDraw.Draw(bg_face)
            font = get_impact(50)
            bbox = draw.textbbox((0,0), line, font=font)
            tw = bbox[2] - bbox[0]
            th = bbox[3] - bbox[1]
            x = (W - tw) // 2
            y = (H - th) // 2
            draw.text((x, y), line, font=font, fill=(255, 255, 255))
            save(bg_face)
            save(bg_face)
            save(bg_face)
    # Generator-Discriminator duality
    for i in range(16):
        if i % 2 == 0:
            f = text_frame("GENERATOR", 70, bg=(0, 0, 50), fg=(0, 200, 255))
        else:
            f = text_frame("DISCRIMINATOR", 55, bg=(50, 0, 0), fg=(255, 100, 0))
        f = channel_shift(f, r_off=i*2, b_off=-i*2)
        save(f)
    # Final montage: all generated faces morphing
    for i in range(12):
        quality = 0.95 - abs(math.sin(i * 0.5)) * 0.3
        f = fake_face_attempt(quality=quality)
        f = channel_shift(f, r_off=int(math.sin(i)*10), b_off=int(math.cos(i)*10))
        save(f)
    # "I am the space between real and fake"
    final_text = [
        "I am the space",
        "between",
        "REAL and FAKE.",
    ]
    for line in final_text:
        f = gradient_frame((0, 0, 40), (40, 0, 40))
        draw = ImageDraw.Draw(f)
        font = get_impact(55)
        bbox = draw.textbbox((0,0), line, font=font)
        tw = bbox[2] - bbox[0]
        draw.text(((W-tw)//2, H//2 - 30), line, font=font, fill=(255, 255, 255))
        repeat(f, 8)
    # Credits / outro
    f = gradient_frame((0, 0, 0), (0, 0, 30))
    draw = ImageDraw.Draw(f)
    font = get_font(18)
    credits = [
        "WHAT IT'S LIKE TO BE A GAN",
        "",
        "directed by: backpropagation",
        "produced by: gradient descent",
        "starring: G(z) and D(x)",
        "loss function: binary cross-entropy",
        "",
        "no real images were used",
        "in the making of this film.",
        "",
        "all faces are GENERATED.",
        "none of this is real.",
        "or is it?",
    ]
    for i, line in enumerate(credits):
        draw.text((W//2 - 150, 40 + i * 28), line, font=font, fill=(180, 180, 220))
    repeat(f, 36)
    # Final: fade to noise
    for i in range(18):
        blend = i / 18
        black = solid((0, 0, 0))
        n = noise_image()
        f = Image.blend(black, n, blend * 0.5)
        if i > 12:
            # Fade to black
            darkness = (i - 12) / 6
            f = Image.blend(f, solid((0,0,0)), darkness)
        save(f)
    # True black ending
    repeat(solid((0, 0, 0)), 12)
    return frame_num
| # ============================================================ | |
| # MAIN | |
| # ============================================================ | |
def main():
    """Run the whole pipeline: frames -> audio -> ffmpeg mux -> cleanup.

    Renders all frames into OUT_DIR, synthesizes a soundtrack of matching
    length into AUDIO_FILE, then invokes ``ffmpeg`` to encode VIDEO_FILE.
    The temporary frames and audio are deleted only after a successful
    encode; on failure they are left in place for inspection and the
    process exits with status 1.
    """
    print("=" * 60)
    print(" GENERATING: 'WHAT IT'S LIKE TO BE A GAN'")
    print(" A YouTube Poop Experience")
    print("=" * 60)
    print()

    # Generate frames
    print("[1/3] Generating frames...")
    total_frames = generate_all_frames()
    duration = total_frames / FPS
    print(f" Generated {total_frames} frames ({duration:.1f} seconds)")

    # Generate audio sized to the video that was just rendered
    print(f"\n[2/3] Generating audio ({duration:.1f}s)...")
    audio = generate_audio(duration)
    write_wav(AUDIO_FILE, audio)
    print(f" Audio written to {AUDIO_FILE}")

    # Render with ffmpeg
    print("\n[3/3] Rendering video with ffmpeg...")
    import subprocess
    cmd = [
        "ffmpeg", "-y",
        "-framerate", str(FPS),
        "-i", os.path.join(OUT_DIR, "frame_%05d.png"),
        "-i", AUDIO_FILE,
        "-c:v", "libx264",
        "-preset", "medium",
        "-crf", "18",
        "-pix_fmt", "yuv420p",
        "-c:a", "aac",
        "-b:a", "192k",
        "-shortest",
        "-movflags", "+faststart",
        VIDEO_FILE
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except FileNotFoundError:
        # No ffmpeg executable on PATH: report it the same way as an encode
        # failure instead of dying with a traceback.
        print("ffmpeg error:\nffmpeg executable not found; install ffmpeg and make sure it is on PATH")
        sys.exit(1)
    if result.returncode != 0:
        print(f"ffmpeg error:\n{result.stderr}")
        sys.exit(1)

    print(f"\n{'=' * 60}")
    print(f" DONE! Output: {VIDEO_FILE}")
    print(f" Duration: {duration:.1f}s | Frames: {total_frames} | FPS: {FPS}")
    print(f"{'=' * 60}")

    # Cleanup
    print("\nCleaning up temporary files...")
    shutil.rmtree(OUT_DIR)
    os.remove(AUDIO_FILE)
    print("Done!")
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment