Created
September 9, 2020 21:36
-
-
Save aminnj/882cf216de713da5b68d8094926e04e1 to your computer and use it in GitHub Desktop.
Seam carving in python (basically https://www.youtube.com/watch?v=rpB6zQNsbQU but I don't want to learn Julia)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json, glob, sys, os, gzip\n", | |
"from IPython.display import HTML\n", | |
"import ipywidgets\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"from yahist import set_default_style, Hist1D, Hist2D\n", | |
"from tqdm.auto import tqdm\n", | |
"\n", | |
"from skimage.io import imread\n", | |
"from scipy.ndimage import convolve\n", | |
"import numba\n", | |
"from IPython.display import HTML" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"orig_img = imread(\"https://upload.wikimedia.org/wikipedia/en/d/dd/The_Persistence_of_Memory.jpg\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def get_gradmag(img):\n", | |
" img_lumi = (0.299*img[:,:,0] + 0.587*img[:,:,1] + 0.114*img[:,:,2])\n", | |
" img_lumi /= img_lumi.max()\n", | |
" \n", | |
" filt_x = np.array([[1.,0,-1],[2,0,-2],[1,0,-1]])\n", | |
" dx = convolve(img_lumi, filt_x, mode=\"nearest\")\n", | |
" dy = convolve(img_lumi, filt_x.T, mode=\"nearest\")\n", | |
" gradmag = (dx**2 + dy**2)**0.5\n", | |
" return gradmag" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@numba.njit\n", | |
"def get_energy(img):\n", | |
" output = img.copy()\n", | |
" nrow, ncol = output.shape\n", | |
" for j in range(nrow-2,-1,-1):\n", | |
" for i in range(ncol):\n", | |
" down = output[j+1,i]\n", | |
" if i == 0:\n", | |
" left = down\n", | |
" else:\n", | |
" left = output[j+1,i-1]\n", | |
" if i == ncol-1:\n", | |
" right = down\n", | |
" else:\n", | |
" right = output[j+1,i+1]\n", | |
" output[j,i] = output[j,i] + min(left,down,right)\n", | |
" return output\n", | |
"\n", | |
"@numba.njit\n", | |
"def get_best_path(energy, start=-1):\n", | |
" path = []\n", | |
" nrow, ncol = energy.shape\n", | |
" if start < 0:\n", | |
" cx = energy[0].argmin()\n", | |
" else:\n", | |
" cx = start\n", | |
" path.append([cx, 0])\n", | |
" for cy in list(range(nrow-1)):\n", | |
" best_next = energy[cy+1,cx]\n", | |
" step = 0\n", | |
" if cx != 0:\n", | |
" if energy[cy+1,cx-1] < best_next:\n", | |
" best_next = energy[cy+1,cx-1]\n", | |
" step = -1\n", | |
" if cx != ncol-1:\n", | |
" if energy[cy+1,cx+1] < best_next:\n", | |
" best_next = energy[cy+1,cx+1]\n", | |
" step = 1\n", | |
" cx += step\n", | |
" path.append([cx, cy+1])\n", | |
" return np.array(path)\n", | |
"\n", | |
"def squeeze_out_seam(img, path):\n", | |
" mask = np.ones_like(img, dtype=bool)\n", | |
" for px,py in path:\n", | |
" mask[py,px] = False\n", | |
" newshape = list(img.shape)\n", | |
" newshape[1] -= 1\n", | |
" return img[mask].reshape(newshape)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 16.5 s, sys: 1.98 s, total: 18.5 s\n", | |
"Wall time: 41.2 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"reduced_img = orig_img.copy()\n", | |
"\n", | |
"for todelete in range(200):\n", | |
" gradmag = get_gradmag(reduced_img)\n", | |
" energy = get_energy(gradmag)\n", | |
" path = get_best_path(energy)\n", | |
" reduced_img = squeeze_out_seam(reduced_img, path)\n", | |
"\n", | |
" if todelete % 1 == 0:\n", | |
" fig, ax = plt.subplots()\n", | |
" ax.imshow(reduced_img, cmap=\"gray\", interpolation=\"none\")\n", | |
" ax.plot(path[:,0], path[:,1], color=\"white\")\n", | |
" ax.axis(\"off\")\n", | |
" !mkdir -p imgs/\n", | |
" fname = f\"imgs/{todelete:03d}.png\"\n", | |
" fig.savefig(fname)\n", | |
" plt.close()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"ffmpeg version 4.1.2 Copyright (c) 2000-2019 the FFmpeg developers\n", | |
" built with Apple LLVM version 10.0.0 (clang-1000.11.45.5)\n", | |
" configuration: --prefix=/usr/local/Cellar/ffmpeg/4.1.2 --enable-shared --enable-pthreads --enable-version3 --enable-hardcoded-tables --enable-avresample --cc=clang --host-cflags='-I/Library/Java/JavaVirtualMachines/openjdk-11.0.2.jdk/Contents/Home/include -I/Library/Java/JavaVirtualMachines/openjdk-11.0.2.jdk/Contents/Home/include/darwin' --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libbluray --enable-libmp3lame --enable-libopus --enable-librubberband --enable-libsnappy --enable-libtesseract --enable-libtheora --enable-libvorbis --enable-libvpx --enable-libx264 --enable-libx265 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librtmp --enable-libspeex --enable-videotoolbox --disable-libjack --disable-indev=jack --enable-libaom --enable-libsoxr\n", | |
" libavutil 56. 22.100 / 56. 22.100\n", | |
" libavcodec 58. 35.100 / 58. 35.100\n", | |
" libavformat 58. 20.100 / 58. 20.100\n", | |
" libavdevice 58. 5.100 / 58. 5.100\n", | |
" libavfilter 7. 40.101 / 7. 40.101\n", | |
" libavresample 4. 0. 0 / 4. 0. 0\n", | |
" libswscale 5. 3.100 / 5. 3.100\n", | |
" libswresample 3. 3.100 / 3. 3.100\n", | |
" libpostproc 55. 3.100 / 55. 3.100\n", | |
"Input #0, image2, from 'imgs/*.png':\n", | |
" Duration: 00:00:06.67, start: 0.000000, bitrate: N/A\n", | |
" Stream #0:0: Video: png, rgba(pc), 432x288 [SAR 2835:2835 DAR 3:2], 30 fps, 30 tbr, 30 tbn, 30 tbc\n", | |
"Stream mapping:\n", | |
" Stream #0:0 -> #0:0 (png (native) -> h264 (libx264))\n", | |
"Press [q] to stop, [?] for help\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0musing SAR=1/1\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0musing cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mprofile High, level 2.1\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0m264 - core 155 r2917 0a84d98 - H.264/MPEG-4 AVC codec - Copyleft 2003-2018 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=9 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=2.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00\n", | |
"Output #0, mp4, to 'reduction.mp4':\n", | |
" Metadata:\n", | |
" encoder : Lavf58.20.100\n", | |
" Stream #0:0: Video: h264 (libx264) (avc1 / 0x31637661), yuv420p, 432x288 [SAR 1:1 DAR 3:2], q=-1--1, 30 fps, 15360 tbn, 30 tbc\n", | |
" Metadata:\n", | |
" encoder : Lavc58.35.100 libx264\n", | |
" Side data:\n", | |
" cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: -1\n", | |
"frame= 200 fps=0.0 q=-1.0 Lsize= 1893kB time=00:00:06.56 bitrate=2361.3kbits/s speed=15.1x \n", | |
"video:1890kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.168474%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mframe I:1 Avg QP: 2.24 size: 59392\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mframe P:50 Avg QP: 5.19 size: 15981\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mframe B:149 Avg QP: 9.72 size: 7221\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mconsecutive B-frames: 0.5% 0.0% 1.5% 98.0%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mmb I I16..4: 55.8% 5.3% 38.9%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mmb P I16..4: 0.5% 0.1% 1.2% P16..4: 20.9% 9.1% 7.1% 0.0% 0.0% skip:61.2%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mmb B I16..4: 0.0% 0.0% 0.2% B16..8: 24.5% 5.1% 4.7% direct: 2.4% skip:63.2% L0:51.2% L1:45.5% BI: 3.3%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0m8x8 transform intra:4.6% inter:7.6%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mcoded y,uvDC,uvAC intra: 66.5% 65.7% 62.0% inter: 15.7% 19.9% 18.1%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mi16 v,h,dc,p: 81% 2% 13% 4%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mi8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 18% 26% 23% 4% 3% 7% 7% 5% 6%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mi4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 12% 14% 16% 9% 11% 10% 8% 10% 9%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mi8c dc,h,v,p: 55% 25% 13% 7%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mWeighted P-Frames: Y:0.0% UV:0.0%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mref P L0: 46.1% 2.9% 31.5% 19.5%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mref B L0: 72.2% 15.1% 12.7%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mref B L1: 93.6% 6.4%\n", | |
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mkb/s:2321.19\n" | |
] | |
} | |
], | |
"source": [ | |
"!ffmpeg -y -r 30 -f image2 -pattern_type glob -i 'imgs/*.png' -vcodec libx264 -crf 2 -pix_fmt yuv420p reduction.mp4" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"\n", | |
" <video alt=\"test\" controls autoplay>\n", | |
" <source src=\"reduction.mp4\" type=\"video/mp4\">\n", | |
" </video>\n" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"HTML(f\"\"\"\n", | |
" <video alt=\"test\" controls autoplay>\n", | |
" <source src=\"reduction.mp4\" type=\"video/mp4\">\n", | |
" </video>\n", | |
"\"\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment