Skip to content

Instantly share code, notes, and snippets.

@aminnj
Created September 9, 2020 21:36
Show Gist options
  • Save aminnj/882cf216de713da5b68d8094926e04e1 to your computer and use it in GitHub Desktop.
Save aminnj/882cf216de713da5b68d8094926e04e1 to your computer and use it in GitHub Desktop.
Seam carving in Python (basically https://www.youtube.com/watch?v=rpB6zQNsbQU but I don't want to learn Julia)
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json, glob, sys, os, gzip\n",
"from IPython.display import HTML\n",
"import ipywidgets\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from yahist import set_default_style, Hist1D, Hist2D\n",
"from tqdm.auto import tqdm\n",
"\n",
"from skimage.io import imread\n",
"from scipy.ndimage import convolve\n",
"import numba\n",
"from IPython.display import HTML"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Source picture for the demo, fetched over HTTP by skimage's imread.\n",
"IMG_URL = \"https://upload.wikimedia.org/wikipedia/en/d/dd/The_Persistence_of_Memory.jpg\"\n",
"orig_img = imread(IMG_URL)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def get_gradmag(img):\n",
"    \"\"\"Return the Sobel gradient magnitude of the image's normalized luminance.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    img : array_like\n",
"        Either a 2-D grayscale image (rows, cols) or an image with a trailing\n",
"        channel axis whose first three channels are read as R, G, B.\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray\n",
"        2-D float array with the same rows/cols as `img`, holding\n",
"        sqrt(dx**2 + dy**2) of the luminance.\n",
"    \"\"\"\n",
"    img = np.asarray(img)\n",
"    if img.ndim == 2:\n",
"        # Already single-channel: use it directly as the luminance.\n",
"        img_lumi = img.astype(float)\n",
"    else:\n",
"        # Weighted sum of R, G, B (Rec. 601 luma coefficients).\n",
"        img_lumi = 0.299*img[:,:,0] + 0.587*img[:,:,1] + 0.114*img[:,:,2]\n",
"\n",
"    # Scale so the brightest pixel is 1. Skip when the peak is 0 (all-black\n",
"    # image), otherwise 0/0 would fill the whole gradient map with NaN.\n",
"    peak = img_lumi.max()\n",
"    if peak != 0:\n",
"        img_lumi = img_lumi / peak\n",
"\n",
"    # Sobel kernel for the x direction; its transpose gives the y direction.\n",
"    filt_x = np.array([[1.,0,-1],[2,0,-2],[1,0,-1]])\n",
"    dx = convolve(img_lumi, filt_x, mode=\"nearest\")\n",
"    dy = convolve(img_lumi, filt_x.T, mode=\"nearest\")\n",
"    gradmag = (dx**2 + dy**2)**0.5\n",
"    return gradmag"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"@numba.njit\n",
"def get_energy(img):\n",
"    \"\"\"Accumulate the cheapest downward path cost for every pixel.\n",
"\n",
"    Works on a copy of the 2-D array `img`. Starting from the second-to-last\n",
"    row and moving up, each cell gets its own value plus the smallest of the\n",
"    (up to three) cells directly below it: below-left, below, below-right.\n",
"    At the left/right borders the missing neighbour is replaced by the cell\n",
"    straight below. The last row is left unchanged.\n",
"\n",
"    Returns the accumulated array; `img` itself is not modified.\n",
"    \"\"\"\n",
"    cost = img.copy()\n",
"    nrow, ncol = cost.shape\n",
"    last_col = ncol - 1\n",
"    for j in range(nrow-2, -1, -1):\n",
"        # Row j+1 is already final while row j is being filled in.\n",
"        below = cost[j+1]\n",
"        for i in range(ncol):\n",
"            down = below[i]\n",
"            left = below[i-1] if i != 0 else down\n",
"            right = below[i+1] if i != last_col else down\n",
"            cost[j,i] = cost[j,i] + min(left, down, right)\n",
"    return cost\n",
"\n",
"@numba.njit\n",
"def get_best_path(energy, start=-1):\n",
"    \"\"\"Trace a top-to-bottom seam through an accumulated energy map.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    energy : 2-D array\n",
"        Accumulated cost map, e.g. the output of `get_energy`.\n",
"    start : int, optional\n",
"        Column to start from in row 0. Any negative value (the default)\n",
"        starts at the column with the smallest value in row 0.\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray\n",
"        Integer array of shape (nrow, 2); row k is [column, k].\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If `start` is not a valid column index. Compiled code does not\n",
"        bounds-check indexing, so this would otherwise read garbage silently.\n",
"    \"\"\"\n",
"    nrow, ncol = energy.shape\n",
"    if start >= ncol:\n",
"        raise ValueError(\"start column is outside the energy map\")\n",
"    if start < 0:\n",
"        cx = energy[0].argmin()\n",
"    else:\n",
"        cx = start\n",
"\n",
"    # One (x, y) pair per row; preallocated instead of growing a list of lists.\n",
"    path = np.empty((nrow, 2), dtype=np.int64)\n",
"    path[0, 0] = cx\n",
"    path[0, 1] = 0\n",
"    for cy in range(1, nrow):\n",
"        # Prefer going straight down; move diagonally only when strictly cheaper.\n",
"        best_next = energy[cy, cx]\n",
"        step = 0\n",
"        if cx != 0 and energy[cy, cx-1] < best_next:\n",
"            best_next = energy[cy, cx-1]\n",
"            step = -1\n",
"        if cx != ncol-1 and energy[cy, cx+1] < best_next:\n",
"            step = 1\n",
"        cx += step\n",
"        path[cy, 0] = cx\n",
"        path[cy, 1] = cy\n",
"    return path\n",
"\n",
"def squeeze_out_seam(img, path):\n",
"    \"\"\"Remove one pixel per row from `img` and return the narrower image.\n",
"\n",
"    `path` is a sequence of (x, y) pairs; the pixel at row y, column x is\n",
"    dropped (all trailing channels with it). The result has the same shape\n",
"    as `img` except that axis 1 is one element shorter.\n",
"    \"\"\"\n",
"    seam = np.asarray(path)\n",
"    keep = np.ones(img.shape, dtype=bool)\n",
"    # Knock out every seam pixel in one indexed assignment.\n",
"    keep[seam[:, 1], seam[:, 0]] = False\n",
"    out_shape = (img.shape[0], img.shape[1] - 1) + tuple(img.shape[2:])\n",
"    return img[keep].reshape(out_shape)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 16.5 s, sys: 1.98 s, total: 18.5 s\n",
"Wall time: 41.2 s\n"
]
}
],
"source": [
"%%time\n",
"# Carve N_SEAMS vertical seams out of the image one at a time, saving a PNG\n",
"# frame of every SAVE_EVERY-th step for the animation built in the next cell.\n",
"N_SEAMS = 200\n",
"SAVE_EVERY = 1\n",
"FRAME_DIR = \"imgs\"\n",
"\n",
"# Create the output directory once and portably, instead of shelling out to\n",
"# `mkdir -p` on every iteration.\n",
"os.makedirs(FRAME_DIR, exist_ok=True)\n",
"\n",
"reduced_img = orig_img.copy()\n",
"\n",
"for todelete in range(N_SEAMS):\n",
"    gradmag = get_gradmag(reduced_img)\n",
"    energy = get_energy(gradmag)\n",
"    path = get_best_path(energy)\n",
"    reduced_img = squeeze_out_seam(reduced_img, path)\n",
"\n",
"    if todelete % SAVE_EVERY == 0:\n",
"        fig, ax = plt.subplots()\n",
"        ax.imshow(reduced_img, cmap=\"gray\", interpolation=\"none\")\n",
"        # Overlay the seam that was just removed.\n",
"        ax.plot(path[:,0], path[:,1], color=\"white\")\n",
"        ax.axis(\"off\")\n",
"        fname = f\"{FRAME_DIR}/{todelete:03d}.png\"\n",
"        fig.savefig(fname)\n",
"        # Close this specific figure so figures do not pile up in memory.\n",
"        plt.close(fig)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ffmpeg version 4.1.2 Copyright (c) 2000-2019 the FFmpeg developers\n",
" built with Apple LLVM version 10.0.0 (clang-1000.11.45.5)\n",
" configuration: --prefix=/usr/local/Cellar/ffmpeg/4.1.2 --enable-shared --enable-pthreads --enable-version3 --enable-hardcoded-tables --enable-avresample --cc=clang --host-cflags='-I/Library/Java/JavaVirtualMachines/openjdk-11.0.2.jdk/Contents/Home/include -I/Library/Java/JavaVirtualMachines/openjdk-11.0.2.jdk/Contents/Home/include/darwin' --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libbluray --enable-libmp3lame --enable-libopus --enable-librubberband --enable-libsnappy --enable-libtesseract --enable-libtheora --enable-libvorbis --enable-libvpx --enable-libx264 --enable-libx265 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librtmp --enable-libspeex --enable-videotoolbox --disable-libjack --disable-indev=jack --enable-libaom --enable-libsoxr\n",
" libavutil 56. 22.100 / 56. 22.100\n",
" libavcodec 58. 35.100 / 58. 35.100\n",
" libavformat 58. 20.100 / 58. 20.100\n",
" libavdevice 58. 5.100 / 58. 5.100\n",
" libavfilter 7. 40.101 / 7. 40.101\n",
" libavresample 4. 0. 0 / 4. 0. 0\n",
" libswscale 5. 3.100 / 5. 3.100\n",
" libswresample 3. 3.100 / 3. 3.100\n",
" libpostproc 55. 3.100 / 55. 3.100\n",
"Input #0, image2, from 'imgs/*.png':\n",
" Duration: 00:00:06.67, start: 0.000000, bitrate: N/A\n",
" Stream #0:0: Video: png, rgba(pc), 432x288 [SAR 2835:2835 DAR 3:2], 30 fps, 30 tbr, 30 tbn, 30 tbc\n",
"Stream mapping:\n",
" Stream #0:0 -> #0:0 (png (native) -> h264 (libx264))\n",
"Press [q] to stop, [?] for help\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0musing SAR=1/1\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0musing cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mprofile High, level 2.1\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0m264 - core 155 r2917 0a84d98 - H.264/MPEG-4 AVC codec - Copyleft 2003-2018 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=9 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=2.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00\n",
"Output #0, mp4, to 'reduction.mp4':\n",
" Metadata:\n",
" encoder : Lavf58.20.100\n",
" Stream #0:0: Video: h264 (libx264) (avc1 / 0x31637661), yuv420p, 432x288 [SAR 1:1 DAR 3:2], q=-1--1, 30 fps, 15360 tbn, 30 tbc\n",
" Metadata:\n",
" encoder : Lavc58.35.100 libx264\n",
" Side data:\n",
" cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: -1\n",
"frame= 200 fps=0.0 q=-1.0 Lsize= 1893kB time=00:00:06.56 bitrate=2361.3kbits/s speed=15.1x \n",
"video:1890kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.168474%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mframe I:1 Avg QP: 2.24 size: 59392\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mframe P:50 Avg QP: 5.19 size: 15981\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mframe B:149 Avg QP: 9.72 size: 7221\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mconsecutive B-frames: 0.5% 0.0% 1.5% 98.0%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mmb I I16..4: 55.8% 5.3% 38.9%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mmb P I16..4: 0.5% 0.1% 1.2% P16..4: 20.9% 9.1% 7.1% 0.0% 0.0% skip:61.2%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mmb B I16..4: 0.0% 0.0% 0.2% B16..8: 24.5% 5.1% 4.7% direct: 2.4% skip:63.2% L0:51.2% L1:45.5% BI: 3.3%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0m8x8 transform intra:4.6% inter:7.6%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mcoded y,uvDC,uvAC intra: 66.5% 65.7% 62.0% inter: 15.7% 19.9% 18.1%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mi16 v,h,dc,p: 81% 2% 13% 4%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mi8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 18% 26% 23% 4% 3% 7% 7% 5% 6%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mi4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 12% 14% 16% 9% 11% 10% 8% 10% 9%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mi8c dc,h,v,p: 55% 25% 13% 7%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mWeighted P-Frames: Y:0.0% UV:0.0%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mref P L0: 46.1% 2.9% 31.5% 19.5%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mref B L0: 72.2% 15.1% 12.7%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mref B L1: 93.6% 6.4%\n",
"\u001b[1;36m[libx264 @ 0x7fa3ed018400] \u001b[0mkb/s:2321.19\n"
]
}
],
"source": [
"# Stitch the saved frames (in glob order) into an H.264 video at 30 fps.\n",
"# -hide_banner / -loglevel warning keep the encoder log out of the notebook.\n",
"!ffmpeg -hide_banner -loglevel warning -y -r 30 -f image2 -pattern_type glob -i 'imgs/*.png' -vcodec libx264 -crf 2 -pix_fmt yuv420p reduction.mp4"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <video alt=\"test\" controls autoplay>\n",
" <source src=\"reduction.mp4\" type=\"video/mp4\">\n",
" </video>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Embed the encoded video inline; no interpolation is needed, so this is a\n",
"# plain (non-f) string.\n",
"HTML(\"\"\"\n",
" <video alt=\"test\" controls autoplay>\n",
" <source src=\"reduction.mp4\" type=\"video/mp4\">\n",
" </video>\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment