-
-
Save flanter21/6fd9d7e16b2d412c40b15504e16a6d88 to your computer and use it in GitHub Desktop.
import json | |
import os | |
from moviepy import ImageClip, VideoFileClip, concatenate_videoclips | |
from PIL import Image | |
def create_slideshow(image_files, image_durations, output_file, fps=60, codec="libx264", preset="medium", debug=False, test_run=False, overwrite="ask"):
    """
    Create a slideshow video from a list of images.

    Parameters:
        image_files (list): List of image file paths
        image_durations (list): Duration each image is displayed (in seconds).
                                One entry per image, in the same order as image_files
        output_file (str): Path for the output video file
        fps (int): Frames per second of the output video
        codec (str): The codec used to encode the output video
        preset (str): Same as ffmpeg. See https://trac.ffmpeg.org/wiki/Encode/H.264#Preset
                      Options - ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, placebo
        debug (bool): Show time (in seconds) that each image is displayed
        test_run (bool): If true, will skip rendering video
        overwrite (str): When a file with the same name as the output is found,
                         Options - "skip", "overwrite", "ask"

    Raises:
        ValueError: If image_files is empty, or if there are fewer durations than images.
    """
    if not image_files:
        raise ValueError("create_slideshow() needs at least one image.")
    if len(image_durations) < len(image_files):
        raise ValueError("Every image needs a duration, but only "
                         + str(len(image_durations)) + " durations were given for "
                         + str(len(image_files)) + " images.")

    if debug:
        for image_file, duration in zip(image_files, image_durations):
            print(image_file, duration)

    # Decide what to do about an existing output file BEFORE loading any image.
    # Building the clips is the expensive part, so a skipped video should cost nothing.
    if os.path.isfile(output_file):
        while overwrite not in ("overwrite", "skip"):
            overwrite = input(output_file + " already exists. Do you want to overwrite over it? Enter 'overwrite' or 'skip' ")
        if overwrite == "skip":
            return

    # One clip per image, each held on screen for its own duration
    clips = [
        ImageClip(image_file).with_duration(duration)
        for image_file, duration in zip(image_files, image_durations)
    ]
    video = concatenate_videoclips(clips, method="chain")

    # A test run still builds the clips (so unreadable images are reported) but never renders
    if not test_run:
        video.write_videofile(output_file, fps=fps, codec=codec, preset=preset, audio=False)
def segment_slideshow(image_files, image_durations, output_file, fps=60, codec="libx264", preset="medium", debug=False, test_run=False, overwrite="ask", segment_at=2000, keep_temp=True):
    '''
    Create slideshow in multiple video files concatenated, rather than concatenating all images directly.
    Concatenating too many images at once leads to memory exhaustion.

    Parameters passed to create_slideshow(): image_files (list), image_durations (list), output_file (str),
        fps (int), codec (str), preset (str), debug (bool), test_run (bool), overwrite (str).
        Note that overwrite is applied to each intermediate video (or to output_file when no
        segmenting is needed); the final joined video is always written.

    Parameters unique to segment_slideshow():
        segment_at (int): The number of images after which to segment the file.
                          Reduce this if the process is killed (due to using too much memory).
                          The separate videos will be concatenated anyways.
        keep_temp (bool): After finishing, keep temporary folder (containing intermediate videos)?

    Raises:
        ValueError: If segment_at is smaller than 1.
    '''
    if segment_at < 1:
        raise ValueError("segment_at must be at least 1.")

    # Ceiling division, so an image count that is an exact multiple of segment_at
    # does not produce a trailing segment with no images in it
    number_of_videos_to_create = -(-len(image_files) // segment_at)

    if number_of_videos_to_create == 0:
        print("No images were given, so there is nothing to render.")
        return

    if number_of_videos_to_create == 1:  # When the video doesn't need to be segmented
        print("Creating video 1 of 1")
        create_slideshow(image_files, image_durations, output_file, fps, codec, preset, debug, test_run, overwrite)
        return

    if not test_run:
        os.makedirs("temp", exist_ok=True)  # Make new folder for storage of temporary files

    temp_names = []  # Paths of all intermediate videos, in playback order
    for i in range(number_of_videos_to_create):
        current_video_name = "temp/" + str(i) + ".mkv"
        start = i * segment_at
        stop = start + segment_at
        print("Creating video", i + 1, "of", number_of_videos_to_create)
        create_slideshow(image_files[start:stop], image_durations[start:stop], current_video_name, fps, codec, preset, debug, test_run, overwrite)
        temp_names.append(current_video_name)

    # Nothing was rendered during a test run, so there are no intermediate videos to join
    if test_run:
        return

    temp_vids = []  # Intermediate videos currently opened for reading
    try:
        for name in temp_names:
            temp_vids.append(VideoFileClip(name))
        video = concatenate_videoclips(temp_vids, method="chain")
        video.write_videofile(output_file, fps=fps, codec=codec, preset=preset, audio=False)
    finally:
        # Release the readers even if joining fails part-way through
        for clip in temp_vids:
            clip.close()

    if not keep_temp:
        for name in temp_names:
            os.remove(name)
        try:
            os.rmdir("temp")
        except OSError:
            # The folder holds files this run did not create; leave them alone
            pass
def check_images(image_files, ext=".jpg", resize="ask", overwrite="ask"):
    '''
    Check all images are same resolution and if not, resize the images.

    Originals of resized images are moved into the folder "original_images" and the
    resized copy is saved under the original path.

    Parameters:
        image_files (list): List of image files to process
        ext (str): Currently unused; kept so existing calls keep working
        resize (string): should the script resize incorrectly sized images (to the most common size)?
                         Options - "ask", "skip", "resize"
        overwrite (str): When a file with the same name already exists in original_images,
                         Options - "skip", "overwrite", "ask"
    '''
    if not image_files:
        return  # Nothing to check

    # Group images by resolution. Dicts keep insertion order, so the first resolution seen comes first
    images_by_size = {}
    for image in image_files:
        with Image.open(image) as opened:  # Close the file handle as soon as the size is known
            current_size = opened.size
        images_by_size.setdefault(current_size, []).append(image)

    # max() returns the first of several equally common resolutions
    most_common = max(images_by_size, key=lambda size: len(images_by_size[size]))
    print("The most common resolution is", most_common)

    if len(images_by_size) == 1:
        return

    print("These images are not the correct size:")
    for size, images in images_by_size.items():
        if size != most_common:
            print(size, images)

    while resize not in ("resize", "skip"):
        resize = input("Do you want to resize the remaining images to " + str(most_common) + "? 'resize' or 'skip'? ")
    if resize != "resize":
        return

    os.makedirs("original_images", exist_ok=True)  # Make folder
    for size, images in images_by_size.items():
        if size == most_common:
            continue
        for image in images:
            # The original image is moved here before the resized copy takes its place
            backup = "original_images/" + os.path.basename(image)

            # Only an EXISTING backup can be overwritten, so only then is the question relevant.
            # The answer is remembered for all remaining images.
            if os.path.isfile(backup):
                while overwrite not in ("overwrite", "skip"):
                    overwrite = input("There are images already in original_images. Do you want to overwrite over them? Enter 'overwrite' or 'skip' ")
                if overwrite == "skip":
                    print("Skipping", image)
                    continue

            print("Resizing", image)
            os.replace(image, backup)  # Unlike os.rename, also replaces an existing file on Windows
            # Resize and save image in original folder
            with Image.open(backup) as original:
                original.resize(most_common).save(image)
def get_image_durations(slides_folder, file="GetPlayerOptions.json", name_template="slide_{:04d}.jpg"):
    '''
    Get duration each image is displayed for from a GetPlayerOptions.json

    Slide number k (counting from 1) is shown from the time of slide entry k-1 until the
    time of slide entry k; the last slide is shown until the end of the presentation.
    If the first entry does not start at 0, the first slide also fills the gap before it.

    Parameters:
        slides_folder (str): The name of the folder containing the slides images
        file (str): Give path to GetPlayerOptions.json
        name_template (str): str.format() template for a slide's file name, given the slide number

    Returns:
        (image_files, image_durations): two lists of equal length. image_files holds the image
        paths in playback order and image_durations the matching durations in seconds.
    '''
    # "with" closes the file even when the JSON cannot be parsed
    with open(file, encoding="utf-8") as f:
        data = json.load(f)  # returns JSON object as a dictionary

    presentation = data["d"]["Presentation"]
    image_durations = []
    image_files = []

    for stream in presentation["Streams"]:
        if not stream["HasSlideContent"]:
            continue
        slide_times = [slide["Time"] for slide in stream["Slides"]]  # In milliseconds
        if not slide_times:
            continue  # Stream claims slide content but lists no slides

        # Handle first part, if there is no slide yet
        if slide_times[0] != 0:
            image_durations.append(slide_times[0] / 1000)
            image_files.append(slides_folder + "/" + name_template.format(1))

        for i in range(1, len(slide_times)):
            image_durations.append((slide_times[i] - slide_times[i - 1]) / 1000)
            image_files.append(slides_folder + "/" + name_template.format(i))

        # The final slide stays up until the presentation ends
        image_durations.append((presentation["Duration"] - slide_times[-1]) / 1000)
        image_files.append(slides_folder + "/" + name_template.format(len(slide_times)))

    # Files and durations are always appended in pairs, so the lists have equal length
    return image_files, image_durations
if __name__ == "__main__":
    # Pair every slide image in ./slides with the time it stays on screen
    slide_paths, slide_seconds = get_image_durations("slides", "GetPlayerOptions.json")

    # Bring odd-sized images to the most common resolution without asking first
    check_images(slide_paths, resize="resize")

    # Render in batches of 2500 images, printing each image's duration along the way
    segment_slideshow(
        slide_paths,
        slide_seconds,
        "slideshow.mkv",
        segment_at=2500,
        debug=True,
    )
My fork of yt-dlp here implements support for saving GetPlayerOptions.json. It also includes several fixes for downloading transcripts and subtitles, supports more URL formats, and fixes some videos that would not download because they are behind logins. A lot of this work is taken directly from https://github.com/kclauhk/yt-dlp, so I'd like to thank them too.
This shell script is what I currently use to streamline downloading the slides and converting them into a video. It may need changes for your environment, but the gist is that it is meant to be run in a folder whose parent contains a Python venv, a yt-dlp binary, and both the script above and the script below. yt-dlp can be compiled using GitHub Actions if you fork my repo.
You call it by running ../download.sh "urlhere" and it should take care of (most of) the rest.
# Usage: ../download.sh "url" - run from a working folder whose parent holds yt-dlp, the venv and the scripts.
# Pass 1: download the video with automatic subtitles in vtt format, using Firefox's cookies.
../yt-dlp_macos --cookies-from-browser firefox --write-auto-subs --sub-format vtt "$1"
# Pass 2: fetch the automatic subtitles again in txt format, without downloading the video.
../yt-dlp_macos --cookies-from-browser firefox --write-auto-subs --sub-format txt --skip-download "$1"
# Pass 3: request the mhtml format and keep its fragments (presumably the slide images - confirm),
# plus the "und" subtitle track in json format.
../yt-dlp_macos --cookies-from-browser firefox --write-auto-subs --sub-langs und --sub-format json -f mhtml --keep-fragments "$1"
# Replace the number after "-Frag" with that number plus one, zero-padded to 4 digits, followed by ".jpg" ...
rename 's/(?<=-Frag)(\d+)/sprintf("%04d.jpg",$1+1)/eg' *Frag*
# ... then replace everything up to and including "Frag" with "slide_", giving slide_0001.jpg, slide_0002.jpg, ...
rename 's/(.+Frag)/"slide_"/eg' *Frag*
# Give the downloaded json the file name that the Python script reads.
mv *.json GetPlayerOptions.json
# Collect the slide images in the folder that the Python script reads them from.
mkdir slides
mv *.jpg slides
# Build slideshow.mkv with the Python interpreter from the parent folder.
../bin/python "../slideshow interpreter.py"
# Mux: video stream copied from slideshow.mkv, audio stream copied from the mp4, subtitles from the vtt
# converted to mov_text and tagged as English, all written to output.mp4.
ffmpeg -i slideshow.mkv -i *.mp4 -i *.vtt -c:v copy -c:a copy -c:s mov_text -map 0:v:0 -map 1:a:0 -map 2:s:0 -metadata:s:s:0 language=eng output.mp4
Note: you may want to use --cookies-from-browser chrome instead, since session cookies can be retrieved from Chrome, allowing more videos to be downloaded.
Here is a bit of an explanation of how it works.
Core
- `get_image_durations` reads the GetPlayerOptions.json from Mediasite and pairs up each slide with the duration it is displayed.
- `create_slideshow` uses moviepy to join the images up into a video, including the timing information fed into it from `get_image_durations`. Slideshow videos appear to be timed to 60fps, so that is the default framerate.

Additional - in the example, these are run immediately after `get_image_durations`.
- `check_images` ensures all images have the same dimensions. If not, it can also resize them using Pillow. This is so that `create_slideshow` can use moviepy's chain method, as its compose method is very slow.
- `segment_slideshow` is an intermediate that calls `create_slideshow` by itself. It allows converting the images into a video in steps, e.g. first 1000 images → video → next 1000 → video ... all videos → full video.

Issues
- `create_slideshow` - moviepy sometimes throws this weird error: `In file temp/10.mkv, 6220800 bytes wanted but 0 bytes read at frame index 30091 (out of a total 30091 frames), at time 501.52/501.52 sec. Using the last valid frame instead.`
- `GetPlayerOptions.json` - `get_image_durations` is currently hardcoded to only handle one naming scheme - `slide_{0:D4}.jpg`. Other schemes exist (see `d.Presentation.Streams._number_.SlideImageFileNameTemplate`, e.g. `slide_video1_{0:D4}.jpg`), but yt-dlp fragments would be more tricky.
- `create_slideshow` is hardcoded to only use the chain method; maybe I could add an option to use the compose method, but I haven't needed to yet.