Skip to content

Instantly share code, notes, and snippets.

@flanter21
Last active May 28, 2025 15:33
Show Gist options
  • Save flanter21/6fd9d7e16b2d412c40b15504e16a6d88 to your computer and use it in GitHub Desktop.
Save flanter21/6fd9d7e16b2d412c40b15504e16a6d88 to your computer and use it in GitHub Desktop.
Convert mediasite slides into video
import argparse
import json
import os
from PIL import Image
def calculate_duration(self, start_frame: int, frames_later: int, timebase: int) -> int:
    """
    Convert the gap between two timestamps into a whole number of timebase units.

    Parameters:
        self: Indexable collection of timestamps in milliseconds
        start_frame (int): Index of the timestamp at which the gap begins
        frames_later (int): How many entries after start_frame the gap ends
        timebase (int): Number of units per second in the result (e.g. the fps of the video)

    Returns:
        int: Length of the gap, rounded to the nearest unit
    """
    finish_ms = self[start_frame + frames_later]
    begin_ms = self[start_frame]
    elapsed_ms = finish_ms - begin_ms
    # Milliseconds -> seconds -> timebase units, then snap to a whole unit
    return round(elapsed_ms / 1000 * timebase)
def create_slideshow(images: dict[str,int], video_duration: int, image_folder: str = "slides", end_time: int = -1,
                     start_time: int = 0, fps: int = 60, max_interval: int = 0, frameskip: int = 0) -> str:
    """
    Creates the text of a concat.txt for a slideshow video from a list of images.

    Every slide that ends up in the output contributes a "file" line followed by a
    "duration" line. The last file is then listed once more without a duration.

    Durations are written as frame counts (seconds * fps).
    NOTE(review): ffmpeg's concat demuxer documents "duration" as seconds - confirm that
    whatever consumes this file rescales the timestamps accordingly.

    Parameters:
        images (dict): For each image: name, timestamp at which it is displayed (in milliseconds)
        video_duration (int): Length of the whole presentation in milliseconds
        image_folder (str): Name of folder containing images
        end_time (int): Position (in seconds) at which the output stops.
            Zero or less means the end of the presentation
        start_time (int): Position (in seconds) at which the output starts
        fps (int): Frames per second of the output video
            Mediasite stores time in milliseconds, so multiples of 10 are good approximations,
            however most videos appear to have originally been 60fps
        max_interval (float): When non-zero, a new slide only replaces the one on screen once
            that one has been shown for at least this many seconds. Slides arriving sooner
            are dropped and their time is added to the slide on screen
        frameskip (int): When non-zero, only slides whose index is a multiple of this value
            are kept. The time of the dropped slides is added to the previous kept slide

    Returns:
        str: Contents for concat.txt (empty if no slide falls inside the requested range)

    Raises:
        KeyError: If max_interval and frameskip are both given
        ValueError: If end_time lies before start_time
    """
    # Both together is not supported
    if max_interval and frameskip:
        raise KeyError("max_interval and frameskip cannot be used together")

    if end_time <= 0:
        end_time = video_duration / 1000
    if end_time < start_time:
        raise ValueError(f"end_time ({end_time}s) is before start_time ({start_time}s); "
                         "both are positions in the presentation, not lengths")

    # The extra entry gives the last slide an end point
    image_timestamps = list(images.values()) + [video_duration]
    start_frame = round(start_time * fps)
    end_frame = round(end_time * fps)
    min_entry_frames = max_interval * fps

    entries = []  # [image name, frames on screen], in display order
    elapsed = 0   # Frames from the first timestamp up to the end of the current slide
    for i, current_image in enumerate(images):
        slide_begin = elapsed
        elapsed += calculate_duration(image_timestamps, i, 1, fps)

        # Slide is over before the requested start
        if elapsed <= start_frame:
            continue

        # Only count the part of the slide that lies between start and end
        visible = min(elapsed, end_frame) - max(slide_begin, start_frame)

        if not entries:
            opens_entry = True  # The first visible slide is always shown
        elif max_interval:
            opens_entry = entries[-1][1] >= min_entry_frames
        elif frameskip:
            opens_entry = i % frameskip == 0
        else:
            opens_entry = True

        if opens_entry:
            entries.append([current_image, visible])
        else:
            # Dropped slide: keep the video in sync by extending the slide on screen
            entries[-1][1] += visible

        # This slide reaches the requested end, nothing after it is shown
        if elapsed >= end_frame:
            break

    # Create concat txt for ffmpeg
    lines = [f"file '{os.path.join(image_folder, name)}'\nduration {frames}\n" for name, frames in entries]
    if entries:
        # For final slide: the last file is listed again after its duration
        lines.append(f"file '{os.path.join(image_folder, entries[-1][0])}'\n")
    output = ''.join(lines)

    # Output expected length of video
    total_frames = round((end_time - start_time) * fps)
    total_seconds, f = divmod(total_frames, fps)
    h, remainder = divmod(total_seconds, 3600)
    m, s = divmod(remainder, 60)
    print ("Total duration: " + str(h) + " hours " + str(m) + " minutes " + str(s) + " seconds " + str(f) + " frames")
    return output
def check_images(image_files: list[str], image_folder: str = 'slides', old_image_folder: str = 'original_images', overwrite: bool = False):
    '''
    Check all images are same resolution and if not, resize the images.

    Images that do not have the most common resolution are moved into
    old_image_folder (inside image_folder) and a resized copy is saved under
    the original name.

    Parameters:
        image_files (list): List of image files to process
        image_folder (str): Where the images are located
        old_image_folder (str): Subfolder of image_folder that receives the untouched originals
        overwrite (bool): Overwrite file with an existing name?
    '''
    # Classify images by size
    images_by_size = {}
    for image in image_files:
        with Image.open(os.path.join(image_folder, image)) as opened:
            images_by_size.setdefault(opened.size, []).append(image)

    # Nothing to compare
    if not images_by_size:
        return

    # Find most common resolution (the one shared by the most files, not the largest one)
    most_common_size = max(images_by_size, key=lambda size: len(images_by_size[size]))
    print("The most common resolution is", most_common_size)

    # Everything that is not the most common resolution needs resizing
    odd_images = {size: names for size, names in images_by_size.items() if size != most_common_size}
    if not odd_images:
        return

    print("These images are not the correct size:")
    for size, names in odd_images.items():
        print (size, names)

    backup_folder = os.path.join(image_folder, old_image_folder)
    for names in odd_images.values():
        for current_image in names:
            current_image_path = os.path.join(image_folder, current_image)
            moved_image_path = os.path.join(backup_folder, os.path.basename(current_image_path))
            if not decide_overwrite(moved_image_path, overwrite):
                continue
            # Resize while the original is still in place, then close it before moving it
            with Image.open(current_image_path) as original:
                image_resized = original.resize(most_common_size)
            os.makedirs(backup_folder, exist_ok=True)
            # os.replace also overwrites an existing backup on Windows, os.rename does not
            os.replace(current_image_path, moved_image_path)
            print("Moved", current_image, "to", moved_image_path)
            image_resized.save(current_image_path)
            print("Resized", current_image)
def get_image_timestamps(file_name: str = "GetPlayerOptions.json") -> tuple[dict[str, int], int]:
    '''
    Get the timestamp of each slide image from a GetPlayerOptions.json

    Slides are named slide_0001.jpg, slide_0002.jpg, ... in the order they are listed.
    The first slide of a stream is always given timestamp 0, so that it covers the
    part of the presentation before any slide is listed.

    Parameters:
        file_name (str): Give path to GetPlayerOptions.json

    Returns:
        images (dict): For each image - name, its 'Time' value from the json
            (presumably the moment it starts being displayed, in ms - confirm against Mediasite)
        duration (int): Duration in ms (unless mediasite changes their format)
    '''
    with open(file_name, encoding="utf-8") as file:
        presentation_data = json.load(file)['d']['Presentation']

    duration = presentation_data['Duration']
    images = {}

    # Iterating through the json list
    for stream in presentation_data['Streams']:
        if not stream.get('HasSlideContent'):
            continue
        slides = stream.get('Slides') or []
        for number, current in enumerate(slides, start=1):
            images['slide_{:04d}.jpg'.format(number)] = current['Time']
        # Handle first part, if there is no slide yet (a stream without slides has nothing to fix)
        if slides and slides[0]['Time'] != 0:
            images['slide_0001.jpg'] = 0

    return images, duration
def decide_overwrite(file_name: str, overwrite: bool = False) -> bool:
    '''
    Decide whether file_name may be written.

    Parameters:
        file_name (str): Path that is about to be written
        overwrite (bool): Overwrite an existing file without asking?

    Returns:
        True if the file does not exist, if overwrite is set, or if the user
        answers 'overwrite'. False if the user answers 'skip'.
    '''
    if overwrite:
        return True
    # Keep asking until the answer is one of the two accepted words
    while os.path.isfile(file_name):
        answer = input(f"{file_name} already exists. Do you want to overwrite over it? Enter 'overwrite' or 'skip' ")
        if answer == 'overwrite':
            return True
        if answer == 'skip':
            return False
    return True
if __name__ == "__main__":
    # Reads GetPlayerOptions.json from the current folder, makes sure the images in
    # "slides" share one resolution, then writes the concat file for ffmpeg.
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--output', default="concat.txt")
    parser.add_argument('-ss', '--start', type=int, default=0, help='Desired start time for video')
    parser.add_argument('-t', '--time', type=int, default=-1, help='Desired end time for video')
    parser.add_argument('-f', '--overwrite', action='store_true', default=False, help='Specify if you want it to overwrite existing files without asking.')
    args = parser.parse_args()

    image_timestamps, video_duration = get_image_timestamps()
    # Forward the flag so that -f applies to the moved originals as well as to the output file
    check_images(list(image_timestamps), overwrite=args.overwrite)

    if decide_overwrite(args.output, args.overwrite):
        # The file is only written, never read back
        with open(args.output, "w", encoding="utf-8") as file:
            file.write(create_slideshow(image_timestamps, video_duration, "slides", args.time, args.start))
@flanter21
Copy link
Author

flanter21 commented Dec 25, 2024

Here is a bit of an explanation of how it works.

Core

  • get_image_durations reads the GetPlayerOptions.json from Mediasite and will pair up each slide to the duration it is displayed.
  • create_slideshow uses moviepy to join the images up into a video, including the timing information fed into it from get_image_durations. Slideshow videos appear to be timed to 60fps so that is the default framerate.

Additional - in the example, these are run immediately after get_image_durations.

  • check_images ensures all images have the same dimensions. If they do not, it resizes them using Pillow. This is so that create_slideshow can use moviepy's chain method, as its compose method is very slow.
  • segment_slideshow is an intermediate that calls create_slideshow by itself. It allows converting the images into a video in steps, eg. first 1000 images → video → next 1000 → video ... all videos → full video
    • It solves a problem where python is killed due to using too much memory if you try concatenating too many images at once.

Issues

  • Currently the main issue with this is that moviepy requires pillow 10.4 and yt-dlp requires pillow 11.0 so the script should be run in a venv with moviepy.
  • The output video duration is slightly shorter than the audio (we're talking less than 0.1 seconds)
  • create_slideshow - moviepy sometimes throws this weird error
    • In file temp/10.mkv, 6220800 bytes wanted but 0 bytes read at frame index 30091 (out of a total 30091 frames), at time 501.52/501.52 sec. Using the last valid frame instead.
    • This results in a file that is one frame (per occurrence) longer than compared to concatenating them with ffmpeg.
  • yt-dlp doesn't currently provide a method to get hold of GetPlayerOptions.json
  • get_image_durations is currently hardcoded to only handle one naming scheme - slide_{0:D4}.jpg
    • Naming schemes are found in GetPlayerOptions → d.Presentation.Streams._number_.SlideImageFileNameTemplate
    • I could make minor changes to support slide_video1_{0:D4}.jpg but yt-dlp fragments would be more tricky.
  • create_slideshow is hardcoded to only use the chain method, maybe I could add an option to use the compose method but I haven't needed to yet.

@flanter21
Copy link
Author

My fork of yt-dlp here has implemented support for saving GetPlayerOptions.json. It also has several fixes for downloading transcripts, subtitles and supports more url formats and fixes some videos that would not download because they are behind logins. A lot of this work is taken directly from https://github.com/kclauhk/yt-dlp so I'd like to thank them too.

This shell script is what I use currently to streamline downloading and converting slides into a video. It may need changes in your environment, but the gist is that it's meant to run in a folder, whose parent has a python venv, a yt-dlp binary and both the script below and above. yt-dlp can be compiled using github actions if you fork my repo.

You call it by running ../download.sh "urlhere", and it should take care of most of the rest.

../yt-dlp_macos --cookies-from-browser firefox --write-auto-subs --sub-format vtt "$1"
../yt-dlp_macos --cookies-from-browser firefox --write-auto-subs --sub-format txt --skip-download "$1"
../yt-dlp_macos --cookies-from-browser firefox --write-auto-subs --sub-langs und --sub-format json -f mhtml --keep-fragments "$1"
rename 's/(?<=-Frag)(\d+)/sprintf("%04d.jpg",$1+1)/eg' *Frag*
rename 's/(.+Frag)/"slide_"/eg' *Frag*
mv *.json GetPlayerOptions.json 
mkdir slides
mv *.jpg slides
../bin/python "../slideshow interpreter.py"
ffmpeg -i slideshow.mkv -i *.mp4 -i *.vtt -c:v copy -c:a copy -c:s mov_text -map 0:v:0 -map 1:a:0 -map 2:s:0 -metadata:s:s:0 language=eng output.mp4

@flanter21
Copy link
Author

Note you may want to use --cookies-from-browser chrome instead since session cookies can be retrieved from chrome, allowing more videos to be downloaded.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment