jooray · January 20, 2025 12:39 · jooray · Jan 20, 2025
diff --git a/cut_video.py b/cut_video.py
 #!/usr/bin/env python3
 """
 cut_video.py

 Usage:
    python cut_video.py [--include|--exclude] input_video.mp4 output_video.mp4 < timestamps.txt

 The timestamps file (passed via stdin) contains lines like:
    00:00:30.900;00:00:54.360
    00:02:28.080;00:02:29.340
    ...

 - If --include is used: only these segments remain in the output.
 - If --exclude is used: these segments are removed from the output.

 Requires ffmpeg and ffprobe to be installed.
 """

 import sys
 import os
 import subprocess
 import argparse
 import tempfile

 def get_video_duration(video_path):
    """
    Return the duration of the given video in seconds, as reported by ffprobe.

    ffprobe is asked for the container-level "duration" entry only, printed
    as a bare CSV value so the output can be fed straight to float().
    Exits the process with status 1 if ffprobe fails or if its output is
    not a number.
    """
    probe = subprocess.run(
        [
            "ffprobe",
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "csv=p=0",
            video_path,
        ],
        capture_output=True,
        text=True,
    )
    if probe.returncode != 0:
        print("Error running ffprobe to get duration:", probe.stderr, file=sys.stderr)
        sys.exit(1)

    try:
        return float(probe.stdout.strip())
    except ValueError:
        # ffprobe succeeded but printed something that is not a number
        print("Could not parse video duration.", file=sys.stderr)
        sys.exit(1)

 def time_str_to_seconds(timestr):
    """
    Convert a timestamp string to total seconds as float.

    The canonical form is HH:MM:SS.xxx. The shorter MM:SS.xxx and SS.xxx
    forms are accepted as well; missing leading fields count as zero.
    Surrounding whitespace is ignored.

    Raises ValueError if there are more than three colon-separated fields
    or if any field is not a number.
    """
    # Example: "00:03:13.860" -> 193.86, "03:13.860" -> 193.86, "13.86" -> 13.86
    fields = timestr.strip().split(':')
    if len(fields) > 3:
        raise ValueError(f"Invalid timestamp (expected HH:MM:SS.xxx): {timestr!r}")
    # Left-pad with zero fields so the three-way unpacking always succeeds
    h, m, s = ['0'] * (3 - len(fields)) + fields
    return float(h)*3600 + float(m)*60 + float(s)

 def seconds_to_time_str(secs):
    """
    Convert seconds (float) back to a ffmpeg-friendly string HH:MM:SS.xxx

    The value is rounded to whole milliseconds before it is split into
    fields. Rounding only while formatting the seconds field would turn
    59.9996 into the out-of-range "00:00:60.000"; rounding first carries
    the overflow into the minutes (and hours) and gives "00:01:00.000".
    """
    total_ms = int(round(secs * 1000))
    hours, rem_ms = divmod(total_ms, 3_600_000)
    minutes, rem_ms = divmod(rem_ms, 60_000)
    seconds, millis = divmod(rem_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"

 def parse_intervals_from_stdin():
    """
    Read lines from stdin of the form "start;end",
    returning a list of (start_seconds, end_seconds).

    Blank lines are skipped. Every other line must hold exactly two
    timestamps separated by ';', and the end must lie after the start.

    Raises ValueError, naming the 1-based input line number, when a line is
    malformed, a timestamp cannot be parsed, or end <= start.
    """
    intervals = []
    for lineno, raw_line in enumerate(sys.stdin, start=1):
        line = raw_line.strip()
        if not line:
            continue
        parts = line.split(';')
        if len(parts) != 2:
            raise ValueError(f"Line {lineno}: expected 'start;end', got {line!r}")
        try:
            start_sec = time_str_to_seconds(parts[0].strip())
            end_sec = time_str_to_seconds(parts[1].strip())
        except ValueError as err:
            raise ValueError(f"Line {lineno}: invalid timestamp in {line!r}") from err
        # An empty or reversed interval cannot be cut; report it here rather
        # than letting it fail (or be silently ignored) further downstream.
        if end_sec <= start_sec:
            raise ValueError(f"Line {lineno}: end must be after start in {line!r}")
        intervals.append((start_sec, end_sec))
    return intervals

 def merge_segments(segments):
    """
    Collapse overlapping or touching (start, end) segments into single spans.

    The input must already be ordered by start time. A segment whose start
    is at or before the end of the span built so far is folded into that
    span; otherwise it opens a new one. Returns a new list of tuples.
    """
    merged = []
    for start, end in segments:
        if merged and start <= merged[-1][1]:
            # Overlaps or touches the last span: stretch it if needed
            prev_start, prev_end = merged[-1]
            merged[-1] = (prev_start, max(prev_end, end))
        else:
            # Gap before this segment (or first segment): start a new span
            merged.append((start, end))
    return merged

 def invert_segments(include_segments, total_duration):
    """
    Given segments to exclude and the total duration, compute the segments
    to keep, i.e. the complement of the given segments within
    [0, total_duration].

    Despite its name, include_segments holds the (start, end) spans to be
    removed. They may be unsorted and may overlap.

    Example: excluding (2, 4) and (6, 8) from a 10 second video keeps
    (0, 2), (4, 6) and (8, 10).

    Kept spans never extend past total_duration: an excluded span that
    starts at or after the end of the video contributes nothing.

    Returns a list of (start, end) of segments we want to keep (the inverse).
    """
    ordered = sorted(include_segments, key=lambda x: x[0])
    inverted = []
    # Everything before current_start is already kept or excluded
    current_start = 0.0

    for (s, e) in ordered:
        if s >= total_duration:
            # This and all later exclusions lie beyond the end of the video
            break
        # If there's a gap between current_start and s, that's a "keep" segment
        if s > current_start:
            inverted.append((current_start, s))
        # Taking the max makes overlapping exclusions behave like one merged span
        current_start = max(current_start, e)

    # Finally, if current_start is less than total_duration, keep the rest
    if current_start < total_duration:
        inverted.append((current_start, total_duration))

    return inverted

 def main():
    """
    Command-line entry point: cut the input video using intervals from stdin.

    Reads "start;end" intervals from stdin, optionally inverts them against
    the video duration (--exclude), re-encodes every kept span into its own
    temporary file and finally joins the pieces with ffmpeg's concat demuxer.

    --include and --exclude are mutually exclusive; argparse rejects the
    combination with exit status 2. With neither flag, --include is assumed.
    Exits with status 1 when no intervals are given, when nothing remains
    after exclusion, or when an ffmpeg invocation fails.
    """
    parser = argparse.ArgumentParser(description="Cut video by timestamps.")
    parser.add_argument("input_video", help="Path to input video")
    parser.add_argument("output_video", help="Path to output video")
    # The two modes contradict each other, so let argparse refuse the
    # combination instead of silently letting --exclude win.
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--include", action="store_true",
                      help="Keep only the specified segments (default if neither include nor exclude is given).")
    mode.add_argument("--exclude", action="store_true",
                      help="Remove the specified segments from the video.")
    args = parser.parse_args()

    # If neither --include nor --exclude is specified, assume --include
    if not args.include and not args.exclude:
        args.include = True

    intervals = parse_intervals_from_stdin()
    if not intervals:
        print("No intervals provided.", file=sys.stderr)
        sys.exit(1)

    # If --exclude, we need to invert intervals
    if args.exclude:
        total_duration = get_video_duration(args.input_video)
        intervals = invert_segments(intervals, total_duration)
        if not intervals:
            print("No video remains after excluding these intervals!", file=sys.stderr)
            sys.exit(1)

    # Sort intervals by start time and merge if needed
    intervals = merge_segments(sorted(intervals, key=lambda x: x[0]))

    # Intermediate files live in a temporary directory removed on exit
    with tempfile.TemporaryDirectory() as tmpdir:
        segment_files = []
        for idx, (start_sec, end_sec) in enumerate(intervals):
            seg_path = os.path.join(tmpdir, f"segment_{idx:04d}.mp4")
            segment_files.append(seg_path)

            start_str = seconds_to_time_str(start_sec)
            end_str = seconds_to_time_str(end_sec)

            # Use ffmpeg to cut the segment. Each piece is re-encoded with the
            # same codec settings (presumably so the stream-copy concat below
            # sees uniform streams -- confirm against the ffmpeg docs).
            cmd = [
                "ffmpeg", "-y",
                "-i", args.input_video,
                "-ss", start_str,
                "-to", end_str,
                "-c:v", "libx264", "-crf", "18", "-preset", "veryfast",
                "-c:a", "aac",
                seg_path,
            ]

            print(f"Extracting segment {idx}: {start_str} -> {end_str}")
            result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if result.returncode != 0:
                # errors="replace": ffmpeg output is not guaranteed to be valid UTF-8
                print(f"Failed to extract segment {idx}: {result.stderr.decode(errors='replace')}", file=sys.stderr)
                sys.exit(1)

        # Create concat file
        concat_list_path = os.path.join(tmpdir, "concat_list.txt")
        with open(concat_list_path, "w") as f:
            for seg_file in segment_files:
                # The concat demuxer expects lines: file '/path/to/segment'
                # A single quote inside the path has to be written as '\''
                escaped = seg_file.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        # Concatenate all segments; "-safe 0" is passed because the list
        # contains absolute paths.
        cmd_concat = [
            "ffmpeg", "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", concat_list_path,
            "-c", "copy",
            args.output_video,
        ]
        print("Concatenating segments into final output.")
        result_concat = subprocess.run(cmd_concat, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result_concat.returncode != 0:
            print(f"Failed to concatenate segments: {result_concat.stderr.decode(errors='replace')}", file=sys.stderr)
            sys.exit(1)

    print(f"Done. Output saved to {args.output_video}")

 # Run the command-line interface only when executed as a script, not on import.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""
	cut_video.py

	Usage:
	python cut_video.py [--include|--exclude] input_video.mp4 output_video.mp4 < timestamps.txt

	The timestamps file (passed via stdin) contains lines like:
	00:00:30.900;00:00:54.360
	00:02:28.080;00:02:29.340
	...

	- If --include is used: only these segments remain in the output.
	- If --exclude is used: these segments are removed from the output.

	Requires ffmpeg and ffprobe to be installed.
	"""

	import sys
	import os
	import subprocess
	import argparse
	import tempfile

	def get_video_duration(video_path):
	    """
	    Return the duration of the given video in seconds, as reported by ffprobe.

	    ffprobe is asked for the container-level "duration" entry only, printed
	    as a bare CSV value so the output can be fed straight to float().
	    Exits the process with status 1 if ffprobe fails or if its output is
	    not a number.
	    """
	    probe = subprocess.run(
	        [
	            "ffprobe",
	            "-v", "error",
	            "-show_entries", "format=duration",
	            "-of", "csv=p=0",
	            video_path,
	        ],
	        capture_output=True,
	        text=True,
	    )
	    if probe.returncode != 0:
	        print("Error running ffprobe to get duration:", probe.stderr, file=sys.stderr)
	        sys.exit(1)

	    try:
	        return float(probe.stdout.strip())
	    except ValueError:
	        # ffprobe succeeded but printed something that is not a number
	        print("Could not parse video duration.", file=sys.stderr)
	        sys.exit(1)

	def time_str_to_seconds(timestr):
	    """
	    Convert a timestamp string to total seconds as float.

	    The canonical form is HH:MM:SS.xxx. The shorter MM:SS.xxx and SS.xxx
	    forms are accepted as well; missing leading fields count as zero.
	    Surrounding whitespace is ignored.

	    Raises ValueError if there are more than three colon-separated fields
	    or if any field is not a number.
	    """
	    # Example: "00:03:13.860" -> 193.86, "03:13.860" -> 193.86, "13.86" -> 13.86
	    fields = timestr.strip().split(':')
	    if len(fields) > 3:
	        raise ValueError(f"Invalid timestamp (expected HH:MM:SS.xxx): {timestr!r}")
	    # Left-pad with zero fields so the three-way unpacking always succeeds
	    h, m, s = ['0'] * (3 - len(fields)) + fields
	    return float(h)*3600 + float(m)*60 + float(s)

	def seconds_to_time_str(secs):
	    """
	    Convert seconds (float) back to a ffmpeg-friendly string HH:MM:SS.xxx

	    The value is rounded to whole milliseconds before it is split into
	    fields. Rounding only while formatting the seconds field would turn
	    59.9996 into the out-of-range "00:00:60.000"; rounding first carries
	    the overflow into the minutes (and hours) and gives "00:01:00.000".
	    """
	    total_ms = int(round(secs * 1000))
	    hours, rem_ms = divmod(total_ms, 3_600_000)
	    minutes, rem_ms = divmod(rem_ms, 60_000)
	    seconds, millis = divmod(rem_ms, 1000)
	    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"

	def parse_intervals_from_stdin():
	    """
	    Read lines from stdin of the form "start;end",
	    returning a list of (start_seconds, end_seconds).

	    Blank lines are skipped. Every other line must hold exactly two
	    timestamps separated by ';', and the end must lie after the start.

	    Raises ValueError, naming the 1-based input line number, when a line is
	    malformed, a timestamp cannot be parsed, or end <= start.
	    """
	    intervals = []
	    for lineno, raw_line in enumerate(sys.stdin, start=1):
	        line = raw_line.strip()
	        if not line:
	            continue
	        parts = line.split(';')
	        if len(parts) != 2:
	            raise ValueError(f"Line {lineno}: expected 'start;end', got {line!r}")
	        try:
	            start_sec = time_str_to_seconds(parts[0].strip())
	            end_sec = time_str_to_seconds(parts[1].strip())
	        except ValueError as err:
	            raise ValueError(f"Line {lineno}: invalid timestamp in {line!r}") from err
	        # An empty or reversed interval cannot be cut; report it here rather
	        # than letting it fail (or be silently ignored) further downstream.
	        if end_sec <= start_sec:
	            raise ValueError(f"Line {lineno}: end must be after start in {line!r}")
	        intervals.append((start_sec, end_sec))
	    return intervals

	def merge_segments(segments):
	    """
	    Collapse overlapping or touching (start, end) segments into single spans.

	    The input must already be ordered by start time. A segment whose start
	    is at or before the end of the span built so far is folded into that
	    span; otherwise it opens a new one. Returns a new list of tuples.
	    """
	    merged = []
	    for start, end in segments:
	        if merged and start <= merged[-1][1]:
	            # Overlaps or touches the last span: stretch it if needed
	            prev_start, prev_end = merged[-1]
	            merged[-1] = (prev_start, max(prev_end, end))
	        else:
	            # Gap before this segment (or first segment): start a new span
	            merged.append((start, end))
	    return merged

	def invert_segments(include_segments, total_duration):
	    """
	    Given segments to exclude and the total duration, compute the segments
	    to keep, i.e. the complement of the given segments within
	    [0, total_duration].

	    Despite its name, include_segments holds the (start, end) spans to be
	    removed. They may be unsorted and may overlap.

	    Example: excluding (2, 4) and (6, 8) from a 10 second video keeps
	    (0, 2), (4, 6) and (8, 10).

	    Kept spans never extend past total_duration: an excluded span that
	    starts at or after the end of the video contributes nothing.

	    Returns a list of (start, end) of segments we want to keep (the inverse).
	    """
	    ordered = sorted(include_segments, key=lambda x: x[0])
	    inverted = []
	    # Everything before current_start is already kept or excluded
	    current_start = 0.0

	    for (s, e) in ordered:
	        if s >= total_duration:
	            # This and all later exclusions lie beyond the end of the video
	            break
	        # If there's a gap between current_start and s, that's a "keep" segment
	        if s > current_start:
	            inverted.append((current_start, s))
	        # Taking the max makes overlapping exclusions behave like one merged span
	        current_start = max(current_start, e)

	    # Finally, if current_start is less than total_duration, keep the rest
	    if current_start < total_duration:
	        inverted.append((current_start, total_duration))

	    return inverted

	def main():
	    """
	    Command-line entry point: cut the input video using intervals from stdin.

	    Reads "start;end" intervals from stdin, optionally inverts them against
	    the video duration (--exclude), re-encodes every kept span into its own
	    temporary file and finally joins the pieces with ffmpeg's concat demuxer.

	    --include and --exclude are mutually exclusive; argparse rejects the
	    combination with exit status 2. With neither flag, --include is assumed.
	    Exits with status 1 when no intervals are given, when nothing remains
	    after exclusion, or when an ffmpeg invocation fails.
	    """
	    parser = argparse.ArgumentParser(description="Cut video by timestamps.")
	    parser.add_argument("input_video", help="Path to input video")
	    parser.add_argument("output_video", help="Path to output video")
	    # The two modes contradict each other, so let argparse refuse the
	    # combination instead of silently letting --exclude win.
	    mode = parser.add_mutually_exclusive_group()
	    mode.add_argument("--include", action="store_true",
	                      help="Keep only the specified segments (default if neither include nor exclude is given).")
	    mode.add_argument("--exclude", action="store_true",
	                      help="Remove the specified segments from the video.")
	    args = parser.parse_args()

	    # If neither --include nor --exclude is specified, assume --include
	    if not args.include and not args.exclude:
	        args.include = True

	    intervals = parse_intervals_from_stdin()
	    if not intervals:
	        print("No intervals provided.", file=sys.stderr)
	        sys.exit(1)

	    # If --exclude, we need to invert intervals
	    if args.exclude:
	        total_duration = get_video_duration(args.input_video)
	        intervals = invert_segments(intervals, total_duration)
	        if not intervals:
	            print("No video remains after excluding these intervals!", file=sys.stderr)
	            sys.exit(1)

	    # Sort intervals by start time and merge if needed
	    intervals = merge_segments(sorted(intervals, key=lambda x: x[0]))

	    # Intermediate files live in a temporary directory removed on exit
	    with tempfile.TemporaryDirectory() as tmpdir:
	        segment_files = []
	        for idx, (start_sec, end_sec) in enumerate(intervals):
	            seg_path = os.path.join(tmpdir, f"segment_{idx:04d}.mp4")
	            segment_files.append(seg_path)

	            start_str = seconds_to_time_str(start_sec)
	            end_str = seconds_to_time_str(end_sec)

	            # Use ffmpeg to cut the segment. Each piece is re-encoded with the
	            # same codec settings (presumably so the stream-copy concat below
	            # sees uniform streams -- confirm against the ffmpeg docs).
	            cmd = [
	                "ffmpeg", "-y",
	                "-i", args.input_video,
	                "-ss", start_str,
	                "-to", end_str,
	                "-c:v", "libx264", "-crf", "18", "-preset", "veryfast",
	                "-c:a", "aac",
	                seg_path,
	            ]

	            print(f"Extracting segment {idx}: {start_str} -> {end_str}")
	            result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	            if result.returncode != 0:
	                # errors="replace": ffmpeg output is not guaranteed to be valid UTF-8
	                print(f"Failed to extract segment {idx}: {result.stderr.decode(errors='replace')}", file=sys.stderr)
	                sys.exit(1)

	        # Create concat file
	        concat_list_path = os.path.join(tmpdir, "concat_list.txt")
	        with open(concat_list_path, "w") as f:
	            for seg_file in segment_files:
	                # The concat demuxer expects lines: file '/path/to/segment'
	                # A single quote inside the path has to be written as '\''
	                escaped = seg_file.replace("'", "'\\''")
	                f.write(f"file '{escaped}'\n")

	        # Concatenate all segments; "-safe 0" is passed because the list
	        # contains absolute paths.
	        cmd_concat = [
	            "ffmpeg", "-y",
	            "-f", "concat",
	            "-safe", "0",
	            "-i", concat_list_path,
	            "-c", "copy",
	            args.output_video,
	        ]
	        print("Concatenating segments into final output.")
	        result_concat = subprocess.run(cmd_concat, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	        if result_concat.returncode != 0:
	            print(f"Failed to concatenate segments: {result_concat.stderr.decode(errors='replace')}", file=sys.stderr)
	            sys.exit(1)

	    print(f"Done. Output saved to {args.output_video}")

	# Run the command-line interface only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()