Snarp · June 14, 2020 14:43
diff --git a/extract_youtube_closed_captions.rb b/extract_youtube_closed_captions.rb
 require 'json'

 # Extracts all UTF-8 strings from YouTube's closed caption (`timedtext`) JSON 
 # object to generate a rough transcript.
 # 
 # Instructions for acquiring the JSON: 
 # 
 # 1. Open the video in a new Firefox tab. Don't click "CC" yet.
 # 
 # 2. Open Firefox Developer Tools (right-click somewhere and select "Inspect 
 #    Element") and open the "Network" tab. (Shortcut: Ctrl+Shift+E)
 # 
 # 3. Click "CC" to turn on closed captions.
 #
 # 4. Right-click on the "?timedtext(somethingsomething)" object that appears
 #    in the list.
 #
 # 5. Select "Copy response".
 #
 # 6. Paste the JSON into your text editor / console / whatever.
 # 
 # Builds a rough transcript by concatenating, in order, the `utf8` string of
 # every segment of every event in a `timedtext` JSON payload.
 #
 # Events without a `segs` array, non-Hash events and non-Hash segments are
 # skipped. A payload with no usable `events` array yields an empty transcript.
 #
 # @param json_string [String] raw JSON text of the `timedtext` response
 # @param output_fname [String, nil] path the transcript is written to; pass
 #   nil (or false) to skip writing a file
 # @return [String] the concatenated caption text
 # @raise [JSON::ParserError] if json_string is not valid JSON
 def extract_closed_captions(json_string, output_fname="video_transcript.txt")
   json = JSON.parse(json_string, symbolize_names: true)

   # Tolerate payloads that are not an object or that lack an :events array
   # instead of failing with NoMethodError/TypeError on nil or scalars.
   events = json.is_a?(Hash) && json[:events].is_a?(Array) ? json[:events] : []

   transcript = events.grep(Hash).flat_map do |event|
     segs = event[:segs]
     # Segments missing :utf8 contribute nil, which join renders as "".
     segs.is_a?(Array) ? segs.grep(Hash).map { |seg| seg[:utf8] } : []
   end.join

   File.write(output_fname, transcript) if output_fname
   transcript
 end
	require 'json'

	# Extracts all UTF-8 strings from YouTube's closed caption (`timedtext`) JSON
	# object to generate a rough transcript.
	#
	# Instructions for acquiring the JSON:
	#
	# 1. Open the video in a new Firefox tab. Don't click "CC" yet.
	#
	# 2. Open Firefox Developer Tools (right-click somewhere and select "Inspect
	# Element") and open the "Network" tab. (Shortcut: Ctrl+Shift+E)
	#
	# 3. Click "CC" to turn on closed captions.
	#
	# 4. Right-click on the "?timedtext(somethingsomething)" object that appears
	# in the list.
	#
	# 5. Select "Copy response".
	#
	# 6. Paste the JSON into your text editor / console / whatever.
	#
	# Builds a rough transcript by concatenating, in order, the `utf8` string of
	# every segment of every event in a `timedtext` JSON payload.
	#
	# Events without a `segs` array, non-Hash events and non-Hash segments are
	# skipped. A payload with no usable `events` array yields an empty transcript.
	#
	# @param json_string [String] raw JSON text of the `timedtext` response
	# @param output_fname [String, nil] path the transcript is written to; pass
	#   nil (or false) to skip writing a file
	# @return [String] the concatenated caption text
	# @raise [JSON::ParserError] if json_string is not valid JSON
	def extract_closed_captions(json_string, output_fname="video_transcript.txt")
	  json = JSON.parse(json_string, symbolize_names: true)

	  # Tolerate payloads that are not an object or that lack an :events array
	  # instead of failing with NoMethodError/TypeError on nil or scalars.
	  events = json.is_a?(Hash) && json[:events].is_a?(Array) ? json[:events] : []

	  transcript = events.grep(Hash).flat_map do |event|
	    segs = event[:segs]
	    # Segments missing :utf8 contribute nil, which join renders as "".
	    segs.is_a?(Array) ? segs.grep(Hash).map { |seg| seg[:utf8] } : []
	  end.join

	  File.write(output_fname, transcript) if output_fname
	  transcript
	end