Last active
June 25, 2020 20:25
-
-
Save peta/5451482 to your computer and use it in GitHub Desktop.
Die Jungs von ORF/FM4/APA haben offenbar den Publishingworkflow und die Integration der FM4oD Mitschnitte auf fm4.orf.at überarbeitet -- und somit mein altbewährtes Grabber-Bashskript unbrauchbar gemacht. Die neue Lösung ist etwas komplexer, weswegen ich das Skript auch mit Ruby anstatt Bash umgesetzt habe, aber funktioniert dennoch gewohnt zuve…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# encoding: UTF-8
require 'fileutils'
require 'json'
require 'open-uri'
require 'uri'

require 'nokogiri'
# Scrapes an FM4 on-demand overview page for audio playlists and downloads
# the listed recordings with curl.
#
# On construction the page at +url+ is fetched, the two JSON properties
# "radimetaApi" and "loopStreamUrl" are extracted from its source, every
# element with CSS class "audioplaylist" is resolved to a playlist via the
# radimeta API, and a flat numbered menu of shows and episodes is built.
class PodcastGrabber
  # User agent string curl sends when fetching a recording.
  USER_AGENT = 'Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0'.freeze

  # One selectable menu entry. +kind+ is :show or :episode; +url+ and
  # +file_name+ are nil for shows. Positional access (entry[0]..entry[4])
  # keeps working because Struct supports indexing.
  Choice = Struct.new(:code, :title, :url, :file_name, :kind)

  # @param url [URI, String] address of the overview page
  # @param output_dir [String] directory below which one folder per show is created
  # @yieldparam grabber [PodcastGrabber] the initialized instance (block is optional)
  # @raise [StandardError] when the page lacks one of the required JSON properties
  def initialize(url, output_dir, &block)
    @url = url
    @output_dir = output_dir
    scrape_playlists
    build_choices
    yield self if block_given?
  end

  # Prints the numbered menu: a header line per show followed by its episodes.
  def show_choices
    @choices.each do |choice|
      if choice.kind == :show
        # Show title
        puts '-----', "#{choice.title} ('#{choice.code}' für alle)"
      else
        # Single podcast item
        puts "#{choice.code.to_s.rjust(4)}) #{choice.title}"
      end
    end
  end

  # Downloads the entry with the given menu code. A show code downloads every
  # episode of that show; an episode code downloads just that episode.
  #
  # @param item_code [String, Integer] menu code as printed by #show_choices
  # @return [void]
  def grab(item_code)
    code = item_code.to_i
    idx = @choices.index { |choice| choice.code == code }
    if idx.nil?
      puts 'Sorry, einen Mitschnitt mit diesem Code gibt es nicht'
      sleep 2
      return
    end

    # The owning show is the closest :show entry at or before the selection;
    # its title names the output folder.
    show = @choices[0..idx].reverse.find { |choice| choice.kind == :show }
    dpath = show ? File.join(@output_dir, safe_name(show.title)) : @output_dir
    FileUtils.mkdir_p(dpath)

    selected = @choices[idx]
    if selected.kind == :episode
      download(selected, dpath)
    else
      # Episodes of a show directly follow its header, up to the next header.
      @choices[(idx + 1)..-1]
        .take_while { |entry| entry.kind == :episode }
        .each { |episode| download(episode, dpath) }
    end
  end

  private

  # Fetches one episode into +to_dir+ unless a file of that name already
  # exists, and appends a line to "<to_dir>/log" after a successful download.
  def download(episode, to_dir)
    fname = safe_name(episode.file_name)
    fpath = File.join(to_dir, fname)
    if File.exist?(fpath)
      puts "Ein Mitschnitt mit dem Dateinamen '#{fname}' existiert bereits. Überspringe Download ..."
      return
    end

    puts "Lade #{episode.url}"
    # Argument-list form: nothing is passed through a shell, so quotes in the
    # remote-supplied URL or file name cannot inject commands. -f makes curl
    # report HTTP errors via its exit status instead of saving the error page.
    if system('curl', '-f', '-A', USER_AGENT, '-o', fpath, episode.url)
      # Write to log file
      log_msg = "[#{Time.now.strftime('%F')}] Downloaded #{episode.url}"
      File.open(File.join(to_dir, 'log'), 'a') { |log| log.puts(log_msg) }
    else
      puts "FEHLER: Download von #{episode.url} fehlgeschlagen"
    end
  end

  # Groups the scraped playlists by show title and flattens them into the
  # numbered menu (@choices). Codes are consecutive Integers starting at 1 for
  # shows and episodes alike, so #grab can match both.
  def build_choices
    # Group by show name
    @shows = {}
    @playlists.each do |playlist|
      (@shows[playlist['title']] ||= []).concat(Array(playlist['streams']))
    end

    @choices = []
    @shows.each do |name, episodes|
      @choices << Choice.new(@choices.size + 1, name, nil, nil, :show)
      episodes.each do |episode|
        stream_id = episode['loopStreamId']
        @choices << Choice.new(@choices.size + 1, episode_title(episode),
                               episode_url(stream_id), stream_id, :episode)
      end
    end
  end

  # Returns the episode's title, falling back to its start date and, if that
  # is missing too, to its stream id.
  def episode_title(episode)
    title = episode['title']
    return title unless title.to_s.empty?

    # 'start' is divided by 1000 before Time.at, i.e. treated as milliseconds.
    start = episode['start']
    start ? Time.at(start / 1000).strftime('%a, %F') : episode['loopStreamId'].to_s
  end

  # Builds the download URL for a stream id on top of the scraped base URL.
  def episode_url(stream_id)
    "#{@loopstream_url}&ua=flash&id=#{URI.encode_www_form_component(stream_id.to_s)}"
  end

  # Loads the overview page, extracts the API base URLs and resolves every
  # ".audioplaylist" element to its playlist JSON. Playlists that fail to load
  # are reported and skipped.
  def scrape_playlists
    src = fetch(@url)
    # Find radimeta+loopstream base URLs
    @radimeta_api = extract_json_url(src, 'radimetaApi')
    @loopstream_url = extract_json_url(src, 'loopStreamUrl')
    # Build playlists
    @playlists = Nokogiri::HTML(src).css('.audioplaylist')
                                    .map { |elem| fetch_playlist(elem['data-id']) }
                                    .compact
  end

  # Returns the URL stored in the JSON string property +property+ of +src+.
  def extract_json_url(src, property)
    match = /#{Regexp.escape(property)}"\s*:\s*"(#{URI.regexp})"/.match(src)
    raise StandardError, %(Page did not contain JSON property "#{property}") unless match

    match[1]
  end

  # Returns the parsed playlist for +playlist_id+, or nil (after printing the
  # error) when the request fails or the response is not valid JSON.
  def fetch_playlist(playlist_id)
    JSON.parse(fetch(@radimeta_api + playlist_id.to_s))
  rescue OpenURI::HTTPError, JSON::ParserError => e
    puts "FEHLER: #{e.message}"
    nil
  end

  # Reads a remote resource through open-uri. Going through the parsed URI
  # object instead of Kernel#open works on Ruby 3.x and never interprets the
  # string as a local path or "|command".
  def fetch(url)
    uri = URI.parse(url.to_s)
    raise ArgumentError, "Cannot open URL: #{url}" unless uri.respond_to?(:open)

    uri.open(&:read)
  end

  # Turns a remote-supplied name into a single path component: separators and
  # NUL bytes are replaced, and empty or dots-only names (".", "..") become "_".
  def safe_name(name)
    cleaned = name.to_s.tr("/\0", '-').strip
    cleaned =~ /\A\.*\z/ ? '_' : cleaned
  end
end
# Command-line entry point: resolves the overview URL and output directory
# from ARGV, then runs an interactive download prompt until the user enters
# 'keinen', input ends, or the process is interrupted.
if $PROGRAM_NAME == __FILE__
  # Prints the usage banner.
  # NOTE(review): the banner says the default output directory is the current
  # directory, but the code below defaults to ~/Music/Livesets/FM4 -- confirm
  # which one is intended.
  def show_usage
    puts <<HELP
=========================================================================================
FM4-Grabber 0.1 --- Ein Ruby-Script um FM4 Podcasts herunterzuladen (aka "FM4 On Demand")
=========================================================================================
Benutzung: ./fm4-grabber [url] [ausgabevz]
url HTTP URL zur FM4oD Übersichtsseite (OPTIONAL)
ausgabevz Verzeichnispfad zum Ausgabeordner in dem die Podcasts gespeichert werden.
Standardmäßig wird das aktuelle Verzeichnis verwendet. (OPTIONAL)
=========================================================================================
Peter Geil, mailto:[email protected] (c) 2012-2013
=========================================================================================
HELP
  end

  begin
    if (ARGV & ['-h', '--help']).any?
      show_usage
      exit
    end

    # Determine URL of index page. Without an argument the current URL is
    # looked up from a gist; the command output ends with a newline, which
    # URI.parse rejects, so the value is stripped first.
    raw_url = ARGV[0] || `curl -sf 'https://gist.github.com/peta/6f095ab8f7b94b1397d3/raw/fm4od_url'`
    url = begin
      URI.parse(raw_url.to_s.strip)
    rescue URI::InvalidURIError
      nil
    end
    unless url.is_a?(URI::HTTP)
      show_usage
      # Kernel#exit only accepts true/false/Integer, so print the message
      # separately and exit with a failure status.
      puts "\n\nPlease provide a valid URL"
      exit 1
    end

    # Determine output directory
    dpath = File.absolute_path(ARGV[1] || File.expand_path('~/Music/Livesets/FM4'))
    unless Dir.exist?(dpath)
      show_usage
      puts "\n\nBitte geben Sie einen gültigen Verzeichnispfad an"
      exit 1
    end

    puts <<MSG
=========================================================================================
FM4-Grabber 0.1 --- Ein Ruby-Script um FM4 Podcasts herunterzuladen (aka "FM4 On Demand")
=========================================================================================
Verwende URL: #{url}
Ausgabeverzeichnis: #{dpath}
Suche Mitschnitte (dies kann einen Moment dauern) ...
=========================================================================================
# G E F U N D E N E P L A Y L I S T S #
=========================================================================================
MSG
    $stdout.flush

    PodcastGrabber.new(url, dpath) do |grabber|
      loop do
        grabber.show_choices
        puts <<MSG
=========================================================================================
Welchen Mitschnitt darf ich herunterladen?
MSG
        print " ('keinen' eingeben um zu beenden) >>> "
        # Read from $stdin explicitly: bare Kernel#gets reads from ARGF and
        # would try to open the URL/directory arguments as files.
        answer = $stdin.gets
        # nil means end of input; treat it like the quit keyword.
        break if answer.nil? || answer.chomp == 'keinen'

        grabber.grab(answer.chomp)
      end
      puts 'Beenden ...'
    end
  rescue Interrupt
    puts "\n\nBeenden ... (Interrupted)"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment