Last active
June 28, 2020 11:19
-
-
Save puttin/090ba3ab84757d753c28595d348810a1 to your computer and use it in GitHub Desktop.
fetch WWDC HD videos' download links
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source 'https://rubygems.org' | |
gem 'httparty' | |
gem 'nokogiri' | |
gem 'pry' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'HTTParty' | |
require 'Nokogiri' | |
require 'Pry' | |
# Plain data holder for one WWDC session.
#
# All attributes are nil on a fresh instance and are meant to be assigned by
# whoever builds the object:
#
#   id            - identifier of the session
#   title         - human-readable session title
#   link          - path/URL of the session page
#   hd_video_link - download URL of the HD video, if any
#   sd_video_link - download URL of the SD video, if any
#   pdf_link      - download URL of the slides PDF, if any
class Session
  attr_accessor :id, :title, :link, :hd_video_link, :sd_video_link, :pdf_link
end
# Scrapes a WWDC video index page on developer.apple.com and, for every
# session listed there, visits the session page to collect its download links.
#
# Usage:
#   year = WWDC.new('/videos/wwdc2020')
#   year.fetch
#   year.sessions # => [Session, ...]
class WWDC
  include HTTParty
  base_uri 'https://developer.apple.com/'

  # Default number of retries after a failed request before giving up.
  MAX_RETRIES = 3

  # path     - index page path handed to the constructor
  # sessions - Array of Session objects; nil until #fetch has run
  attr_accessor :path, :sessions

  # path - path of the index page, relative to base_uri.
  def initialize(path)
    @path = path
  end

  # Performs a GET request for +link+, retrying when the request raises.
  #
  # link        - path or URL to request.
  # max_retries - how many times to retry after the first failure.
  #
  # Each failed attempt that will be retried is reported on stdout. Once the
  # retries are used up the last error is re-raised unchanged.
  #
  # Returns the response object produced by HTTParty.
  def get_response(link, max_retries: MAX_RETRIES)
    attempts = 0
    begin
      self.class.get(link)
    rescue StandardError
      raise if attempts >= max_retries

      attempts += 1
      puts "Failed to get #{link} #{attempts}"
      retry
    end
  end

  # Downloads the index page at #path, builds one Session per index entry and
  # stores the result in #sessions.
  #
  # Index entries that carry no usable link are skipped (with a note on
  # stderr) instead of aborting the whole run.
  #
  # Returns the Array of Session objects.
  def fetch
    index_page = Nokogiri::HTML(get_response(@path).body)
    entries = index_page.search('main/li/section', '.collection-item .gutter')
    @sessions = entries.map { |entry| build_session(entry) }.compact
  end

  private

  # Builds a Session from one index entry, fetching the session page to look
  # up its download links. Returns nil when the entry has no link to follow.
  def build_session(entry)
    anchor = entry.search('a').first
    href = anchor && anchor['href']
    unless href
      STDERR.puts 'Skipping an index entry without a session link'
      return nil
    end

    detail_page = Nokogiri::HTML(get_response(href).body)

    session = Session.new
    session.id = File.basename(href)
    # A missing <h4> leaves the title nil rather than raising.
    session.title = anchor.search('h4').first&.text
    session.link = href
    session.hd_video_link = labelled_href(detail_page, 'HD Video')
    session.sd_video_link = labelled_href(detail_page, 'SD Video')
    session.pdf_link = labelled_href(detail_page, 'Presentation Slides (PDF)')
    session
  end

  # Returns the href of the first anchor on +page+ whose text contains
  # +label+, or nil when there is no such anchor.
  def labelled_href(page, label)
    anchor = page.at(%(a:contains("#{label}")))
    anchor && anchor['href']
  end
end
# Collects the HD video link of every session of +year+ that has one.
#
# year - any object responding to #sessions, whose elements respond to
#        #hd_video_link (e.g. a WWDC instance after #fetch).
#
# Sessions whose HD link is nil are left out. A +year+ whose #sessions is nil
# (nothing fetched yet) yields an empty Array instead of raising.
#
# Returns an Array of links in session order.
def all_hd_links(year)
  Array(year.sessions).map(&:hd_video_link).compact
end
# Works out which of +links+ have not been downloaded yet.
#
# inputs - Array of paths. Each path may be
#          * a directory: a link counts as present when a regular file named
#            like the last segment of the link's URL path exists in it;
#          * a regular file: read as a list with one path per line; a link
#            counts as present when the basename of some line equals the last
#            segment of the link's URL path.
#          Paths that do not exist are reported on stderr and ignored; paths
#          that exist but are neither a directory nor a regular file are
#          ignored silently.
# links  - Array of URL strings.
#
# A link found in any input is considered present, even if an earlier input
# did not contain it. Links are only classified when at least one usable
# input was processed.
#
# Returns an Array of the links that were looked for and found nowhere.
def non_exist_links(inputs, links)
  # Loaded here so the method works without relying on another library
  # having pulled these in already.
  require 'set'
  require 'uri'

  missing = Set.new
  found = Set.new
  # The file name of a link does not depend on the input, so compute it once.
  file_names = Hash.new { |cache, link| cache[link] = File.basename(URI(link).path) }

  inputs.each do |input|
    unless File.exist?(input)
      STDERR.puts "#{input} is not file or folder"
      next
    end

    downloaded =
      if File.directory?(input)
        ->(name) { File.file?(File.join(input, name)) }
      elsif File.file?(input)
        # A Set keeps the per-link lookup constant-time for long lists.
        listed = File.readlines(input).map { |line| File.basename(line.strip) }.to_set
        ->(name) { listed.include?(name) }
      end
    next unless downloaded

    links.each do |link|
      next if found.include?(link)

      if downloaded.call(file_names[link])
        found << link
        # An earlier input may have marked this link as missing.
        missing.delete(link)
      else
        missing << link
      end
    end
  end

  # Sanity report: the two sets should partition the links.
  unless missing.count + found.count == links.count
    STDERR.puts "non_exist:#{missing.count} exist:#{found.count} links:#{links.count}"
  end

  missing.to_a
end
# Script body: scrape the WWDC 2020 index and gather every HD video link.
YEAR2020 = WWDC.new('/videos/wwdc2020')
YEAR2020.fetch
hd_links = all_hd_links(YEAR2020)

# Without arguments, print every HD link. With arguments, each one is handed
# to non_exist_links and only the links it reports as missing are printed.
if ARGV.empty?
  puts hd_links.sort
else
  puts non_exist_links(ARGV, hd_links).sort
end

# Pry.start(binding)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment