cjavdev · January 3, 2016 04:59
diff --git a/.gitignore b/.gitignore
 pages/*
 episode_pages/*
 videos/*
 cookies.txt
diff --git a/railscasts_download.rb b/railscasts_download.rb
 #!/usr/bin/env ruby

 require "nokogiri"
 require "open-uri"
 require "thread/pool"

 # Worker pool shared by every `download` call in this script. 24 is the size
 # handed to Thread.pool (presumably the number of concurrent workers --
 # confirm against the `thread` gem documentation).
 $pool = Thread.pool(24)

 # Queues a `wget` download of +url+ into +output_file+ on the shared $pool.
 #
 # Nothing is queued when +output_file+ already exists, so re-running the
 # script only fetches what is still missing. wget is started with an argument
 # list instead of an interpolated command line, so characters such as `&`,
 # `;` or spaces inside a scraped URL are never interpreted by a shell.
 #
 # @param url [String] address to fetch
 # @param output_file [String] path wget writes to (passed via -O)
 # @return [Object, nil] whatever $pool.process returns, or nil when skipped
 def download(url, output_file)
   # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
   return if File.exist?(output_file)

   $pool.process do
     system("wget", "-c", url, "--load-cookies=cookies.txt", "-O", output_file)
   end
 end

 # Queues the RailsCasts index pages 1 through +max+ for download, saving page
 # N as pages/pageN.html. The pages/ directory is not created here.
 #
 # @param max [Integer] number of the last index page to fetch
 # @return [Range] the range of page numbers that was walked
 def download_pages(max = 48)
   Range.new(1, max).each do |page_number|
     index_url = "http://railscasts.com/?page=#{page_number}"
     local_copy = "pages/page#{page_number}.html"
     download(index_url, local_copy)
   end
 end

 # Collects the absolute URL of every episode linked from the saved index
 # pages (pages/*.html), printing each URL as it is found.
 #
 # Each page is opened with the block form of File.open so its handle is
 # closed as soon as Nokogiri has parsed it; the previous bare `open` call
 # left one descriptor open per page until garbage collection.
 #
 # @return [Array<String>] episode URLs in the order they were encountered
 def extract_episode_links
   Dir["pages/*.html"].flat_map do |page_path|
     doc = File.open(page_path) { |file| Nokogiri::HTML(file) }
     doc.css("div.episode h2 a").map do |anchor|
       # hrefs on the index pages are site-relative, so prefix the host.
       "http://railscasts.com#{anchor['href']}".tap { |url| puts url }
     end
   end
 end

 # Returns whatever follows the last "/" that still has at least one character
 # after it, e.g. ".../episodes/90-fragment-caching-revised" gives
 # "90-fragment-caching-revised".
 #
 # @param episode_link [String] URL or path containing a "/"
 # @return [String] the trailing segment of the link
 # @raise [NoMethodError] when the link contains no such "/" (nothing matches)
 def extract_episode_name(episode_link)
   trailing_segment = %r{.*/(.+)}
   matched = trailing_segment.match(episode_link)
   matched.captures.first
 end

 # Queues one episode page for download, saving it as
 # episode_pages/<episode name>.html. The directory is not created here.
 #
 # @param episode_link [String] absolute URL of the episode page
 # @return [Object, nil] whatever `download` returns
 def download_episode_pages(episode_link)
   episode_name = extract_episode_name(episode_link)
   download(episode_link, "episode_pages/#{episode_name}.html")
 end

 # Collects every link ending in "mp4" from the saved episode pages
 # (episode_pages/*.html), printing each one as it is found.
 #
 # Pages are opened with the block form of File.open so each handle is closed
 # right after parsing instead of lingering until garbage collection. Anchors
 # without an href are ignored.
 #
 # @return [Array<String>] mp4 hrefs in the order they were encountered
 def extract_mp4_links
   Dir["episode_pages/*.html"].flat_map do |page_path|
     doc = File.open(page_path) { |file| Nokogiri::HTML(file) }
     hrefs = doc.css("a").map { |anchor| anchor["href"] }
     # `nil =~ regexp` is nil, so anchors lacking an href drop out here.
     hrefs.select { |href| href =~ /mp4$/ }.each { |mp4_url| puts mp4_url }
   end
 end

 # Print a notice and exit when SIGINT (Ctrl-C) is received.
 Signal.trap("INT") do
   puts "INT signal caught, now exiting"
   exit
 end

 # The script is run one step at a time: uncomment the step you need.

 # step 1: mkdir pages then ->
 #download_pages(48)

 # step 2: mkdir episode_pages then ->
 #extract_episode_links.each { |link| download_episode_pages(link) }

 # step 3: queue every mp4 referenced by the saved episode pages
 extract_mp4_links.each do |mp4_url|
   video_path = "videos/#{extract_episode_name(mp4_url)}"
   download(mp4_url, video_path)
 end

 # puts extract_episode_name("http://railscasts.com/episodes/90-fragment-caching-revised")
 # NOTE(review): presumably waits for queued downloads before returning --
 # confirm against the `thread` gem documentation.
 $pool.shutdown
	#!/usr/bin/env ruby

	require "nokogiri"
	require "open-uri"
	require "thread/pool"

	# Worker pool shared by every `download` call in this script. 24 is the size
	# handed to Thread.pool (presumably the number of concurrent workers --
	# confirm against the `thread` gem documentation).
	$pool = Thread.pool(24)

	# Queues a `wget` download of +url+ into +output_file+ on the shared $pool.
	#
	# Nothing is queued when +output_file+ already exists, so re-running the
	# script only fetches what is still missing. wget is started with an argument
	# list instead of an interpolated command line, so characters such as `&`,
	# `;` or spaces inside a scraped URL are never interpreted by a shell.
	#
	# @param url [String] address to fetch
	# @param output_file [String] path wget writes to (passed via -O)
	# @return [Object, nil] whatever $pool.process returns, or nil when skipped
	def download(url, output_file)
	  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
	  return if File.exist?(output_file)

	  $pool.process do
	    system("wget", "-c", url, "--load-cookies=cookies.txt", "-O", output_file)
	  end
	end

	# Queues the RailsCasts index pages 1 through +max+ for download, saving page
	# N as pages/pageN.html. The pages/ directory is not created here.
	#
	# The block parameter had been escaped as `\|i\|`, which Ruby rejects as a
	# syntax error; it is restored to a plain block parameter.
	#
	# @param max [Integer] number of the last index page to fetch
	# @return [Range] the range of page numbers that was walked
	def download_pages(max = 48)
	  (1..max).each do |page_number|
	    download("http://railscasts.com/?page=#{page_number}", "pages/page#{page_number}.html")
	  end
	end

	# Collects the absolute URL of every episode linked from the saved index
	# pages (pages/*.html), printing each URL as it is found.
	#
	# Block parameters had been escaped as `\|...\|` (a syntax error) and are
	# restored. Each page is opened with the block form of File.open so its
	# handle is closed as soon as Nokogiri has parsed it, rather than leaking one
	# descriptor per page through a bare `open` call.
	#
	# @return [Array<String>] episode URLs in the order they were encountered
	def extract_episode_links
	  Dir["pages/*.html"].flat_map do |page_path|
	    doc = File.open(page_path) { |file| Nokogiri::HTML(file) }
	    doc.css("div.episode h2 a").map do |anchor|
	      # hrefs on the index pages are site-relative, so prefix the host.
	      "http://railscasts.com#{anchor['href']}".tap { |url| puts url }
	    end
	  end
	end

	# Returns whatever follows the last "/" that still has at least one character
	# after it, e.g. ".../episodes/90-fragment-caching-revised" gives
	# "90-fragment-caching-revised".
	#
	# @param episode_link [String] URL or path containing a "/"
	# @return [String] the trailing segment of the link
	# @raise [NoMethodError] when the link contains no such "/" (nothing matches)
	def extract_episode_name(episode_link)
	  trailing_segment = %r{.*/(.+)}
	  matched = trailing_segment.match(episode_link)
	  matched.captures.first
	end

	# Queues one episode page for download, saving it as
	# episode_pages/<episode name>.html. The directory is not created here.
	#
	# @param episode_link [String] absolute URL of the episode page
	# @return [Object, nil] whatever `download` returns
	def download_episode_pages(episode_link)
	  episode_name = extract_episode_name(episode_link)
	  download(episode_link, "episode_pages/#{episode_name}.html")
	end

	# Collects every link ending in "mp4" from the saved episode pages
	# (episode_pages/*.html), printing each one as it is found.
	#
	# Block parameters had been escaped as `\|...\|` (a syntax error) and are
	# restored. Pages are opened with the block form of File.open so each handle
	# is closed right after parsing. Anchors without an href are ignored.
	#
	# @return [Array<String>] mp4 hrefs in the order they were encountered
	def extract_mp4_links
	  Dir["episode_pages/*.html"].flat_map do |page_path|
	    doc = File.open(page_path) { |file| Nokogiri::HTML(file) }
	    hrefs = doc.css("a").map { |anchor| anchor["href"] }
	    # `nil =~ regexp` is nil, so anchors lacking an href drop out here.
	    hrefs.select { |href| href =~ /mp4$/ }.each { |mp4_url| puts mp4_url }
	  end
	end

	# Print a notice and exit when SIGINT (Ctrl-C) is received.
	trap(:INT) do
	  puts "INT signal caught, now exiting"
	  exit
	end

	# The script is run one step at a time: uncomment the step you need.

	# step 1: mkdir pages then ->
	#download_pages(48)

	# step 2: mkdir episode_pages then ->
	#extract_episode_links.each { |link| download_episode_pages(link) }

	# step 3: queue every mp4 referenced by the saved episode pages.
	# (The block parameter was previously escaped as `\|link\|`, a syntax error.)
	extract_mp4_links.each do |mp4_url|
	  download(mp4_url, "videos/#{extract_episode_name(mp4_url)}")
	end

	# puts extract_episode_name("http://railscasts.com/episodes/90-fragment-caching-revised")
	# NOTE(review): presumably waits for queued downloads before returning --
	# confirm against the `thread` gem documentation.
	$pool.shutdown