henrik · December 19, 2015 23:35 · henrik · Dec 9, 2015
diff --git a/dnloader.rb b/dnloader.rb
 # Ruby script for DN subscribers to download the latest DN as a PDF – because it beats their iPad app.
 # By Henrik Nyh 2015-12-08 under the MIT license.
 #
 # INSTRUCTIONS (for advanced users)
 #
 # Fetch the script dependencies:
 #
 #     (sudo) gem install mechanize
 #
 # Put your username and password in a ~/.dnloader file separated by a ":", e.g.:
 #
 #     echo "you@example.com:myp4ssw0rd" > ~/.dnloader
 #
 # Optionally, customize where the files end up, here:

 # Directory the downloaded issues are saved to ("~" expands to your home directory).
 # Frozen so the shared constant cannot be mutated in place by accident.
 DEST_DIR = File.expand_path("~/Dropbox/DN").freeze

 # Optionally, customize how many issues to keep:

 MAX_ISSUES = 14

 # Now make sure the script works:
 #
 #     ruby dnloader.rb
 #
 # You can add a cron job to run this script every few minutes. (If the file is already downloaded, nothing happens.)
 # E.g. to run it every 5 minutes:
 #
 #     */5 * * * * ruby /Users/foo/bin/dnloader.rb > /dev/null

 require "date"
 require "fileutils"
 require "mechanize"

 # Everything below is derived from today's date and the destination directory.
 DATE = Date.today

 # Page whose links are checked to see whether today's issue is listed.
 PDF_INDEX_URL = "https://kund.dn.se/mitt-konto/dn-som-pdf/"

 # Server-relative path of today's issue (date formatted as YYYYMMDD), and its absolute URL.
 PDF_PATH = DATE.strftime("/service/download/%Y%m%d/DN.pdf")
 PDF_URL = "https://kund.dn.se" + PDF_PATH

 # Local file for today's issue (date formatted as YYYY-MM-DD), and the glob matching every stored issue.
 PDF_DEST = "#{DEST_DIR}/DN_#{DATE.iso8601}.pdf"
 REMOVAL_GLOB = DEST_DIR + "/DN_*.pdf"

 # Stop right away when today's issue is already on disk; this is what makes
 # repeated runs of the script a no-op.
 already_downloaded = File.exist?(PDF_DEST)
 if already_downloaded
   $stdout.puts "Already downloaded: #{PDF_DEST}"
   exit
 end

 # Make sure the destination directory (including missing parents) exists.
 FileUtils.mkdir_p DEST_DIR

 # Credentials are read from ~/.dnloader, stored as "username:password".
 CONFIG_FILE = File.expand_path("~/.dnloader")
 abort("Missing config file! See docs.") unless File.exist?(CONFIG_FILE)

 # Split on the first ":" only, so the password itself may contain colons.
 username, password = File.read(CONFIG_FILE).strip.split(":", 2)

 # An empty string is truthy in Ruby, so a plain `username && password` check would
 # accept a file containing just ":" or "user:". Reject nil and empty values alike.
 credentials_missing = [username, password].any? { |value| value.to_s.empty? }
 abort("Missing username or password!") if credentials_missing


 # Log in and download (we need a session to get the PDF)

 # A single agent is used for every request so the login session carries over to the download.
 agent = Mechanize.new
 # Handle PDF responses with Mechanize::Download, whose #save is used to write the file.
 agent.pluggable_parser.pdf = Mechanize::Download

 login_page = agent.get("https://auth.dn.se/login?appId=dagensnyheter.se&lc=sv&callback=http%3A%2F%2Fkund.dn.se%2Fservice%2Floginplus%3Fredirect%3D%2F")

 # form_with returns nil when no form matches; fail with a clear message
 # instead of an opaque NoMethodError if the login page ever changes.
 login_form = login_page.form_with(id: "loginForm")
 abort("Could not find the login form! The login page may have changed.") unless login_form

 login_form.field_with(name: "form.username").value = username
 login_form.field_with(name: "form.password").value = password
 login_result_page = login_form.submit

 # The login is considered successful only when the resulting page contains "Logga ut".
 unless login_result_page.body.include?("Logga ut")
   # If you want to debug it:
   #p login_result_page
   #puts login_result_page.body

   abort "Error logging in!"
 end

 # For some reason, they will sometimes make "today's" PDF available right after midnight but with yesterday's content.
 # But they don't seem to update links until there is a real issue, so we use that.
 index_page = agent.get(PDF_INDEX_URL)
 if index_page.links_with(href: PDF_PATH).empty?
   puts "Seems today's issue is not published yet."
   exit
 end

 # Fetch the PDF within the logged-in session and write it to its dated destination.
 todays_issue = agent.get(PDF_URL)
 todays_issue.save(PDF_DEST)
 puts "Downloaded: #{PDF_DEST}"


 # Remove old files.

 # File names embed the issue date as YYYY-MM-DD, so ordering them newest-first and
 # skipping the first MAX_ISSUES leaves exactly the issues that are too old to keep.
 newest_first = Dir.glob(REMOVAL_GLOB).sort { |a, b| b <=> a }
 files_to_remove = newest_first.drop(MAX_ISSUES)

 unless files_to_remove.empty?
   FileUtils.rm(files_to_remove)
   puts "Removed files: #{files_to_remove.inspect}"
 end
	# Ruby script for DN subscribers to download the latest DN as a PDF – because it beats their iPad app.
	# By Henrik Nyh 2015-12-08 under the MIT license.
	#
	# INSTRUCTIONS (for advanced users)
	#
	# Fetch the script dependencies:
	#
	# (sudo) gem install mechanize
	#
	# Put your username and password in a ~/.dnloader file separated by a ":", e.g.:
	#
	# echo "you@example.com:myp4ssw0rd" > ~/.dnloader
	#
	# Optionally, customize where the files end up, here:

	# Directory the downloaded issues are saved to ("~" expands to your home directory).
	# Frozen so the shared constant cannot be mutated in place by accident.
	DEST_DIR = File.expand_path("~/Dropbox/DN").freeze

	# Optionally, customize how many issues to keep:

	MAX_ISSUES = 14

	# Now make sure the script works:
	#
	# ruby dnloader.rb
	#
	# You can add a cron job to run this script every few minutes. (If the file is already downloaded, nothing happens.)
	# E.g. to run it every 5 minutes:
	#
	# */5 * * * * ruby /Users/foo/bin/dnloader.rb > /dev/null

	require "mechanize"
	require "date"

	# Everything below is derived from today's date and the destination directory.
	DATE = Date.today

	# Page whose links are checked to see whether today's issue is listed.
	PDF_INDEX_URL = "https://kund.dn.se/mitt-konto/dn-som-pdf/"

	# Server-relative path of today's issue (date formatted as YYYYMMDD), and its absolute URL.
	PDF_PATH = DATE.strftime("/service/download/%Y%m%d/DN.pdf")
	PDF_URL = "https://kund.dn.se" + PDF_PATH

	# Local file for today's issue (date formatted as YYYY-MM-DD), and the glob matching every stored issue.
	PDF_DEST = "#{DEST_DIR}/DN_#{DATE.iso8601}.pdf"
	REMOVAL_GLOB = DEST_DIR + "/DN_*.pdf"

	# Stop right away when today's issue is already on disk; this is what makes
	# repeated runs of the script a no-op.
	already_downloaded = File.exist?(PDF_DEST)
	if already_downloaded
	  $stdout.puts "Already downloaded: #{PDF_DEST}"
	  exit
	end

	# Make sure the destination directory (including missing parents) exists.
	FileUtils.mkdir_p DEST_DIR

	# Credentials are read from ~/.dnloader, stored as "username:password".
	CONFIG_FILE = File.expand_path("~/.dnloader")
	abort("Missing config file! See docs.") unless File.exist?(CONFIG_FILE)

	# Split on the first ":" only, so the password itself may contain colons.
	username, password = File.read(CONFIG_FILE).strip.split(":", 2)

	# An empty string is truthy in Ruby, so a plain `username && password` check would
	# accept a file containing just ":" or "user:". Reject nil and empty values alike.
	credentials_missing = [username, password].any? { |value| value.to_s.empty? }
	abort("Missing username or password!") if credentials_missing


	# Log in and download (we need a session to get the PDF)

	# A single agent is used for every request so the login session carries over to the download.
	agent = Mechanize.new
	# Handle PDF responses with Mechanize::Download, whose #save is used to write the file.
	agent.pluggable_parser.pdf = Mechanize::Download

	login_page = agent.get("https://auth.dn.se/login?appId=dagensnyheter.se&lc=sv&callback=http%3A%2F%2Fkund.dn.se%2Fservice%2Floginplus%3Fredirect%3D%2F")

	# form_with returns nil when no form matches; fail with a clear message
	# instead of an opaque NoMethodError if the login page ever changes.
	login_form = login_page.form_with(id: "loginForm")
	abort("Could not find the login form! The login page may have changed.") unless login_form

	login_form.field_with(name: "form.username").value = username
	login_form.field_with(name: "form.password").value = password
	login_result_page = login_form.submit

	# The login is considered successful only when the resulting page contains "Logga ut".
	unless login_result_page.body.include?("Logga ut")
	  # If you want to debug it:
	  #p login_result_page
	  #puts login_result_page.body

	  abort "Error logging in!"
	end

	# For some reason, they will sometimes make "today's" PDF available right after midnight but with yesterday's content.
	# But they don't seem to update links until there is a real issue, so we use that.
	index_page = agent.get(PDF_INDEX_URL)
	if index_page.links_with(href: PDF_PATH).empty?
	  puts "Seems today's issue is not published yet."
	  exit
	end

	# Fetch the PDF within the logged-in session and write it to its dated destination.
	todays_issue = agent.get(PDF_URL)
	todays_issue.save(PDF_DEST)
	puts "Downloaded: #{PDF_DEST}"


	# Remove old files.

	# File names embed the issue date as YYYY-MM-DD, so ordering them newest-first and
	# skipping the first MAX_ISSUES leaves exactly the issues that are too old to keep.
	newest_first = Dir.glob(REMOVAL_GLOB).sort { |a, b| b <=> a }
	files_to_remove = newest_first.drop(MAX_ISSUES)

	unless files_to_remove.empty?
	  FileUtils.rm(files_to_remove)
	  puts "Removed files: #{files_to_remove.inspect}"
	end