Last active
April 13, 2016 16:30
-
-
Save criztovyl/10ceed8600628dfb017aec353d196c52 to your computer and use it in GitHub Desktop.
Downloads a full DeviantArt gallery.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require "fileutils"
require "json/add/struct"
require "logger"
require "open-uri"

require "nokogiri"
require "typhoeus"
# Command-line handling: print the usage text for a help flag and refuse to
# run without a gallery (subdomain) name.
#
# The help pattern is anchored to the whole argument so that a gallery name
# which merely contains "-h" (for example "my-home") is not mistaken for the
# help flag. Accepted spellings are unchanged: -h, --h, -help, --help.
if ARGV[0] =~ /\A-{1,2}h(elp)?\z/
  puts "Usage: ./%s subdomain-name" % $0
  puts "Images go to `content-full' dir. If there is a download button, that file will go to `page-download' dir."
  exit
end

# A missing name is an error: report it on stderr and exit with a non-zero
# status so calling shells and scripts can detect the failure.
abort "Missing name!" unless ARGV[0]
# Interrupt flag: the SIGINT handler only flips it, and the DAGalleryImages
# methods check it before starting further work.
$break_ = false
Signal.trap("SIGINT") { $break_ = true }

# Script-wide logger writing to standard output.
$logger = Logger.new(STDOUT)
#$logger.level = Logger::INFO

# User agent string sent with every Typhoeus request.
Typhoeus::Config.user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0"
#Typhoeus::Config.verbose = true
# One entry of a gallery page together with what could be scraped from the
# entry's own page.
#
# Members:
#   title      - title attribute of the gallery link
#   page_href  - URL of the entry's page
#   image_href - "src" of the first ".dev-content-full" element, if any
#   request    - not assigned by any code visible in this file
#   doc        - the entry's page, either an HTML String or an already parsed
#                document (any object responding to css(selector))
#   desc       - first 101 characters of the description text
#   dl_href    - "href" of the first ".dev-page-download" element, if any
DAGalleryImage = Struct.new(:title, :page_href, :image_href, :request, :doc, :desc, :dl_href) do
  # Fills dl_href, image_href and desc from doc.
  #
  # A String doc is parsed with Nokogiri first and the parsed document is
  # stored back into doc. When doc is nil nothing is extracted and an error is
  # logged instead of raising NoMethodError. When neither an image nor a
  # download link is found, desc is left untouched.
  def parse
    if doc.nil?
      $logger.error "No document to parse for %s." % page_href
      return
    end

    # is_a? also accepts String subclasses, unlike comparing class names.
    self.doc = Nokogiri::HTML(doc) if doc.is_a?(String)

    if (button = doc.css(".dev-page-download")[0])
      self.dl_href = button["href"]
      $logger.debug "Image (Page-Download) for %s is %s" % [page_href, dl_href]
    end

    if (image = doc.css(".dev-content-full")[0])
      self.image_href = image["src"]
      $logger.debug "Image (Content-Full) for %s is %s" % [page_href, image_href]
    end

    unless image_href || dl_href
      $logger.error "No image(s) for %s. Maybe mature content?" % page_href
      return
    end

    self.desc = doc.css(".dev-description div div").text[0..100]
    $logger.debug desc.inspect
  end
end
# Walks the gallery of one DeviantArt subdomain page by page and downloads the
# image and/or download-button file of every entry found.
#
# NOTE(review): @gallery_images is only ever set in #initialize (empty, or
# loaded from the cache file); nothing in this class appends to it, so #save
# writes back whatever was loaded. Confirm the intended cache contents before
# relying on the JSON file.
class DAGalleryImages
  # Options handed to every Typhoeus request: cookies are read from and written
  # to "cookies.txt" and redirects are followed.
  REQUEST_OPTIONS = { cookiefile: "cookies.txt", cookiejar: "cookies.txt", followlocation: true }.freeze

  # subdomain - the "name" part of http://name.deviantart.com/gallery.
  #
  # Loads the JSON cache file when it exists; otherwise starts walking the
  # gallery right away.
  def initialize(subdomain)
    @subdomain = subdomain
    @gallery_images = []
    @logger = Logger.new(STDOUT)
    if File.exist?(file_name) # File.exists? was removed in Ruby 3.2
      # NOTE(review): create_additions instantiates classes named in the file;
      # presumably acceptable because the file is written by #save - confirm.
      @gallery_images = JSON.parse(File.read(file_name), create_additions: true)
    else
      request
    end
  end

  # Processes every entry linked from the gallery page doc, then follows the
  # "next" link until there is none or an interrupt was requested.
  #
  # doc - parsed gallery page; must respond to css(selector) and url.
  #
  # Returns nil.
  def walk_gallery(doc)
    loop do
      doc.css("span.details a.t").each do |anchor|
        return if $break_

        image = DAGalleryImage.new(anchor["title"], anchor["href"])
        image.doc = fetch(image.page_href).body
        image.parse
        dl(image.image_href, "content-full") if image.image_href
        dl(image.dl_href, "page-download") if image.dl_href
      end
      return if $break_

      # The last page has no "next" anchor; stop instead of calling [] on nil.
      next_anchor = doc.css("li.next a")[0]
      next_href = next_anchor && next_anchor["href"]
      return unless next_href

      # scheme://host of the current page, prefixed to the next link's href.
      origin = doc.url.split("/")[0..2].join("/")
      next_link = origin + next_href
      @logger.debug "Next link: %s" % next_link.inspect
      response = fetch(next_link)
      doc = Nokogiri::HTML(response.body, response.effective_url)
    end
  end

  # Downloads url into destDir, naming the file after the last path segment of
  # the effective (post-redirect) URL.
  #
  # The body is written in binary mode without a trailing newline so image data
  # is stored byte for byte. Unsuccessful responses are logged and not saved.
  def dl(url, destDir)
    return if $break_

    response = fetch(url)
    unless response.success?
      @logger.error "Download of %s failed (HTTP %s)." % [url, response.code]
      return
    end

    destfile = File.join(destDir, response.effective_url.split("/")[-1])
    FileUtils.mkdir_p(destDir)
    File.binwrite(destfile, response.body)
  end

  # Fetches the first gallery page of the subdomain and walks the gallery from
  # there. Logs an error and returns when the gallery answers with HTTP 404.
  def request
    link = "http://%s.deviantart.com/gallery" % @subdomain
    return if $break_

    @logger.debug "Requesting gallery... %s" % link
    response = fetch(link)
    if response.code == 404
      @logger.error "No such gallery %s." % link
      return
    end
    walk_gallery(Nokogiri::HTML(response.body, response.effective_url))
  end

  # Name of the JSON cache file for this subdomain (relative to the working
  # directory).
  def file_name
    "%s.da_gallery_images.json" % @subdomain
  end

  # Writes @gallery_images as JSON to the cache file, replacing its contents.
  def save
    File.open(file_name, "w+") do |file|
      file.puts JSON.generate(@gallery_images)
    end
  end

  private

  # Runs a blocking request for url with the shared options and returns the
  # response.
  def fetch(url)
    http_request = Typhoeus::Request.new(url, REQUEST_OPTIONS.dup)
    http_request.run
    http_request.response
  end
end
# Entry point: build the gallery walker for the name given on the command
# line, walk the gallery, then write the JSON cache file.
# NOTE(review): DAGalleryImages#initialize already calls #request when the
# cache file does not exist yet, so in that case the gallery is walked twice.
gallery = DAGalleryImages.new(ARGV[0])
gallery.request
gallery.save
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment