Save graymouser/a33fbb75f94f08af7e36 to your computer and use it in GitHub Desktop.
/*
After purchasing a Humble book bundle, go to your download page for that bundle.
Open a console window for the page and paste in the JavaScript below.
*/
$('a').each(function(i){
    if ($.trim($(this).text()) == 'MOBI') {
        $('body').append('<iframe id="dl_iframe_'+i+'" style="display:none;">');
        document.getElementById('dl_iframe_'+i).src = $(this).data('web');
    }
});
There is a working one (at least as I write this) from @tlc:
https://gist.github.com/tlc/96292166c7253f86565f0d18e5f8ec41
I used
$('div.js-start-download a').each(function(){ $(this).trigger('click') });
for downloading all formats of all books just now.
Building on what @azdle wrote, I have modified the script to select only PDF files and changed the syntax for Windows PowerShell's wget command:
cmds = "";
function removeExtra(a2){
    a2 = a2.replace('https://dl.humble.com/','');
    a2 = a2.substring(0, a2.indexOf('.'));
    return a2;
}
for (const a of document.getElementsByTagName("a")) {
    if (a.href.startsWith("https://dl.humble.com") && a.href.includes("pdf")) {
        cmds += "wget \"" + a.href + "\" -Outfile " + removeExtra(a.href) + ".pdf \n";
    }
}
console.log(cmds);
It's ugly, but it works:
- Iterates over each anchor tag.
- Selects a link only if the URL starts with 'https://dl.humble.com' and contains 'pdf' (change this for EPUB or another file type).
- Names the Outfile after the URL, stripped of 'https://dl.humble.com' and of everything after the first '.', then appends .pdf (again, replace with any extension you prefer). This names each file after its title, without capitals or spaces.
- Finally, paste all the console output into a PowerShell window in the target directory and the files will download automatically.
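To see what that naming step does in isolation, here is the same removeExtra helper run on a made-up URL (the gamekey query string is a hypothetical example of the signed-link shape):

```javascript
// removeExtra turns a download URL into a bare file title:
// strip the host, then keep everything before the first dot.
function removeExtra(a2) {
    a2 = a2.replace('https://dl.humble.com/', '');
    a2 = a2.substring(0, a2.indexOf('.'));
    return a2;
}
console.log(removeExtra('https://dl.humble.com/mybook.pdf?gamekey=abc')); // → "mybook"
```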
Thanks @azdle, couldn't have done it without your code as a starting point.
FYI, for regular wget (e.g. Unix, Linux, Mac), it's just -O, not -Outfile (in PowerShell, wget is an alias for Invoke-WebRequest, which is why the flags differ). So you need to modify the above to change -Outfile to -O.
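A sketch of that change, factored into a plain function over the collected href strings so it can be tried outside the browser page (the function name and sample URL are mine):

```javascript
// Same filtering and naming as above, but emitting GNU wget's -O flag
// instead of PowerShell's -Outfile.
function removeExtra(a2) {
    a2 = a2.replace('https://dl.humble.com/', '');
    return a2.substring(0, a2.indexOf('.'));
}
function buildUnixWgetCmds(hrefs) {
    let cmds = '';
    for (const href of hrefs) {
        if (href.startsWith('https://dl.humble.com') && href.includes('pdf')) {
            cmds += 'wget "' + href + '" -O ' + removeExtra(href) + '.pdf\n';
        }
    }
    return cmds;
}
```

On the live page you would call it as buildUnixWgetCmds([...document.getElementsByTagName('a')].map(a => a.href)).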
Mac doesn't have wget installed by default, though; to use curl instead, modify the if statement to be:
if (a.href.startsWith("https://dl.humble.com") && a.href.includes("pdf")) cmds += "curl \"" + a.href + "\" -o " + removeExtra(a.href) + ".pdf \n";
This code works if you set Firefox to save PDFs instead of previewing them (Firefox > Preferences > Applications > Adobe PDF document: Save File):
function Book(title, author, formats) {
    this.title = title;
    this.author = author;
    this.formats = formats;
}
// Change this to non-zero to download
var seconds_between_switch_book = 0; // 10;
var seconds_between_download = 0; // 3;
var books = [];
var rows = document.querySelectorAll('.subproduct-selector');
rows.forEach(function(item, item_index) {
    setTimeout(function() {
        item.click();
        var title = item.querySelectorAll('h2')[0].title;
        var author = item.querySelectorAll('p')[0].innerText;
        var formats = [...document.querySelectorAll('div.js-download-button')].map(
            download_item => download_item.querySelectorAll('h4')[0].innerText
        );
        books.push(new Book(title, author, formats));
        document.querySelectorAll('div.js-download-button').forEach(function(download_item, download_index) {
            setTimeout(function() {
                var format = download_item.querySelectorAll('h4')[0].childNodes[1].data;
                console.log(item_index, download_index, title, format);
                // uncomment this to download
                //download_item.click();
            }, seconds_between_download * 1000 * download_index);
        });
    }, seconds_between_switch_book * 1000 * item_index);
});
setTimeout(function() {
    console.table(books);
    copy(books);
}, (rows.length + 1) * 1000 * seconds_between_switch_book);
So I'm currently downloading just about everything to put in a Calibre library. Since some of the bundles have some repeat content (looking at you, Make), I updated @KurtBurgess's script to test the working directory for a copy of the current file and skip it if present:
cmds = "";
function buildCommand(a, ext) {
    let filename = removeExtra(a.href);
    ext = '.' + ext;
    cmds += "If(Test-Path -Path \"" + filename + ext + "\") {Write-Warning \"" + filename + ext + " exists, skipping \"} Else { wget \"" + a.href + "\" -Outfile " + filename + ext + "}\n";
}
function removeExtra(a2){
    a2 = a2.replace('https://dl.humble.com/','');
    a2 = a2.substring(0, a2.indexOf('.'));
    return a2;
}
for (const a of document.getElementsByTagName("a")) {
    if (a.href.startsWith("https://dl.humble.com") && a.href.includes("pdf")) buildCommand(a, 'pdf');
    if (a.href.startsWith("https://dl.humble.com") && a.href.includes("epub")) buildCommand(a, 'epub');
    if (a.href.startsWith("https://dl.humble.com") && a.href.includes("cbz")) buildCommand(a, 'cbz');
}
console.log(cmds);
Next steps: adding a bash variant, and seeing if I can remove the repeated if statements with something like a loop.
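For the repeated if statements, one possible shape is a loop over the wanted extensions. This is a sketch over plain href strings with made-up sample data; the Test-Path existence check from above is left out for brevity:

```javascript
// Strip the host and everything after the first dot, as above.
function removeExtra(a2) {
    a2 = a2.replace('https://dl.humble.com/', '');
    return a2.substring(0, a2.indexOf('.'));
}
// One command per matching (href, extension) pair instead of three if statements.
function buildCommands(hrefs, extensions) {
    const cmds = [];
    for (const href of hrefs) {
        if (!href.startsWith('https://dl.humble.com')) continue;
        for (const ext of extensions) {
            if (href.includes(ext)) {
                cmds.push('wget "' + href + '" -Outfile ' + removeExtra(href) + '.' + ext);
            }
        }
    }
    return cmds;
}
```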
var pattern = /(MOBI|EPUB|PDF( ?\(H.\))?|CBZ|Download)$/i;
var nodes = document.getElementsByTagName('a');
var downloadCmd = '';
for (const i in nodes) {
    var a = nodes[i];
    if (a && a.text && pattern.test(a.text.trim()) && a.attributes['href']) {
        downloadCmd += a.attributes['href'].value + "\n";
    }
}
var output = document.createElement("pre");
output.textContent = downloadCmd;
document.getElementById("papers-content").prepend(output);
Copy/Paste the links in one txt and run wget:
wget --no-check-certificate --content-disposition -r -H -np -nH -N --cut-dirs=1 -e robots=off -l1 -i ./linksfilename.txt -B 'https://dl.humble.com/'
A modified version of @kellerkindt's snippet:
var nodes_a = document.querySelectorAll('.downloads a:not(.dlmd5)');
for (node of nodes_a) {
console.log('wget --content-disposition', node.href);
};
If you're using the above, you may need to place the generated link in double quotes so your shell interprets the ampersands literally. I tried to tweak this myself but hit an issue with whitespace that would be easy to fix for someone who actually knows JavaScript. Sadly, that person is not me.
var nodes_a = document.querySelectorAll('.downloads a:not(.dlmd5)');
for (const node of nodes_a) {
    var tmp = node.href;
    tmp = tmp.replace(/ /g, '');
    console.log('wget --content-disposition "' + tmp + '"');
}
Maybe this works. Apologies for the hackiness; I'm sure a better alteration is possible, but as I say, I don't know JavaScript.
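The same idea can be packed into one small helper: trim stray whitespace and wrap each URL in double quotes so the shell doesn't interpret the '&' in the signed links. The helper name and sample URL below are mine:

```javascript
// Given a list of href strings, emit one quoted wget command per line.
function wgetCommands(hrefs) {
    return hrefs
        .map(href => 'wget --content-disposition "' + href.trim() + '"')
        .join('\n');
}
console.log(wgetCommands(['https://dl.humble.com/book.pdf?gamekey=a&ttl=1']));
```

Note this only trims leading/trailing whitespace; download URLs shouldn't contain internal spaces.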
I like my files to be organized, so here's my take on it.
const commands = [];
document.querySelectorAll('.row').forEach(row => {
const bookTitle = row.dataset.humanName;
[...row.querySelectorAll('.downloads .flexbtn a')].forEach(el => {
const downloadLink = el.href;
const fileName = /\.com\/([^?]+)/.exec(downloadLink)[1];
commands.push(`curl --create-dirs -o "${bookTitle}/${fileName}" "${downloadLink}"`);
});
});
console.log(commands.join('; '));
Instead of wget, this uses curl, because wget's -O does not create directories automatically (and while -P does, -O and -P cannot be used together).
The resulting directory tree is like this:
.
├── Advanced Penetration Testing
│   ├── advancedpenetrationtesting.epub
│   └── advancedpenetrationtesting.pdf
├── Applied Cryptography: Protocols, Algorithms and Source Code in C, 20th Anniversary Edition
│   ├── applied_cryptography_protocols_algorithms_and_source_code_in_c.epub
│   └── applied_cryptography_protocols_algorithms_and_source_code_in_c.pdf
└── Cryptography Engineering: Design Principles and Practical Applications
    ├── cryptography_engineering_design_principles_and_practical_applications.epub
    ├── cryptography_engineering_design_principles_and_practical_applications.pdf
    └── cryptography_engineering_design_principles_and_practical_applications.prc
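The filenames in that tree are recovered from the signed download URLs by the regex in the snippet, which keeps everything between ".com/" and the query string. The gamekey parameter below is a made-up example of the URL shape:

```javascript
// Extract the bare filename from a signed download link.
const downloadLink = 'https://dl.humble.com/advancedpenetrationtesting.pdf?gamekey=abc';
const fileName = /\.com\/([^?]+)/.exec(downloadLink)[1];
console.log(fileName); // → "advancedpenetrationtesting.pdf"
```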
I took @jmerle's code and changed the last line:
console.log(commands.join('; '));
to:
console.log(commands.join(' && '));
That way, each download has to finish successfully before the next one starts, and the chain stops on the first failure instead of trying to push on through everything at once.
If you want to verify your downloads, here's the code to make the md5 hashes visible:
var md5_links = document.querySelectorAll(".dlmd5");
md5_links.forEach(function (link) {
    link.click();
});
OR...
If you are like me and have way too many book bundles, you might be interested in something like the following code.
function getTitle() {
    var re = /^Humble Book Bundle: (.*) \(/g;
    return re.exec(document.title)[1];
}
function showHashes() {
    document.querySelectorAll('.dlmd5').forEach(md5 => {
        if (md5.innerText.trim() == 'md5') {
            md5.click();
        }
    });
}
function gatherInfo() {
    const data = [];
    const bundleTitle = getTitle();
    showHashes();
    document.querySelectorAll('.row').forEach(row => {
        const bookTitle = row.dataset.humanName;
        [...row.querySelectorAll('.downloads .download')].forEach(dl => {
            const downloadLink = dl.querySelector('.flexbtn a').href;
            const filename = /\.com\/([^?]+)/.exec(downloadLink)[1];
            const md5 = dl.querySelector('a.dlmd5').innerText.trim();
            data.push({
                "bundleTitle": bundleTitle,
                "bookTitle": bookTitle,
                "filename": filename,
                "downloadLink": downloadLink,
                "md5": md5
            });
        });
    });
    return data;
}
function downloadBookBundle() {
    const commands = [];
    const md5Sums = [];
    const info = gatherInfo();
    for (const i in info) {
        const bundleTitle = info[i]["bundleTitle"];
        const bookTitle = info[i]["bookTitle"];
        const filename = info[i]["filename"];
        const downloadLink = info[i]["downloadLink"];
        const md5 = info[i]["md5"];
        commands.push(`curl --create-dirs -o "${bundleTitle}/${bookTitle}/${filename}" "${downloadLink}"`);
        md5Sums.push(`${md5} ${bundleTitle}/${bookTitle}/${filename}`);
    }
    console.log(commands.join(' && '));
    console.log(md5Sums.join('\n'));
}
downloadBookBundle();
It is based upon @jmerle's approach and is also forked here: https://gist.github.com/fsteffek/bf4ac1e3d2601629a6c9cca94b5649f6.
What does it do?
- It prints the curl command line to download your Humble Book Bundle. I modified it so each bundle is saved into a separate folder:
.
├── Bundle Name
│   └── Book Name
│       └── Files
└── More Bundles
- It prints the content of an md5 file, which md5sum can read and check. Paste it into a file like hb_all_books.md5:
...
5b3e6de1fc4c45be45b1299ea50a6a7d Essential Knowledge by MIT Press/Cloud Computing/cloudcomputing.epub
a14391f6971da830d064c2c0fd132019 Essential Knowledge by MIT Press/Cloud Computing/cloudcomputing.mobi
...
... and check it with md5sum -c hb_all_books.md5:
Essential Knowledge by MIT Press/Cloud Computing/cloudcomputing.epub: OK
Essential Knowledge by MIT Press/Cloud Computing/cloudcomputing.mobi: OK
...
Feel free to tell me how to make this script more readable, convenient and generally just better.
My JavaScript fork of this script is still working today: https://gist.github.com/zuazo/a91ecbb97b90ef3ef9ce8caf361199a2
I've been working on a Perl script to do the downloads. You will need to log in via the website first and grab the session cookie.
If you want to run it, you'll need to manually tweak it a bit first: HumbleBundleDL