Skip to content

Instantly share code, notes, and snippets.

@xen0f0n
Last active August 5, 2020 06:56
Show Gist options
  • Select an option

  • Save xen0f0n/7e75bf6c0317db798feecab54809628b to your computer and use it in GitHub Desktop.

Select an option

Save xen0f0n/7e75bf6c0317db798feecab54809628b to your computer and use it in GitHub Desktop.
script_download_springer_free_ML_books
import requests
from bs4 import BeautifulSoup
import os
import re
### Requirements ###
# requests
# beautifulsoup4
# Scrape the techgrabyte article that lists Springer's free machine-learning /
# data-science books and save every linked book as a PDF in ``save_folder``.
#
# For each Springer link on the article page the script:
#   1. fetches the book's landing page,
#   2. reads the book title from the first <h1> and the DOI from the first
#      "doi.org/<DOI><" occurrence in the page HTML,
#   3. downloads ``base_url`` + URL-encoded DOI and writes it to
#      ``<save_folder>/<sanitised title>.pdf``.
#
# A failure on one book is reported and skipped; it never aborts the run.
source_url = 'https://techgrabyte.com/springer-released-65-machine-learning-data-science-books-free/'
save_folder = 'springer_ML_free_books'
base_url = 'https://link.springer.com/content/pdf/'

# Seconds allowed for any single HTTP request, so a stalled connection cannot
# hang the whole run.
request_timeout = 60

# Captures the DOI that follows "doi.org/" up to the next tag. Quotes and
# whitespace are excluded so a DOI inside an href="..." attribute is not
# captured together with the markup that follows it.
doi_pattern = re.compile(r'doi\.org/([^<"\'\s]+)\s*<')

# Characters that are path separators or illegal in file names on common
# file systems; a title containing "/" would otherwise break open().
unsafe_filename_chars = re.compile(r'[\\/*?"<>|\r\n\t]')

os.makedirs(save_folder, exist_ok=True)

# One session for every request so connections to springer.com are reused.
session = requests.Session()

page = session.get(source_url, timeout=request_timeout)
# Fail loudly if the index page itself is unavailable: nothing can be
# downloaded without it.
page.raise_for_status()
books = BeautifulSoup(page.content, 'html.parser').find_all('a')

seen_urls = set()
for book in books:
    # Anchors without an href yield None here instead of raising KeyError.
    book_url = book.get('href')
    if not book_url or 'springer.com' not in book_url:
        continue
    # The article may link the same book more than once; fetch it only once.
    if book_url in seen_urls:
        continue
    seen_urls.add(book_url)

    try:
        book_page = session.get(book_url, timeout=request_timeout)
        book_page.raise_for_status()

        heading = BeautifulSoup(book_page.content, 'html.parser').find('h1')
        doi_match = doi_pattern.search(book_page.text)
        if heading is None or doi_match is None:
            print(f'Skipping {book_url}: no title or DOI found on the page')
            continue
        doi = doi_match.group(1)

        title_ = heading.text.strip()
        title = title_.replace(' ', '_').replace(':', '_').replace(',', '')
        title = unsafe_filename_chars.sub('_', title)
        if not title:
            # Fall back to the DOI so the file never ends up named ".pdf".
            title = doi.replace('/', '_')

        print(f'Downloading... {title_}')
        # The DOI's slash must be percent-encoded inside the PDF path.
        pdf_url = base_url + doi.replace('/', '%2F')
        r = session.get(pdf_url, timeout=request_timeout)
        r.raise_for_status()
        # Guard against HTML error / login pages being saved as ".pdf".
        if b'%PDF' not in r.content[:1024]:
            print(f'Skipping {title_}: response from {pdf_url} is not a PDF')
            continue

        with open(os.path.join(save_folder, f'{title}.pdf'), 'wb') as f:
            f.write(r.content)
    except (requests.RequestException, OSError) as err:
        # Best-effort download: report the failure and move on to the next
        # book. KeyboardInterrupt is deliberately NOT caught, so Ctrl-C works.
        print(f'Failed to download {book_url}: {err}')
        continue
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment