xen0f0n · August 5, 2020 06:56
diff --git a/download_springer_free_ML_books.py b/download_springer_free_ML_books.py
 import requests
 from bs4 import BeautifulSoup
 import os
 import re

 ### Requirements ###
 # requests
 # beautifulsoup4


 # Download every Springer book linked from the article at source_url and
 # save each one as '<title>.pdf' inside save_folder.
 source_url = 'https://techgrabyte.com/springer-released-65-machine-learning-data-science-books-free/'
 save_folder = 'springer_ML_free_books'
 base_url = 'https://link.springer.com/content/pdf/'
 # Seconds to wait on a single HTTP request; without an explicit timeout a
 # stalled connection could block the script indefinitely.
 request_timeout = 30
 # Captures the DOI that follows 'doi.org/' and stops at the first '<',
 # quote or whitespace, so trailing markup never ends up in the PDF URL.
 doi_pattern = re.compile(r"""doi\.org/([^<\s"']+)""")

 page = requests.get(source_url, timeout=request_timeout)
 # Fail loudly if the article itself cannot be fetched instead of silently
 # finding zero links in an error page.
 page.raise_for_status()
 books = BeautifulSoup(page.content, 'html.parser').find_all('a')
 os.makedirs(save_folder, exist_ok=True)

 # One session for the whole run so connections are reused across books.
 with requests.Session() as session:
     for book in books:
         # Anchors without an href, or pointing elsewhere, are not books.
         book_url = book.get('href')
         if not book_url or 'springer.com' not in book_url:
             continue
         # NOTE(review): fragment kept from the original script; presumably
         # it has no effect on the response - confirm before removing.
         book_url += '#authorsandaffiliationsbook'

         try:
             book_page = session.get(book_url, timeout=request_timeout)
             book_page.raise_for_status()

             heading = BeautifulSoup(book_page.content, 'html.parser').find('h1')
             doi_match = doi_pattern.search(book_page.text)
             if heading is None or doi_match is None:
                 print(f'Skipping {book_url}: no title or DOI found')
                 continue

             title_ = heading.text.strip()
             # Spaces, colons and path separators become underscores and
             # commas are dropped, so the title is usable as a file name.
             title = re.sub(r'[ :/\\]', '_', title_).replace(',', '')

             print(f'Downloading... {title_}')

             # The PDF endpoint takes the DOI with its slashes percent-encoded.
             pdf_url = base_url + doi_match.group(1).replace('/', '%2F')
             r = session.get(pdf_url, timeout=request_timeout)
             # Never write an HTTP error page to disk as a '.pdf'.
             r.raise_for_status()

             with open(f'{os.path.join(save_folder, title)}.pdf', 'wb') as f:
                 f.write(r.content)
         except (requests.RequestException, OSError) as err:
             # Best effort: report the failure and move on to the next book.
             print(f'Failed to download {book_url}: {err}')
	import requests
	from bs4 import BeautifulSoup
	import os
	import re

	### Requirements ###
	# requests
	# beautifulsoup4


	# Download every Springer book linked from the article at source_url and
	# save each one as '<title>.pdf' inside save_folder.
	source_url = 'https://techgrabyte.com/springer-released-65-machine-learning-data-science-books-free/'
	save_folder = 'springer_ML_free_books'
	base_url = 'https://link.springer.com/content/pdf/'
	# Seconds to wait on a single HTTP request; without an explicit timeout a
	# stalled connection could block the script indefinitely.
	request_timeout = 30
	# Captures the DOI that follows 'doi.org/' and stops at the first '<',
	# quote or whitespace, so trailing markup never ends up in the PDF URL.
	doi_pattern = re.compile(r"""doi\.org/([^<\s"']+)""")

	page = requests.get(source_url, timeout=request_timeout)
	# Fail loudly if the article itself cannot be fetched instead of silently
	# finding zero links in an error page.
	page.raise_for_status()
	books = BeautifulSoup(page.content, 'html.parser').find_all('a')
	os.makedirs(save_folder, exist_ok=True)

	# One session for the whole run so connections are reused across books.
	with requests.Session() as session:
	    for book in books:
	        # Anchors without an href, or pointing elsewhere, are not books.
	        book_url = book.get('href')
	        if not book_url or 'springer.com' not in book_url:
	            continue
	        # NOTE(review): fragment kept from the original script; presumably
	        # it has no effect on the response - confirm before removing.
	        book_url += '#authorsandaffiliationsbook'

	        try:
	            book_page = session.get(book_url, timeout=request_timeout)
	            book_page.raise_for_status()

	            heading = BeautifulSoup(book_page.content, 'html.parser').find('h1')
	            doi_match = doi_pattern.search(book_page.text)
	            if heading is None or doi_match is None:
	                print(f'Skipping {book_url}: no title or DOI found')
	                continue

	            title_ = heading.text.strip()
	            # Spaces, colons and path separators become underscores and
	            # commas are dropped, so the title is usable as a file name.
	            title = re.sub(r'[ :/\\]', '_', title_).replace(',', '')

	            print(f'Downloading... {title_}')

	            # The PDF endpoint takes the DOI with its slashes percent-encoded.
	            pdf_url = base_url + doi_match.group(1).replace('/', '%2F')
	            r = session.get(pdf_url, timeout=request_timeout)
	            # Never write an HTTP error page to disk as a '.pdf'.
	            r.raise_for_status()

	            with open(f'{os.path.join(save_folder, title)}.pdf', 'wb') as f:
	                f.write(r.content)
	        except (requests.RequestException, OSError) as err:
	            # Best effort: report the failure and move on to the next book.
	            print(f'Failed to download {book_url}: {err}')
No results found