Skip to content

Instantly share code, notes, and snippets.

@Sergeydigl3
Created July 27, 2019 11:08
Show Gist options
  • Select an option

  • Save Sergeydigl3/cc5e890b38486d0d00f7f4a65230f93a to your computer and use it in GitHub Desktop.

Select an option

Save Sergeydigl3/cc5e890b38486d0d00f7f4a65230f93a to your computer and use it in GitHub Desktop.
#@title Download smth
import requests
from bs4 import BeautifulSoup
from pathlib import Path
from os.path import join
from os import chdir, mkdir
import PIL.Image as Image
from itertools import groupby
# Base directory of the notebook runtime and the name of the folder created
# inside it; every download ends up under <main_path>/<main_name>/.
main_path = '/content/'
main_name = 'czech'
# Number of the last listing page to crawl.
pages = 37
chdir(main_path)
# exist_ok=True makes re-running the cell safe without a separate
# exists() check.
Path(main_name).mkdir(exist_ok=True)
chdir(main_name)
#@markdown Start from page
page=1 #@param {type: "number"}
def _full_size_image(thumb_url):
    """Map a gallery thumbnail URL to the full-size image it was cut from.

    The file name is the last path component of ``thumb_url``. Everything
    from a '-300x400.jpg' marker onwards is dropped and '.jpg' is appended.

    Returns a dict with 'url' (``thumb_url`` with the file name swapped for
    the full-size one) and 'name' (the file name to save under).
    """
    thumb_name = thumb_url.split('/')[-1]
    full_name = thumb_name.split('-300x400.jpg')[0] + '.jpg'
    return {'url': thumb_url.replace(thumb_name, full_name),
            'name': full_name}


# Walk the listing pages from `page` up to and including `pages`. For every
# post linked on a listing page, create a directory named after the post and
# save all images of its 'gallery-2' block there. Paths are relative to the
# current working directory.
while page <= pages:
    listing = requests.get(
        'https://czechcasting.adultsites.co/page/{}/'.format(page))
    listing_soup = BeautifulSoup(listing.content, 'lxml')
    posts_box = listing_soup.find('div', {'class': 'posts'})
    # A page without the expected container yields no posts instead of
    # crashing the whole run.
    if posts_box is None:
        posts = []
    else:
        posts = posts_box.find_all('div', {'class': 'post'})

    # Collect each post link once, keeping the order of first appearance.
    persons = []
    seen = set()
    for post in posts:
        link = post.find('a')
        href = link.get('href') if link is not None else None
        if href and href not in seen:
            seen.add(href)
            persons.append(href)
    print(persons)

    for person_page in persons:
        # Last path component of the post URL, with or without a trailing '/'.
        person_dir = person_page.rstrip('/').split('/')[-1]
        print(person_dir)
        target_dir = Path(person_dir)
        target_dir.mkdir(exist_ok=True)

        person_data = requests.get(person_page).content
        gallery = BeautifulSoup(person_data, 'lxml').find(
            'div', {'id': 'gallery-2'})
        if gallery is None:
            print('No gallery found, skipping: {}'.format(person_page))
            continue

        person_imgs = [_full_size_image(image['src'])
                       for image in gallery.find_all('img')
                       if image.get('src')]

        # Downloading: one request per image, written via an explicit path so
        # the working directory never changes.
        for img in person_imgs:
            response = requests.get(img['url'])
            if not response.ok:
                # Do not store an HTTP error page under an image name.
                print('Skipping {} (HTTP {})'.format(
                    img['url'], response.status_code))
                continue
            (target_dir / img['name']).write_bytes(response.content)
        # TODO: labelling step is not implemented yet.

    print('\n\nCheckpoint: {} page\n\n'.format(page))
    page = page + 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment