Sergeydigl3 · July 27, 2019 11:08
diff --git a/Parser_czech_cast.py b/Parser_czech_cast.py
 #@title Download smth
 import requests
 from bs4 import BeautifulSoup
 from pathlib import Path
 from os.path import join
 from os import chdir, mkdir
 import PIL.Image as Image
 from itertools import groupby

 # Walk the paginated post listing, open every linked post page and save the
 # full-size version of each gallery image into
 # <main_path>/<main_name>/<post-slug>/.
 main_path = '/content/'
 main_name = 'czech'
 pages = 37
 # Thumbnail marker in gallery file names; it is stripped to build the
 # full-size file name and URL.
 thumb_suffix = '-300x400.jpg'
 # Seconds before a stalled request is abandoned instead of hanging the run.
 request_timeout = 30

 chdir(main_path)
 Path(main_name).mkdir(exist_ok=True)
 # All per-person directories below are created relative to this directory;
 # the working directory is not changed again after this point.
 chdir(main_name)

 #@markdown Start from page
 page = 1 #@param {type: "number"}
 while page <= pages:
     listing = requests.get(
         'https://czechcasting.adultsites.co/page/{}/'.format(page),
         timeout=request_timeout)
     # Fail loudly on an HTTP error instead of parsing an error page.
     listing.raise_for_status()
     posts_box = BeautifulSoup(listing.content, 'lxml').find(
         'div', {'class': 'posts'})
     if posts_box is None:
         print('No posts container found on page {}'.format(page))
         posts = []
     else:
         posts = posts_box.find_all('div', {'class': 'post'})

     hrefs = []
     for post in posts:
         link = post.find('a')
         if link is not None and link.get('href'):
             hrefs.append(link['href'])
     # dict.fromkeys drops every repeated link (not only adjacent repeats)
     # while keeping first-seen order.
     persons = list(dict.fromkeys(hrefs))
     print(persons)

     for person_page in persons:
         # Last path component, with or without a trailing slash in the URL.
         person_dir = person_page.rstrip('/').split('/')[-1]
         print(person_dir)
         target_dir = Path(person_dir)
         target_dir.mkdir(exist_ok=True)

         person_data = requests.get(
             person_page, timeout=request_timeout).content
         gallery = BeautifulSoup(person_data, 'lxml').find(
             'div', {'id': 'gallery-2'})
         if gallery is None:
             print('No gallery found on {}, skipping'.format(person_page))
             continue

         person_imgs = []
         for image in gallery.find_all('img'):
             src = image.get('src')
             if not src:
                 continue
             base, slash, name = src.rpartition('/')
             if not name:
                 continue
             if thumb_suffix in name:
                 new_name = name.split(thumb_suffix)[0] + '.jpg'
             else:
                 # Not a thumbnail name: keep it as is rather than
                 # appending a second extension.
                 new_name = name
             # Only the file-name part of the URL is swapped.
             person_imgs.append({'url': base + slash + new_name,
                                 'name': new_name})

         # downloading
         for img in person_imgs:
             response = requests.get(img['url'], timeout=request_timeout)
             if not response.ok:
                 # Do not store an HTTP error body as if it were an image.
                 print('Failed to download {} (HTTP {})'.format(
                     img['url'], response.status_code))
                 continue
             (target_dir / img['name']).write_bytes(response.content)

             # TODO: labeling should happen here

     print('\n\nCheckpoint: {} page\n\n'.format(page))
     page = page + 1
	#@title Download smth
	import requests
	from bs4 import BeautifulSoup
	from pathlib import Path
	from os.path import join
	from os import chdir, mkdir
	import PIL.Image as Image
	from itertools import groupby

	# Walk the paginated post listing, open every linked post page and save the
	# full-size version of each gallery image into
	# <main_path>/<main_name>/<post-slug>/.
	main_path = '/content/'
	main_name = 'czech'
	pages = 37
	# Thumbnail marker in gallery file names; it is stripped to build the
	# full-size file name and URL.
	thumb_suffix = '-300x400.jpg'
	# Seconds before a stalled request is abandoned instead of hanging the run.
	request_timeout = 30

	chdir(main_path)
	Path(main_name).mkdir(exist_ok=True)
	# All per-person directories below are created relative to this directory;
	# the working directory is not changed again after this point.
	chdir(main_name)

	#@markdown Start from page
	page = 1 #@param {type: "number"}
	while page <= pages:
	    listing = requests.get(
	        'https://czechcasting.adultsites.co/page/{}/'.format(page),
	        timeout=request_timeout)
	    # Fail loudly on an HTTP error instead of parsing an error page.
	    listing.raise_for_status()
	    posts_box = BeautifulSoup(listing.content, 'lxml').find(
	        'div', {'class': 'posts'})
	    if posts_box is None:
	        print('No posts container found on page {}'.format(page))
	        posts = []
	    else:
	        posts = posts_box.find_all('div', {'class': 'post'})

	    hrefs = []
	    for post in posts:
	        link = post.find('a')
	        if link is not None and link.get('href'):
	            hrefs.append(link['href'])
	    # dict.fromkeys drops every repeated link (not only adjacent repeats)
	    # while keeping first-seen order.
	    persons = list(dict.fromkeys(hrefs))
	    print(persons)

	    for person_page in persons:
	        # Last path component, with or without a trailing slash in the URL.
	        person_dir = person_page.rstrip('/').split('/')[-1]
	        print(person_dir)
	        target_dir = Path(person_dir)
	        target_dir.mkdir(exist_ok=True)

	        person_data = requests.get(
	            person_page, timeout=request_timeout).content
	        gallery = BeautifulSoup(person_data, 'lxml').find(
	            'div', {'id': 'gallery-2'})
	        if gallery is None:
	            print('No gallery found on {}, skipping'.format(person_page))
	            continue

	        person_imgs = []
	        for image in gallery.find_all('img'):
	            src = image.get('src')
	            if not src:
	                continue
	            base, slash, name = src.rpartition('/')
	            if not name:
	                continue
	            if thumb_suffix in name:
	                new_name = name.split(thumb_suffix)[0] + '.jpg'
	            else:
	                # Not a thumbnail name: keep it as is rather than
	                # appending a second extension.
	                new_name = name
	            # Only the file-name part of the URL is swapped.
	            person_imgs.append({'url': base + slash + new_name,
	                                'name': new_name})

	        # downloading
	        for img in person_imgs:
	            response = requests.get(img['url'], timeout=request_timeout)
	            if not response.ok:
	                # Do not store an HTTP error body as if it were an image.
	                print('Failed to download {} (HTTP {})'.format(
	                    img['url'], response.status_code))
	                continue
	            (target_dir / img['name']).write_bytes(response.content)

	            # TODO: labeling should happen here

	    print('\n\nCheckpoint: {} page\n\n'.format(page))
	    page = page + 1
No results found