Skip to content

Instantly share code, notes, and snippets.

@kirussian911
Created August 26, 2018 00:57
Show Gist options
  • Save kirussian911/8a14ab685b10ebcc2508a96687111df8 to your computer and use it in GitHub Desktop.
Save kirussian911/8a14ab685b10ebcc2508a96687111df8 to your computer and use it in GitHub Desktop.
import urllib.request
import re
# Page index embedded in the names of the saved image files (see download());
# it is set once here and not reassigned anywhere in the visible code.
page_number = 1
def load_source(website):
    """Fetch ``website`` and return the raw response body.

    Args:
        website: URL (or ``urllib.request.Request``) passed straight to
            ``urllib.request.urlopen``.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``read()`` raises (for
        example ``urllib.error.URLError``); nothing is caught here.
    """
    # The context manager closes the connection even when read() fails,
    # so the response object is never leaked.
    with urllib.request.urlopen(website) as site:
        return site.read()
def parse_img(source):
    """Extract image URLs from the HTML of a page.

    Only ``<img>`` tags written exactly as
    ``<img width="<digits>" height="<digits>" src="<url>"`` (attributes in
    that order, double-quoted) are recognised.

    Args:
        source: Page HTML, either as ``bytes`` (as returned by
            ``load_source``) or as ``str``.

    Returns:
        A list with the ``src`` value of every matching tag, in document
        order. Empty when nothing matches.
    """
    # Decode real bytes instead of parsing their repr (``b'...'``), which
    # would turn non-ASCII characters and newlines into escape sequences.
    if isinstance(source, (bytes, bytearray)):
        text = source.decode('utf-8', errors='replace')
    else:
        text = str(source)

    # Capture only what sits between the quotes of src="...". Splitting on
    # the tag prefix instead would yield the markup before the first image
    # as a bogus "link" and leave trailing HTML glued to every URL.
    pattern = r'<img width="\d+" height="\d+" src="([^"]*)"'
    return re.findall(pattern, text)
def download(links):
    """Download every link and save it as a numbered ``.jpg`` file.

    Files are written to the current working directory and named
    ``'Стр' + page_number + 'номер' + N + '.jpg'``, where ``page_number`` is
    the module-level global and ``N`` starts at 1 and advances only after a
    file has been saved successfully.

    The operation is best-effort: a link that cannot be fetched or saved is
    reported on stdout and skipped, and the remaining links are still
    processed.

    Args:
        links: Iterable of URLs to fetch.

    Returns:
        None.
    """
    name = 1
    for link in links:
        try:
            # Read the whole body before creating the file so that a failed
            # download never leaves an empty .jpg behind.
            with urllib.request.urlopen(link) as response:
                data = response.read()
            with open('Стр' + str(page_number) + 'номер' + str(name) + '.jpg', 'wb') as image_file:
                image_file.write(data)
        except Exception as exc:
            # Deliberately broad: malformed links can raise ValueError,
            # URLError/OSError or http.client exceptions, and one bad link
            # must not abort the batch. Unlike a bare ``except`` this lets
            # KeyboardInterrupt/SystemExit through and reports the failure.
            print('skipped', repr(str(link)[:80]), '-', exc)
            continue
        name += 1
def main():
    """Script entry point: fetch the shop page, extract image links, save them."""
    # Banner line followed by one empty line.
    print('start page: ', end='\n\n')
    page_html = load_source('https://aliholic.com/shop/')
    download(parse_img(page_html))
    print('Tnx')
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment