Created
September 27, 2021 23:52
-
-
Save allanvobraun/4476f6fca91881f95a8e674dbeb64939 to your computer and use it in GitHub Desktop.
Um script para pegar prints do Lightshot (NÃO É TOTALMENTE ALEATÓRIO): as imagens vão se repetir.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import permutations | |
from string import ascii_lowercase, digits | |
from itertools import permutations | |
from typing import Generator, List | |
from bs4 import BeautifulSoup | |
import cloudscraper | |
import random | |
import requests | |
import os.path | |
import uuid | |
# Module-level scraper reused for every page fetch.
scraper = cloudscraper.create_scraper()


def get_img_from_url(url: str) -> str:
    """Return the screenshot image address found on a page.

    Fetches ``url`` with the module-level ``scraper``, parses the HTML and
    reads the ``src`` attribute of the first element matching the CSS
    selector ``.no-click.screenshot-image``.

    Args:
        url: Address of the page to scrape.

    Returns:
        The element's ``src`` value, or an empty string when the page has
        no matching element or the element has no ``src`` attribute.
    """
    page = scraper.get(url)
    soup = BeautifulSoup(page.text, 'html.parser')
    tag = soup.select_one('.no-click.screenshot-image')
    if tag is None:
        # select_one() returns None when nothing matches; subscripting it
        # would raise TypeError and abort the whole run for one bad page.
        return ''
    return tag.get('src', '')
def combinacoes() -> Generator[str, None, None]: | |
characters = ascii_lowercase + digits | |
per = permutations(characters, 6) | |
for _ in per: | |
yield "".join(next(per)) | |
def build_urls(count: int) -> List[str]:
    """Build a shuffled list of ``count`` prnt.sc page URLs.

    Pulls the first ``count`` codes from ``combinacoes()``, prefixes each
    with ``https://prnt.sc/`` and shuffles the resulting list in place.

    Args:
        count: Number of URLs to generate.

    Returns:
        The generated URLs in shuffled order.
    """
    codes = combinacoes()
    shuffled_urls: List[str] = []
    for _ in range(count):
        shuffled_urls.append("https://prnt.sc/" + next(codes))
    random.shuffle(shuffled_urls)
    return shuffled_urls
def dowload_images(urls: List[str]):
    """Download every URL and store the response body under ``./images/``.

    Each URL is printed before it is requested. The body of each response
    is written to a new file named with a random UUID and a ``.jpg``
    extension. URLs whose request fails are skipped.

    Args:
        urls: Addresses to download.
    """
    directory = './images/'
    # open() does not create missing folders; make sure the target exists
    # so the first write does not fail on a fresh checkout.
    os.makedirs(directory, exist_ok=True)
    for url in urls:
        print(url)
        try:
            img_data = requests.get(url).content
        except requests.exceptions.RequestException:
            # Best effort: skip malformed URLs (MissingSchema is a
            # RequestException) as well as connection failures, instead
            # of letting one bad address abort the whole batch.
            continue
        image_name = os.path.join(directory, str(uuid.uuid4()))
        with open(image_name + ".jpg", 'wb') as handler:
            handler.write(img_data)
if __name__ == '__main__':
    # Script entry point: generate page URLs, ask for confirmation, scrape
    # the image address from the first `img_count` pages, then download.
    url_count = 1500
    img_count = 100

    urls = build_urls(url_count)
    asw = input(f"{url_count} urls geradas, começar? (Y/n)")
    if asw not in ('y', 'Y', ''):
        # exit() is a helper injected by the site module for interactive
        # use; raising SystemExit works even when site is not loaded.
        raise SystemExit(0)

    print(f"\n Buscando {img_count} urls de imagens..")
    # Slicing avoids an IndexError if img_count ever exceeds url_count.
    images_urls = [get_img_from_url(url) for url in urls[:img_count]]
    print("Pronto!")

    print(f"\n Fazendo dowload de {img_count} imagens..")
    dowload_images(images_urls)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment