Created
February 12, 2018 05:22
-
-
Save facepainter/cb1a33f5ce28dc6f4ad4a7e8ff2f3a41 to your computer and use it in GitHub Desktop.
Fetch images of faces from google by persons name
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Fetch faces from the interwebs by name.
Uses Google Image Search to get a bunch of faces
based on the specified name.
'''
import errno
import re
import urllib.error
import urllib.parse
import urllib.request
from os import makedirs, path
class FaceFetch:
    '''Download JPEG face photos found through a Google Image Search.

    A search results page is fetched for a person's name, image URLs are
    pulled out of it with a regular expression, and each URL that really
    serves a JPEG is saved to disk as ``<match number>.jpg``.
    '''

    def __init__(self, user_agent=None, pattern=None):
        '''Configure the fetcher.

        user_agent -- User-Agent header sent with the search request;
                      a Firefox-like default is used when None.
        pattern    -- regular expression whose FIRST capture group is an
                      image URL; the built-in default is used when None.
        '''
        self.url = 'https://www.google.co.uk/search'
        # Raw string so that ``\\s`` reaches the regex engine untouched.
        # NOTE(review): the default looks for '"ou":"<url>' entries in the
        # results page - confirm this still matches what Google serves.
        self.pattern = r'"ou":"(?P<url>http(s?)://[^\s"]+)' if pattern is None else pattern
        self.user_agent = 'Mozilla/5.0 Gecko/20100101 Firefox/12.0' if user_agent is None else user_agent
        self.regex = re.compile(self.pattern, re.IGNORECASE)
        self.headers = {'User-Agent': self.user_agent}

    @staticmethod
    def __make_path(filename):
        '''Ensure the parent directory of ``filename`` exists.

        Returns ``filename`` unchanged. A bare name such as ``'1.jpg'`` has
        no parent component, so there is nothing to create in that case.
        '''
        directory = path.dirname(filename)
        if directory:
            # exist_ok avoids the check-then-create race.
            makedirs(directory, exist_ok=True)
        return filename

    @staticmethod
    def __save_file(file, file_path):
        '''Write everything readable from ``file`` to ``file_path``.

        ``file`` is any object with a ``read()`` returning bytes; closing
        it is left to the caller.
        '''
        with open(file_path, 'wb') as handler:
            print("Saving {}".format(file_path))
            handler.write(file.read())

    def __make_query(self, name):
        '''Build the search URI for ``name``.

        The name is percent-encoded (spaces become ``+``) so characters
        such as ``&`` or accented letters cannot corrupt the query string.
        '''
        query = urllib.parse.quote_plus(name)
        return self.url + '?tbm=isch&tbs=itp:face,isz:l,ift:jpg&q={}'.format(query)

    def __valid_files(self, output, result):
        '''Yield ``(response, file_path)`` for every match that is a JPEG.

        Iterates over the URLs matched in ``result`` and tries to open each
        one. URLs that cannot be opened, or that do not serve
        ``image/jpeg``, are skipped. File names use the 1-based position of
        the match, so skipped matches leave gaps in the numbering. The
        caller is responsible for closing each yielded response.
        '''
        for idx, match in enumerate(self.regex.finditer(result), start=1):
            try:
                response = urllib.request.urlopen(match.group(1))
            except (OSError, ValueError):
                # OSError covers HTTPError/URLError and socket failures;
                # ValueError is raised for malformed URLs. One bad link
                # must not abort the whole run.
                continue
            # get_content_type() ignores case and parameters such as
            # "; charset=...", and falls back to text/plain when missing.
            if response.headers.get_content_type() != 'image/jpeg':
                response.close()
                continue
            yield (response, path.join(output, '{}.jpg'.format(idx)))

    def __open(self, name):
        '''Fetch the search results page for ``name`` and return its text.'''
        request = urllib.request.Request(self.__make_query(name), data=None, headers=self.headers)
        with urllib.request.urlopen(request) as response:
            return response.read().decode('utf-8', errors='replace')

    def request(self, name, output=None, limit=4):
        '''Search for ``name`` and save up to ``limit`` JPEGs.

        name   -- the person's name to search for.
        output -- directory to save into; defaults to ``name``.
        limit  -- maximum number of images to save. ``None`` means no
                  limit; zero or a negative number saves nothing.

        Raises whatever ``urllib`` raises if the search page itself cannot
        be fetched; failures on individual images are skipped.
        '''
        if limit is not None and limit <= 0:
            return
        output = self.__make_path(name if output is None else output)
        result = self.__open(name)
        saved = 0
        for response, file_path in self.__valid_files(output, result):
            try:
                self.__make_path(file_path)
                self.__save_file(response, file_path)
            finally:
                # Always release the connection, even if the write fails.
                response.close()
            saved += 1
            if limit is not None and saved >= limit:
                return
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment