Skip to content

Instantly share code, notes, and snippets.

@facepainter
Created February 12, 2018 05:22
Show Gist options
  • Save facepainter/cb1a33f5ce28dc6f4ad4a7e8ff2f3a41 to your computer and use it in GitHub Desktop.
Save facepainter/cb1a33f5ce28dc6f4ad4a7e8ff2f3a41 to your computer and use it in GitHub Desktop.
Fetch images of faces from Google by person's name
'''
Fetch faces from the interwebs by name
Uses Google Image Search to get a bunch of faces
based on the specified name
'''
import errno
import re
import urllib.error
import urllib.parse
import urllib.request
from os import makedirs, path
class FaceFetch():
    '''It sure fetches faces! (tm)

    Queries Google Image Search for large JPEG face photos matching a
    name, pulls image URLs out of the returned page with a regular
    expression and saves the ones served as ``image/jpeg`` to disk.
    '''

    def __init__(self, user_agent=None, pattern=None):
        '''Configure the search endpoint, URL pattern and request headers.

        user_agent -- User-Agent header sent with the search request;
                      a Firefox-like default is used when None.
        pattern    -- regular expression whose first group captures an
                      image URL; a default matching the "ou" entries of
                      the results page is used when None.
        '''
        if pattern is None:
            # Raw string so the \s class reaches the regex engine without
            # triggering Python's invalid-escape-sequence warning.
            pattern = r'"ou":"(?P<url>http(s?)://[^\s"]+)'
        if user_agent is None:
            user_agent = 'Mozilla/5.0 Gecko/20100101 Firefox/12.0'
        self.url = 'https://www.google.co.uk/search'
        self.pattern = pattern
        self.user_agent = user_agent
        self.regex = re.compile(self.pattern, re.IGNORECASE)
        self.headers = {'User-Agent': self.user_agent}

    @staticmethod
    def __make_path(filename):
        '''Ensure the parent directory of filename exists.

        Returns filename unchanged so the call can be used inline.
        Raises OSError if the directory cannot be created.
        '''
        directory = path.dirname(filename)
        # A bare name such as 'Tom Hanks' has no directory component;
        # makedirs('') would raise FileNotFoundError, so skip it.
        if directory:
            makedirs(directory, exist_ok=True)
        return filename

    @staticmethod
    def __save_file(file, file_path):
        '''Write the body of the response `file` to file_path.

        The response is closed afterwards, whether or not the write
        succeeded.
        '''
        try:
            with open(file_path, 'wb') as handler:
                print("Saving {}".format(file_path))
                handler.write(file.read())
        finally:
            file.close()

    def __make_query(self, name):
        '''Builds the query URI'''
        # quote_plus turns spaces into '+' and percent-encodes characters
        # such as '&' or '#' that would otherwise break the query string.
        return self.url + '?tbm=isch&tbs=itp:face,isz:l,ift:jpg&q={}'.format(
            urllib.parse.quote_plus(name))

    def __valid_files(self, output, result):
        '''Iterates over the parsed results.
        Attempts to download matching image

        Yields (response, destination_path) for every URL that could be
        opened and is served as image/jpeg. Files are numbered by match
        position, so skipped URLs leave gaps in the numbering. The
        consumer is responsible for closing each yielded response.
        '''
        for idx, match in enumerate(self.regex.finditer(result)):
            try:
                response = urllib.request.urlopen(match.group(1))
            except urllib.error.URLError:
                # HTTPError is a URLError subclass; unreachable hosts are
                # skipped just like HTTP error statuses.
                continue
            # get_content_type() lower-cases the value and drops any
            # parameters, so 'image/jpeg; charset=binary' still matches.
            if response.headers.get_content_type() != 'image/jpeg':
                response.close()
                continue
            yield (response, path.join(output, '{}.jpg'.format(idx + 1)))

    def __open(self, name):
        '''Fetch the search results page for name as UTF-8 decoded text.'''
        request = urllib.request.Request(
            self.__make_query(name), data=None, headers=self.headers)
        response = urllib.request.urlopen(request)
        try:
            return response.read().decode('utf-8')
        finally:
            response.close()

    def request(self, name, output=None, limit=4):
        '''Search for name and save up to limit JPEG images.

        name   -- search terms, typically a person's name.
        output -- directory to save into; defaults to name.
        limit  -- number of images to save before stopping. A value of
                  zero or less never meets the stop condition, so every
                  matching image is downloaded.
        '''
        output = self.__make_path(name if output is None else output)
        result = self.__open(name)
        for response, file_path in self.__valid_files(output, result):
            try:
                self.__make_path(file_path)
                self.__save_file(response, file_path)
            finally:
                # Also covers a failure in __make_path; closing an
                # already-closed response is harmless.
                response.close()
            limit -= 1
            if limit == 0:
                return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment