'''
Extract a known face from a video.

Uses a combination of a deep learning CNN model (batched on the GPU with
CUDA) to detect faces in video frames or a sequence of images, and HOG to
compare the detected faces with a computed reference set of face encodings.
'''
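# Example CLI invocation (paths and script name are hypothetical; see the
# argparse setup at the bottom of this file for all options):
#   python facegrab.py -r ./references/person -i ./input/video.mp4 -o ./output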
from os import path, listdir
from typing import NamedTuple

import cv2
import numpy as np
import face_recognition as fr
from tqdm import tqdm


class RecognitionSettings(NamedTuple):
    '''
    Face recognition settings
    :param float tolerance: How much "distance" between faces to consider it a match.
    :param int jitter: How many times to re-sample images when calculating encodings.
    '''
    tolerance: float = .6
    jitter: int = 5


class ProcessSettings(NamedTuple):
    '''
    Video process settings
    :param int batch_size: How many frames to include in each GPU processing batch.
    :param int skip_frames: How many frames to skip e.g. 5 means look at every 6th
    :param int extract_size: Size in pixels of extracted face images (n*n).
    :param float scale: Amount to down-sample input by for detection processing.
    :param bool display_output: Show the detection and extraction images in process.
    :param float blur_threshold: Minimum Laplacian edge variance; extracts
        below this are considered blurred and skipped.
    '''
    batch_size: int = 128
    skip_frames: int = 6
    extract_size: int = 256
    scale: float = .25
    display_output: bool = False
    blur_threshold: float = 10.0


class FaceGrab(object):
    '''
    It sure grabs faces! (tm)
    :param str reference: Path to a reference image, a directory of images,
        an .npz file of saved encodings, or '*' to match all faces
    :param RecognitionSettings recognition: Face recognition settings
    :param ProcessSettings process: Video process settings
    '''
    def __init__(self, reference, recognition=None, process=None):
        if recognition is None:
            recognition = RecognitionSettings()
        if process is None:
            process = ProcessSettings()
        skip_sanity = 1 if process.skip_frames <= 0 else process.skip_frames + 1
        self.__ps = process._replace(batch_size=np.clip(process.batch_size, 2, 128),
                                     skip_frames=skip_sanity,
                                     scale=np.clip(process.scale, 0, 1.0))
        self.__rs = recognition._replace(tolerance=np.clip(recognition.tolerance, 0.1, 1))
        self.__process_frames = []
        self.__original_frames = []
        self.__reference_encodings = []
        self.__total_extracted = 0
        self.__extract_dim = (self.__ps.extract_size, self.__ps.extract_size)
        self.__extract_pad = int(self.__ps.extract_size / 100 * 18)
        self.__check_reference(reference)
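
    # Programmatic use, equivalent to the CLI at the bottom of this file
    # (paths hypothetical):
    #   fg = FaceGrab('./refs/person.jpg')
    #   fg.process('./video.mp4', './extracted')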

    _MEAN_FACE_TRANSPOSED = np.asarray([
        [-0.4899134, -0.41486446, -0.30899666, -0.19935666, -0.09672966,
         0.09672934, 0.19935634, 0.30899734, 0.41486434, 0.48991334,
         0.00000034, 0.00000034, 0.00000034, 0.00000034, -0.12324666,
         -0.06409066, 0.00000034, 0.06409034, 0.12324634, -0.36838966,
         -0.30300466, -0.22430166, -0.15552066, -0.22920866, -0.30738366,
         0.15552034, 0.22430134, 0.30300534, 0.36838934, 0.30738334,
         0.22920834, -0.23597766, -0.14914166, -0.06126866, 0.00000034,
         0.06126834, 0.14914134, 0.23597734, 0.15203234, 0.06659434,
         0.00000034, -0.06659466, -0.15203266, -0.19974766, -0.06203066,
         0.00000034, 0.06203034, 0.19974734, 0.06323734, 0.00000034,
         -0.06323666],
        [-0.35796968, -0.42550868, -0.44567548, -0.42993458, -0.38703308,
         -0.38703308, -0.42993458, -0.44567548, -0.42550868, -0.35796968,
         -0.26107168, -0.15741468, -0.05461868, 0.05120132, 0.12290232,
         0.14492132, 0.16368232, 0.14492132, 0.12290232, -0.24800068,
         -0.28566568, -0.28457168, -0.23269068, -0.21932468, -0.22034668,
         -0.23269068, -0.28457168, -0.28566568, -0.24800068, -0.22034668,
         -0.21932468, 0.31580932, 0.28098132, 0.26296432, 0.27815432,
         0.26296432, 0.28098132, 0.31580932, 0.40038132, 0.43776832,
         0.44485732, 0.43776832, 0.40038132, 0.32036832, 0.31432232,
         0.32091932, 0.31432232, 0.32036832, 0.35975832, 0.36737932,
         0.35975832]])

    @classmethod
    def __get_matrix(cls, face):
        '''
        2D face similarity transform with scaling
        adapted from _umeyama
        https://github.com/scikit-image/scikit-image/blob/master/skimage/transform/_geometric.py#L72
        http://web.stanford.edu/class/cs273/refs/umeyama.pdf
        '''
        mean_face = np.average(face, axis=0)
        mean_reference = np.asarray([0.49012666, 0.46442368])
        mean_delta = face - mean_face
        d = np.ones((2,))
        mat = np.eye(2, 3)
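        # _MEAN_FACE_TRANSPOSED (2x51) @ the centred landmarks (51x2) / 51
        # is the 2x2 covariance between the template and this face; its SVD
        # gives the rotation, and the Umeyama scale follows from the singular
        # values over the variance of the input points.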
        U, s, V = np.linalg.svd(cls._MEAN_FACE_TRANSPOSED @ mean_delta / 51)
        mat[:2, :2] = U @ np.diag(d) @ V
        scale = 1.0 / mean_delta.var(0).sum() * s @ d
        mat[:2, 2] = mean_reference - scale * mat[:2, :2] @ mean_face.T
        mat[:2, :2] *= scale
        return mat

    @staticmethod
    def __is_blurred(image, threshold):
        '''Fast Laplacian edge variance check.
        Returns True if image edge variance is below threshold'''
        grayed = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return cv2.Laplacian(grayed, cv2.CV_64F).var() < threshold
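
    # A rough sense of the scale (a sketch; 'face.jpg' is a hypothetical path):
    #   img = cv2.imread('face.jpg')
    #   cv2.Laplacian(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), cv2.CV_64F).var()
    # Sharp crops tend to score in the hundreds, so the default threshold of
    # 10.0 only rejects extracts with very little edge detail.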

    @property
    def reference_count(self):
        '''Total currently loaded reference encodings for recognition'''
        return len(self.__reference_encodings)

    @staticmethod
    def __downsample(image, scale):
        '''Downscale and convert image for faster detection processing'''
        sampled = cv2.resize(image,
                             (0, 0),
                             fx=scale,
                             fy=scale,
                             interpolation=cv2.INTER_AREA) if scale > 0 else image
        return sampled[:, :, ::-1]  # BGR -> RGB (face_recognition expects RGB)

    @staticmethod
    def __file_count(directory):
        '''Returns the number of files in a directory'''
        return len([item for item in listdir(directory) if path.isfile(path.join(directory, item))])

    @staticmethod
    def __draw_detection(frame, locations):
        '''Draws the process frames with face detection locations'''
        for (top, right, bottom, left) in locations:
            cv2.rectangle(frame, (left, top), (right, bottom), (255, 0, 0), 1)
        cv2.imshow('process', frame[:, :, ::-1])  # RGB -> BGR (imshow expects BGR)
        cv2.waitKey(1)

    @staticmethod
    def __landmark_points(image, location=None):
        '''Face landmarks as coordinates (without the chin!)'''
        # NB: protected-access - eek!
        landmarks = fr.api._raw_face_landmarks(image, location)
        if np.any(landmarks):
            return [(p.x, p.y) for p in landmarks[0].parts()][17:]
        return []

    def __transform(self, image, coordinates, padding=0):
        '''Affine transform between image landmarks and "mean face"'''
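        # The similarity matrix maps into roughly the unit square, so it is
        # scaled up to the extract size (minus padding on both sides) and the
        # translation column is shifted by the padding to keep the face centred.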
        mat = self.__get_matrix(coordinates)
        mat = mat * (self.__ps.extract_size - 2 * padding)
        mat[:, 2] += padding
        return cv2.warpAffine(image, mat, self.__extract_dim, None, flags=cv2.INTER_LANCZOS4)

    def __check_reference(self, reference):
        '''Checks if the reference is a wild-card/directory/file and looks for encodings'''
        if reference == '*':
            return
        if path.isdir(reference):
            with tqdm(total=self.__file_count(reference), unit='files') as progress:
                for file in listdir(reference):
                    full_path = path.join(reference, file)
                    if path.isfile(full_path):
                        progress.update(1)
                        progress.set_description(f'Checking reference: {file}')
                        self.__parse_encoding(full_path)
            return
        if path.isfile(reference):
            if reference.endswith('.npz'):
                self.load(reference)
            else:
                self.__parse_encoding(reference)
            return
        raise ValueError(f'Invalid reference: {reference}')

    def __parse_encoding(self, image_path):
        '''Adds the first face encoding in an image to the reference encodings'''
        image = fr.load_image_file(image_path)
        encoding = fr.face_encodings(image, None, self.__rs.jitter)
        if np.any(encoding):
            self.__reference_encodings.append(encoding[0])
        else:
            tqdm.write(f'No encoding: {image_path}')

    def __recognise(self, face):
        '''Checks a given face against any known reference encodings.
        Returns the number of reference encodings within tolerance,
        i.e. an int in [1, reference_count] if recognised,
        0 if not recognised or no encoding could be computed,
        and -1 if there are no references (match everything)'''
        if not self.reference_count:
            return -1
        encoding = fr.face_encodings(face, None, self.__rs.jitter)
        if not np.any(encoding):
            return 0
        # maybe? np.mean(distance[np.where(distance <= self.__rs.tolerance)[0]])
        distance = fr.face_distance(self.__reference_encodings, encoding[0])
        return len(np.where(distance <= self.__rs.tolerance)[0])

    def __reset_frames(self):
        self.__process_frames.clear()
        self.__original_frames.clear()

    def __get_location_frames(self):
        '''Get the total faces detected, plus the locations and the
        original/process frames fancy-indexed down to frames with hits.'''
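        # batch_face_locations runs the CNN detector over the whole batch on
        # the GPU; frames with no detections come back as empty lists, so
        # np.nonzero keeps only the indices of frames that scored hits.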
        batch = fr.batch_face_locations(self.__process_frames, 1, self.__ps.batch_size)
        hits = np.nonzero(batch)[0]
        locations = np.asarray(batch)[hits]
        return (sum(len(x) for x in locations),
                zip(locations,
                    np.asarray(self.__original_frames)[hits],
                    np.asarray(self.__process_frames)[hits]))

    def __get_faces(self, image, face_locations):
        '''Get the quick cropped faces and scaled locations from
        a set of detected locations'''
        factor = int(1 / self.__ps.scale) if self.__ps.scale > 0 else 1
        for location in face_locations:
            scaled = tuple(factor * n for n in location)
            top, right, bottom, left = scaled
            yield (image[top:bottom, left:right], scaled)

    def __save_extract(self, image, file_path):
        '''Saves the image to file_path at the extract dimensions'''
        height, width, _ = np.shape(image)
        if (width, height) != self.__extract_dim:
            image = cv2.resize(image, self.__extract_dim, interpolation=cv2.INTER_LANCZOS4)
        cv2.imwrite(file_path, image)
        self.__total_extracted += 1
        if self.__ps.display_output:
            for point in self.__landmark_points(image):
                cv2.circle(image, point, 1, (0, 0, 255), -1)
            cv2.imshow('extracted', image)
            cv2.waitKey(1)

    def __get_frame(self, sequence, total_frames):
        '''Grabs, decodes and yields each wanted frame and its number.'''
        for frame_number in range(total_frames):
            if self.__skip_frame(frame_number):
                continue
            sequence.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = sequence.read()
            if not ret:
                break
            yield (frame, frame_number)

    def __do_batch(self, batch_count, output_path):
        '''Handles each batch of detected faces, performing recognition on each'''
        total, results = self.__get_location_frames()
        self.__reset_frames()
        extracted = 0
        with tqdm(total=total, unit='checks') as progress:
            progress.set_description(f'Batch #{batch_count}')
            if not total:
                return
            for locations, original, processed in results:
                if self.__ps.display_output:
                    self.__draw_detection(processed, locations)
                for face, location_scaled in self.__get_faces(original, locations):
                    progress.update(1)
                    progress.set_description(f'Batch #{batch_count} (recognised {extracted})')
                    recognised = self.__recognise(face)
                    if recognised:
                        points = self.__landmark_points(original, [location_scaled])
                        if np.any(points):
                            face = self.__transform(original, points, self.__extract_pad)
                        if self.__is_blurred(face, self.__ps.blur_threshold):
                            continue
                        name = path.join(output_path, f'{recognised}-{self.__total_extracted}.jpg')
                        self.__save_extract(face, name)
                        extracted += 1
                        # very unlikely the target appears more than once per
                        # frame - move to the next frame if we have references
                        if self.reference_count:
                            break

    def __skip_frame(self, number):
        '''We want every nth frame if skipping'''
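        # number % skip_frames is truthy for all but every skip_frames-th
        # frame, so e.g. skip_frames=6 keeps frames 0, 6, 12, ...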
        return self.__ps.skip_frames > 0 and number % self.__ps.skip_frames

    def __batch_builder(self, output_path, sequence, total_frames):
        '''Splits the frames into batches and keeps score'''
        tqdm.monitor_interval = 0
        with tqdm(total=total_frames, unit='frame') as progress:
            batch_count = 0
            for frame, frame_number in self.__get_frame(sequence, total_frames):
                progress.update(frame_number - progress.n)
                progress.set_description(f'Total (extracted {self.__total_extracted})')
                process_frame = self.__downsample(frame, self.__ps.scale)
                self.__process_frames.append(process_frame)
                self.__original_frames.append(frame)
                if self.__ps.display_output:
                    self.__draw_detection(process_frame, [])
                if len(self.__process_frames) == self.__ps.batch_size:
                    batch_count += 1
                    self.__do_batch(batch_count, output_path)
            # flush any remaining frames that did not fill a whole batch
            if self.__process_frames:
                self.__do_batch(batch_count + 1, output_path)

    def stats(self):
        '''Prints out statistics on the current references'''
        if not self.reference_count:
            print('Error: No reference encodings currently loaded')
            return
        matches = []
        distances = []
        for encoding in self.__reference_encodings:
            group = [x for x in self.__reference_encodings if not np.array_equal(x, encoding)]
            if group:
                matches.append(fr.compare_faces(group, encoding, self.__rs.tolerance))
                distances.append(fr.face_distance(group, encoding))
        print(f'Consistency {round(np.average(matches) * 100, 2)}%')
        print(f'Dist. Avg.  {round(np.average(distances), 3)}')
        print(f'Dist. SD.   {round(np.std(distances), 3)}')

    def save(self, file_path):
        '''
        Saves references in compressed npz format to the given path
        :param str file_path: Path to save file (.npz extension will be added if not present)
        '''
        print(f'Saving {len(self.__reference_encodings)} encodings to {file_path}')
        np.savez_compressed(file_path, *self.__reference_encodings)

    def load(self, file_path):
        '''
        Loads references in compressed npz format from the given path.
        NB: Overwrites any existing encodings
        :param str file_path: Path to a .npz file generated from the save method
        '''
        npzfile = np.load(file_path)
        print(f'Loading {len(npzfile.files)} from {file_path}')
        self.__reference_encodings = [npzfile[key] for key in npzfile.files]

    def process(self, input_path, output_path='.'):
        '''
        Extracts known faces from the input source to the output.
        :param str input_path: Path to video or image sequence pattern
        :param str output_path: Path to output directory
        '''
        self.__total_extracted = 0
        sequence = cv2.VideoCapture(input_path)
        frame_count = int(sequence.get(cv2.CAP_PROP_FRAME_COUNT))
        work = int(frame_count / self.__ps.skip_frames)
        batches = int(work / self.__ps.batch_size)
        print(f'Processing {input_path} (scale:{self.__ps.scale})')
        print(f'References {self.reference_count} (tolerance:{self.__rs.tolerance})')
        print(f'Checking {work}/{frame_count} frames ({batches} batches of {self.__ps.batch_size})')
        self.__batch_builder(output_path, sequence, frame_count)
        if self.__ps.display_output:
            cv2.destroyWindow('process')
            cv2.destroyWindow('extracted')


if __name__ == '__main__':
    import argparse

    class Range(object):
        '''Restricted range for float arguments'''
        def __init__(self, start, end):
            self.start = start
            self.end = end
        def __eq__(self, other):
            return self.start <= other <= self.end
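    # argparse validates `choices` with ==, so a single Range instance accepts
    # any float inside the interval, e.g.
    #   0.5 in [Range(0.1, 1.0)]  # True, via Range.__eq__(0.5)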

    AP = argparse.ArgumentParser(description='''FaceGrab''')
    # Required settings
    AP.add_argument('-r', '--reference', type=str, required=True,
                    help=r'''Path to a single image file or a directory of reference images.
                    Also accepts the path to an .npz file generated by save_references.
                    Finally, to skip recognition and extract all faces an asterisk '*'
                    can be passed.''')
    AP.add_argument('-i', '--input', type=str, required=True,
                    help=r'''Path to a single file e.g. ./video/foo.mp4
                    Or a path/pattern of an image sequence e.g. ./frames/img_%%04d.jpg
                    (read like ./frames/img_0000.jpg, ./frames/img_0001.jpg, ./frames/img_0002.jpg, ...)''')
    AP.add_argument('-o', '--output', type=str, required=True,
                    help='''Path to output directory''')
    # Optional settings
    AP.add_argument('-ds', '--display_stats', action='store_true',
                    help='''Show the reference encoding statistics (model consistency).''')
    AP.add_argument('-sr', '--save_references', type=str,
                    help='''Save the references in .npz format.
                    This file can be loaded as a reference, avoiding the need
                    to re-encode the same set of images for the same person each time''')
    # Optional process settings
    AP.add_argument('-bs', '--batch_size', type=int, default=128, choices=range(2, 129),
                    metavar='[2-128]',
                    help='''How many frames to include in each GPU processing batch.''')
    AP.add_argument('-sf', '--skip_frames', type=int, default=6, choices=range(0, 1001),
                    metavar='[0-1000]',
                    help='''How many frames to skip e.g. 5 means look at every 6th''')
    AP.add_argument('-xs', '--extract_size', type=int, default=256, choices=range(32, 1025),
                    metavar='[32-1024]',
                    help='''Size in pixels of extracted face images (n*n).''')
    AP.add_argument('-s', '--scale', type=float, default=0.25, choices=[Range(0.1, 1.0)],
                    metavar='[0.1-1.0]',
                    help='''Factor to down-sample input by for detection processing.
                    If you get too few matches try scaling by half e.g. 0.5''')
    AP.add_argument('-do', '--display_output', action='store_true',
                    help='''Show the detection and extraction images (slows processing).''')
    AP.add_argument('-bt', '--blur_threshold', type=float, default=10, choices=[Range(0, 9999)],
                    metavar='[0-9999]',
                    help='''Extract blur threshold, higher values are stricter. Default is 10''')
    # Optional recognition settings
    AP.add_argument('-t', '--tolerance', type=float, default=0.6, choices=[Range(0.1, 1.0)],
                    metavar='[0.1-1.0]',
                    help='''How much "distance" between faces to consider it a match.
                    Lower is stricter. 0.6 is typical best performance''')
    AP.add_argument('-j', '--jitter', type=int, default=5, choices=range(1, 1001),
                    metavar='[1-1000]',
                    help='''How many times to re-sample images when
                    calculating recognition encodings. Higher is more accurate, but slower
                    (100 is 100 times slower than 1).''')
    ARGS = AP.parse_args()
    RS = RecognitionSettings(tolerance=ARGS.tolerance, jitter=ARGS.jitter)
    PS = ProcessSettings(batch_size=ARGS.batch_size,
                         skip_frames=ARGS.skip_frames,
                         extract_size=ARGS.extract_size,
                         scale=ARGS.scale,
                         display_output=ARGS.display_output,
                         blur_threshold=ARGS.blur_threshold)
    FG = FaceGrab(ARGS.reference, RS, PS)
    if ARGS.display_stats:
        FG.stats()
    if ARGS.save_references:
        FG.save(ARGS.save_references)
    FG.process(ARGS.input, ARGS.output)