alexdong · July 18, 2023 22:40
diff --git a/EmbeddingVectorSearchEngine.swift b/EmbeddingVectorSearchEngine.swift
 import Accelerate
 import Dispatch
 import Foundation
 import os

 let logger = Logger(subsystem: "photos.keepers", category: "EmbeddingVectorSearchEngine")

 typealias Embedding = [Float32]
 typealias Distance = Float32

 final class EmbeddingVectorSearchEngine {
  static let shared = EmbeddingVectorSearchEngine()
  
  let fileURL: URL
  var uuids: [UUID] = []
  var embeddings: [Embedding] = []
  
  // private init prevents others from using the default initializer for this class
  private init() {
    let containerURL =  FileManager.default.containerURL(forSecurityApplicationGroupIdentifier: "group.photos.keepers.app")!
    fileURL = containerURL.appendingPathComponent("Embeddings.dat")
  }
  
  public func insert(_ inputs: [(UUID, Embedding)]) {
    /// First, update the in-memory records, then write it to disk
    uuids += inputs.map { $0.0 }
    embeddings += inputs.map { $0.1 }
    
    var data = Data()
    for (uuid, embedding) in inputs {
      let uuidTuple = uuid.uuid
      let uuidBytes: [UInt8] = [
        uuidTuple.0, uuidTuple.1, uuidTuple.2, uuidTuple.3,
        uuidTuple.4, uuidTuple.5, uuidTuple.6, uuidTuple.7,
        uuidTuple.8, uuidTuple.9, uuidTuple.10, uuidTuple.11,
        uuidTuple.12, uuidTuple.13, uuidTuple.14, uuidTuple.15
      ]
      data.append(contentsOf: uuidBytes)
      
      for floatVal in embedding {
        let floatValData = withUnsafeBytes(of: floatVal) { Data($0) }
        data.append(floatValData)
      }
    }
    
    if FileManager.default.fileExists(atPath: fileURL.path()) {
      if let fileUpdater = try? FileHandle(forWritingTo: fileURL) {
        fileUpdater.seekToEndOfFile()
        fileUpdater.write(data)
        fileUpdater.closeFile()
      }
    } else {
      try! data.write(to: fileURL)
    }
  }
  
  public func load() {
    let data = try! Data(contentsOf: fileURL, options: .alwaysMapped)
    let frameWidth = 16 + 1024 * 4
    let totalEmbeddings = data.count / frameWidth // calculate total number of embeddings to process
    
    // Preallocate memory with a placeholder UUID
    // This function preallocates memory for performance reasons. Preallocating memory:
    // 1) Helps to avoid memory fragmentation: Allocating all needed memory at once makes it more likely
    //    that the data will be stored contiguously in memory, improving access speed.
    // 2) Avoids a synchronisation point: When appending to an array from multiple threads, we need to
    //    synchronize access to the array which can become a bottleneck. By preallocating, each thread
    //    can write to a distinct region of the array, eliminating the need for synchronization.
    let placeholderUUID = UUID()
    uuids = [UUID](repeating: placeholderUUID, count: totalEmbeddings)
    embeddings = [Embedding](repeating: Embedding(repeating: 0.0, count: 1024), count: totalEmbeddings)
    
    DispatchQueue.concurrentPerform(iterations: totalEmbeddings) { index in
      let start = data.index(data.startIndex, offsetBy: index * frameWidth)
      
      let uuidEnd = data.index(start, offsetBy: 16)
      let uuidData = data[start..<uuidEnd]
      
      let uuidBytes = Array(uuidData)
      uuids[index] = UUID(uuid: (
        uuidBytes[0], uuidBytes[1], uuidBytes[2], uuidBytes[3],
        uuidBytes[4], uuidBytes[5], uuidBytes[6], uuidBytes[7],
        uuidBytes[8], uuidBytes[9], uuidBytes[10], uuidBytes[11],
        uuidBytes[12], uuidBytes[13], uuidBytes[14], uuidBytes[15]
      ))
      
      var embedding = Embedding(repeating: 0.0, count: 1024)
      var floatStart = uuidEnd
      for i in 0..<1024 {
        let floatEnd = data.index(floatStart, offsetBy: 4)
        let floatData = data[floatStart..<floatEnd]
        floatStart = floatEnd
        embedding[i] = floatData.withUnsafeBytes { $0.load(as: Float32.self) }
      }
      embeddings[index] = embedding
    }
  }
  
  public func find(input: Embedding, threshold: Distance) -> [UUID] {
    // The `find` function computes the similarity score between a given vector and a list of embedding vectors.
    // It runs tasks concurrently, which can result in a significant speedup. The number of
    // concurrent tasks is set to be three times the number of available CPU cores.
    //
    // Each task is responsible for a specific chunk of the embeddings list. An embedding vector is considered a match if
    // its similarity score with the input vector is greater than a given threshold.
    let numOfCores = ProcessInfo.processInfo.activeProcessorCount
    let numOfTasks = numOfCores * 3
    let chunkSize = (embeddings.count + numOfTasks - 1) / numOfTasks
    
    var results: [Int] = []
    let resultQueue = DispatchQueue(label: "photos.keepers.vectorSearch.find", attributes: .concurrent)
    DispatchQueue.concurrentPerform(iterations: numOfTasks) { taskIndex in
      let startIndex = taskIndex * chunkSize
      let endIndex = min(startIndex + chunkSize, embeddings.count)
      for i in startIndex..<endIndex {
        let distance = similarityScore(Array(input), embeddings[i])
        if distance > threshold {
          resultQueue.sync(flags: .barrier) {
            results.append(i)
          }
        }
      }
    }
    resultQueue.sync { }
    return results.map { uuids[$0] }
  }
  
  private func similarityScore(_ vector1: Embedding, _ vector2: Embedding) -> Distance {
    /* vDSP uses CPU's SIMD to perform the calculations. We could also use Metal Shader
     and Matrix operation to avoid using a for loop to call `similarityScore` for each.
     However, given the number of embeddings and the complexities of the Metal Shader,
     let's stick with vDSP for now.
     */
    var dotProduct: Float = 0.0
    var vector1Norm: Float = 0.0
    var vector2Norm: Float = 0.0
    let count = vDSP_Length(vector1.count)
    vDSP_dotpr(vector1, 1, vector2, 1, &dotProduct, count)
    vDSP_svesq(vector1, 1, &vector1Norm, count)
    vDSP_svesq(vector2, 1, &vector2Norm, count)
    return dotProduct / (sqrt(vector1Norm) * sqrt(vector2Norm))
  }
 }
	import Accelerate
	import Dispatch
	import Foundation
	import os

	let logger = Logger(subsystem: "photos.keepers", category: "EmbeddingVectorSearchEngine")

	typealias Embedding = [Float32]
	typealias Distance = Float32

	final class EmbeddingVectorSearchEngine {
	static let shared = EmbeddingVectorSearchEngine()

	let fileURL: URL
	var uuids: [UUID] = []
	var embeddings: [Embedding] = []

	// private init prevents others from using the default initializer for this class
	private init() {
	let containerURL = FileManager.default.containerURL(forSecurityApplicationGroupIdentifier: "group.photos.keepers.app")!
	fileURL = containerURL.appendingPathComponent("Embeddings.dat")
	}

	public func insert(_ inputs: [(UUID, Embedding)]) {
	/// First, update the in-memory records, then write it to disk
	uuids += inputs.map { $0.0 }
	embeddings += inputs.map { $0.1 }

	var data = Data()
	for (uuid, embedding) in inputs {
	let uuidTuple = uuid.uuid
	let uuidBytes: [UInt8] = [
	uuidTuple.0, uuidTuple.1, uuidTuple.2, uuidTuple.3,
	uuidTuple.4, uuidTuple.5, uuidTuple.6, uuidTuple.7,
	uuidTuple.8, uuidTuple.9, uuidTuple.10, uuidTuple.11,
	uuidTuple.12, uuidTuple.13, uuidTuple.14, uuidTuple.15
	]
	data.append(contentsOf: uuidBytes)

	for floatVal in embedding {
	let floatValData = withUnsafeBytes(of: floatVal) { Data($0) }
	data.append(floatValData)
	}
	}

	if FileManager.default.fileExists(atPath: fileURL.path()) {
	if let fileUpdater = try? FileHandle(forWritingTo: fileURL) {
	fileUpdater.seekToEndOfFile()
	fileUpdater.write(data)
	fileUpdater.closeFile()
	}
	} else {
	try! data.write(to: fileURL)
	}
	}

	public func load() {
	let data = try! Data(contentsOf: fileURL, options: .alwaysMapped)
	let frameWidth = 16 + 1024 * 4
	let totalEmbeddings = data.count / frameWidth // calculate total number of embeddings to process

	// Preallocate memory with a placeholder UUID
	// This function preallocates memory for performance reasons. Preallocating memory:
	// 1) Helps to avoid memory fragmentation: Allocating all needed memory at once makes it more likely
	// that the data will be stored contiguously in memory, improving access speed.
	// 2) Avoids a synchronisation point: When appending to an array from multiple threads, we need to
	// synchronize access to the array which can become a bottleneck. By preallocating, each thread
	// can write to a distinct region of the array, eliminating the need for synchronization.
	let placeholderUUID = UUID()
	uuids = [UUID](repeating: placeholderUUID, count: totalEmbeddings)
	embeddings = [Embedding](repeating: Embedding(repeating: 0.0, count: 1024), count: totalEmbeddings)

	DispatchQueue.concurrentPerform(iterations: totalEmbeddings) { index in
	let start = data.index(data.startIndex, offsetBy: index * frameWidth)

	let uuidEnd = data.index(start, offsetBy: 16)
	let uuidData = data[start..<uuidEnd]

	let uuidBytes = Array(uuidData)
	uuids[index] = UUID(uuid: (
	uuidBytes[0], uuidBytes[1], uuidBytes[2], uuidBytes[3],
	uuidBytes[4], uuidBytes[5], uuidBytes[6], uuidBytes[7],
	uuidBytes[8], uuidBytes[9], uuidBytes[10], uuidBytes[11],
	uuidBytes[12], uuidBytes[13], uuidBytes[14], uuidBytes[15]
	))

	var embedding = Embedding(repeating: 0.0, count: 1024)
	var floatStart = uuidEnd
	for i in 0..<1024 {
	let floatEnd = data.index(floatStart, offsetBy: 4)
	let floatData = data[floatStart..<floatEnd]
	floatStart = floatEnd
	embedding[i] = floatData.withUnsafeBytes { $0.load(as: Float32.self) }
	}
	embeddings[index] = embedding
	}
	}

	public func find(input: Embedding, threshold: Distance) -> [UUID] {
	// The `find` function computes the similarity score between a given vector and a list of embedding vectors.
	// It runs tasks concurrently, which can result in a significant speedup. The number of
	// concurrent tasks is set to be three times the number of available CPU cores.
	//
	// Each task is responsible for a specific chunk of the embeddings list. An embedding vector is considered a match if
	// its similarity score with the input vector is greater than a given threshold.
	let numOfCores = ProcessInfo.processInfo.activeProcessorCount
	let numOfTasks = numOfCores * 3
	let chunkSize = (embeddings.count + numOfTasks - 1) / numOfTasks

	var results: [Int] = []
	let resultQueue = DispatchQueue(label: "photos.keepers.vectorSearch.find", attributes: .concurrent)
	DispatchQueue.concurrentPerform(iterations: numOfTasks) { taskIndex in
	let startIndex = taskIndex * chunkSize
	let endIndex = min(startIndex + chunkSize, embeddings.count)
	for i in startIndex..<endIndex {
	let distance = similarityScore(Array(input), embeddings[i])
	if distance > threshold {
	resultQueue.sync(flags: .barrier) {
	results.append(i)
	}
	}
	}
	}
	resultQueue.sync { }
	return results.map { uuids[$0] }
	}

	private func similarityScore(_ vector1: Embedding, _ vector2: Embedding) -> Distance {
	/* vDSP uses CPU's SIMD to perform the calculations. We could also use Metal Shader
	and Matrix operation to avoid using a for loop to call `similarityScore` for each.
	However, given the number of embeddings and the complexities of the Metal Shader,
	let's stick with vDSP for now.
	*/
	var dotProduct: Float = 0.0
	var vector1Norm: Float = 0.0
	var vector2Norm: Float = 0.0
	let count = vDSP_Length(vector1.count)
	vDSP_dotpr(vector1, 1, vector2, 1, &dotProduct, count)
	vDSP_svesq(vector1, 1, &vector1Norm, count)
	vDSP_svesq(vector2, 1, &vector2Norm, count)
	return dotProduct / (sqrt(vector1Norm) * sqrt(vector2Norm))
	}
	}