aminnj · May 20, 2024 06:15 · aminnj · May 20, 2024 · aminnj · May 20, 2024
diff --git a/embeddingsearch.py b/embeddingsearch.py
 import functools
 import jax
 import numpy as np
 import jax.numpy as jnp
 from sentence_transformers import SentenceTransformer

 # Sentence-embedding model, kept on the CPU.
 model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2').to("cpu")

 # Toy corpus of ten sentences to search over.
 sentences = [
    "This is the first sentence.",
    "Here is another sentence.",
    "Cats are red",
    "Dogs are blue",
    "Venus is square",
    "Math is hard",
    "English is also hard",
    "Need another sentence",
    "Almost there.",
    "Tenth sentence is the last one",
 ]

 # Embed the corpus with precision="ubinary" and reinterpret the packed uint8
 # output as uint32 words, so each vector has 4x fewer elements. The original
 # author reports a ~4x popcount speedup from this packing.
 # NOTE(review): uint64 words would shrink the vectors further, but JAX keeps
 # 64-bit dtypes disabled unless jax_enable_x64 is turned on -- confirm before
 # switching.
 dbvecs = jnp.array(
    model.encode(sentences, precision="ubinary").view("uint32")
 )

 # Simulate a 1M-vector database: stack the ten vectors 100,000 times and
 # repeat the sentence list the same way so row i still maps to sentences[i].
 dbvecs = jnp.vstack([dbvecs] * 100_000)
 sentences = sentences * 100_000

 @functools.partial(jax.jit, static_argnames=["k", "recall_target"])
 def get_nearest_k(qvec, dbvecs, k=5, recall_target=0.95):
    """Return the k database vectors closest to qvec in Hamming distance.

    Both inputs are bit-packed integer arrays. The distance is the number of
    set bits in ``qvec XOR dbvecs``, summed over the last axis.

    Args:
        qvec: Packed query vector(s); must be shape-compatible with
            ``dbvecs`` for ``jax.lax.bitwise_xor``.
        dbvecs: Packed database vectors, one per row.
        k: Number of neighbours to return (static under jit).
        recall_target: Recall target forwarded to ``jax.lax.approx_max_k``
            (static under jit).

    Returns:
        A ``(distances, indices)`` pair: the float32 Hamming distances of the
        selected neighbours and their positions in ``dbvecs``.
    """
    # Differing bits per packed word, then totalled per database vector.
    differing_bits = jax.lax.population_count(jax.lax.bitwise_xor(qvec, dbvecs))
    hamming = differing_bits.sum(axis=-1).astype(jnp.float32)

    # The original author found the min-k variant slow, so the distances are
    # negated, the top-k maxima are taken, and the sign is flipped back.
    neg_best, best_idx = jax.lax.approx_max_k(
        -hamming, k=k, recall_target=recall_target
    )
    return -neg_best, best_idx

 # `time` is used for the timings below but is not among the file's imports.
 import time

 # Time the query embedding: encode the query, reinterpret the packed uint8
 # bits as uint32 words (same layout as dbvecs), and move it into JAX.
 t0 = time.time()
 qvec = model.encode(["Difficult school subject"], precision="ubinary")
 qvec = qvec.view("uint32")
 qvec = jnp.array(qvec)
 t1 = time.time()
 print(f"Encoded query string into vector in {(t1-t0)*1000:.1f}ms")

 # Warm up on the full database. jax.jit compiles per input shape, so warming
 # up on a slice such as dbvecs[:1000] would leave the compilation for the
 # real shape inside the timed call below.
 _ = jax.block_until_ready(get_nearest_k(qvec, dbvecs))

 t0 = time.time()
 dists, indices = get_nearest_k(qvec, dbvecs)
 # JAX dispatches work asynchronously; wait for the results so the timer
 # covers the search itself rather than just the dispatch.
 dists, indices = jax.block_until_ready((dists, indices))
 t1 = time.time()
 print(f"Searched {len(dbvecs)} vectors in {(t1-t0)*1000:.1f}ms")

 # Show the matched sentences on the first row and their distances below.
 print(
    np.vstack([np.array(sentences)[indices], dists])
 )
 # Example output recorded in the original script (timings will vary):
 """
 Encoded query string into vector in 12.8ms
 Searched 1000000 vectors in 15.5ms
 [['Math is hard' 'Math is hard' 'Math is hard' 'Math is hard' 'Math is hard']
 ['114.0' '114.0' '114.0' '114.0' '114.0']]
 """
	import functools
	import jax
	import numpy as np
	import jax.numpy as jnp
	from sentence_transformers import SentenceTransformer

	# Sentence-embedding model, kept on the CPU.
	model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2').to("cpu")

	# Toy corpus of ten sentences to search over.
	sentences = [
		"This is the first sentence.",
		"Here is another sentence.",
		"Cats are red",
		"Dogs are blue",
		"Venus is square",
		"Math is hard",
		"English is also hard",
		"Need another sentence",
		"Almost there.",
		"Tenth sentence is the last one",
	]

	# Embed the corpus with precision="ubinary" and reinterpret the packed uint8
	# output as uint32 words, so each vector has 4x fewer elements. The original
	# author reports a ~4x popcount speedup from this packing.
	# NOTE(review): uint64 words would shrink the vectors further, but JAX keeps
	# 64-bit dtypes disabled unless jax_enable_x64 is turned on -- confirm before
	# switching.
	dbvecs = jnp.array(
		model.encode(sentences, precision="ubinary").view("uint32")
	)

	# Simulate a 1M-vector database: stack the ten vectors 100,000 times and
	# repeat the sentence list the same way so row i still maps to sentences[i].
	dbvecs = jnp.vstack([dbvecs] * 100_000)
	sentences = sentences * 100_000

	@functools.partial(jax.jit, static_argnames=["k", "recall_target"])
	def get_nearest_k(qvec, dbvecs, k=5, recall_target=0.95):
		"""Return the k database vectors closest to qvec in Hamming distance.

		Both inputs are bit-packed integer arrays. The distance is the number
		of set bits in ``qvec XOR dbvecs``, summed over the last axis.

		Args:
			qvec: Packed query vector(s); must be shape-compatible with
				``dbvecs`` for ``jax.lax.bitwise_xor``.
			dbvecs: Packed database vectors, one per row.
			k: Number of neighbours to return (static under jit).
			recall_target: Recall target forwarded to
				``jax.lax.approx_max_k`` (static under jit).

		Returns:
			A ``(distances, indices)`` pair: the float32 Hamming distances of
			the selected neighbours and their positions in ``dbvecs``.
		"""
		xor_result = jax.lax.bitwise_xor(qvec, dbvecs)

		# Compute the population count (number of 1 bits) and sum along the last axis
		dists = jax.lax.population_count(xor_result).sum(axis=-1)
		# approx_max_k expects a floating-point operand.
		dists = dists.astype(jnp.float32)

		# min was slow for some reason, so using max with flipped distances
		dists, indices = jax.lax.approx_max_k(-dists, k=k, recall_target=recall_target)
		return -dists, indices

	# `time` is used for the timings below but is not among the file's imports.
	import time

	# Time the query embedding: encode the query, reinterpret the packed uint8
	# bits as uint32 words (same layout as dbvecs), and move it into JAX.
	t0 = time.time()
	qvec = model.encode(["Difficult school subject"], precision="ubinary")
	qvec = qvec.view("uint32")
	qvec = jnp.array(qvec)
	t1 = time.time()
	print(f"Encoded query string into vector in {(t1-t0)*1000:.1f}ms")

	# Warm up on the full database. jax.jit compiles per input shape, so warming
	# up on a slice such as dbvecs[:1000] would leave the compilation for the
	# real shape inside the timed call below.
	_ = jax.block_until_ready(get_nearest_k(qvec, dbvecs))

	t0 = time.time()
	dists, indices = get_nearest_k(qvec, dbvecs)
	# JAX dispatches work asynchronously; wait for the results so the timer
	# covers the search itself rather than just the dispatch.
	dists, indices = jax.block_until_ready((dists, indices))
	t1 = time.time()
	print(f"Searched {len(dbvecs)} vectors in {(t1-t0)*1000:.1f}ms")

	# Show the matched sentences on the first row and their distances below.
	print(
		np.vstack([np.array(sentences)[indices], dists])
	)
	# Example output recorded in the original script (timings will vary):
	"""
	Encoded query string into vector in 12.8ms
	Searched 1000000 vectors in 15.5ms
	[['Math is hard' 'Math is hard' 'Math is hard' 'Math is hard' 'Math is hard']
	['114.0' '114.0' '114.0' '114.0' '114.0']]
	"""