Skip to content

Instantly share code, notes, and snippets.

@omriel1
Last active November 19, 2024 08:02
Show Gist options
  • Save omriel1/3b8ea57cc14b896237c47d5417eaec8f to your computer and use it in GitHub Desktop.
Save omriel1/3b8ea57cc14b896237c47d5417eaec8f to your computer and use it in GitHub Desktop.
Hybrid Search with LangChain-Milvus
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_milvus.utils.sparse import BM25SparseEmbedding
from langchain_milvus.vectorstores import Milvus
# Package versions used:
# langchain-milvus==0.1.6
# langchain-huggingface==0.1.0
# pymilvus[model]==2.4.8
# Texts stored in the vector store. The first is about temperature; the
# second uses "Hot" as the name of a TV provider.
DOCUMENTS = [
    "Today was very warm during the day but cold at night",
    "In Israel, Hot is a TV provider that broadcasts 7 days a week",
]

# Sample queries passed to `example` when the file is run as a script.
QUERY_1 = "What is the weather? is it hot?"
QUERY_2 = "Does Hot cover weather changes during weekends?"
def example(query: str) -> None:
    """Print the top hit for ``query`` under sparse, dense and hybrid ranking.

    A fresh Milvus vector store is built with two embedding functions
    (BM25 sparse, then a HuggingFace dense model), ``DOCUMENTS`` are added
    to it, and the same query is searched three times with a "weighted"
    ranker using different weights. The best match (``k=1``) of each search
    is printed.

    Args:
        query: The search text. It is also appended to the corpus handed to
            the BM25 sparse embedding.
    """
    # Order matters: the ranker weights below are given in this same order
    # (sparse first, dense second).
    bm25 = BM25SparseEmbedding(corpus=DOCUMENTS + [query])
    minilm = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    embeddings = [bm25, minilm]

    store = Milvus(
        embedding_function=embeddings,
        connection_args={"uri": "./milvus.db"},  # used for milvus-lite
        drop_old=True,
        auto_id=True,
    )
    store.add_texts(DOCUMENTS)

    # (heading printed for the result, weights passed to the ranker)
    weightings = (
        ("Sparse embeddings results", [1.0, 0.0]),  # first embedding only
        ("Dense embeddings results", [0.0, 1.0]),  # second embedding only
        ("Hybrid search results", [0.49, 0.51]),  # combine both
    )

    # Run every search before printing anything, so the report comes out
    # in one piece after all three queries have completed.
    hits = [
        (
            heading,
            store.similarity_search(
                query=query,
                k=1,
                ranker_type="weighted",
                ranker_params={"weights": weights},
            ),
        )
        for heading, weights in weightings
    ]

    print("-" * 60)
    print(f"Query: {query}\n")
    for heading, docs in hits:
        print(f"{heading}:\n{docs[0].page_content}\n")
if __name__ == "__main__":
    # Run the demo once per sample query, in order.
    for sample_query in (QUERY_1, QUERY_2):
        example(sample_query)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment