Last active
November 19, 2024 08:02
-
-
Save omriel1/3b8ea57cc14b896237c47d5417eaec8f to your computer and use it in GitHub Desktop.
Hybrid Search with LangChain-Milvus
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from langchain_huggingface import HuggingFaceEmbeddings | |
from langchain_milvus.utils.sparse import BM25SparseEmbedding | |
from langchain_milvus.vectorstores import Milvus | |
# Using:
# langchain_milvus==0.1.6
# langchain-huggingface==0.1.0
# pymilvus[model]==2.4.8
# Tiny demo corpus. Both sentences relate to the word "hot", but only the
# second one contains it literally ("Hot" as a proper name).
DOCUMENTS = [
    "Today was very warm during the day but cold at night",
    "In Israel, Hot is a TV provider that broadcasts 7 days a week",
]

# Example queries passed to the demo when the file is run as a script.
QUERY_1 = "What is the weather? is it hot?"
QUERY_2 = "Does Hot cover weather changes during weekends?"
def example(query: str) -> None:
    """Run sparse-only, dense-only and hybrid searches for ``query`` and print the top hit of each.

    A fresh Milvus collection (``drop_old=True``) is created in the local
    ``./milvus.db`` file, populated with ``DOCUMENTS``, and then queried three
    times with a weighted ranker whose weights select the sparse results, the
    dense results, or a blend of both.

    Args:
        query: The search text. It is also appended to the corpus used to
            build the BM25 sparse embedding.
    """
    # The BM25 embedding is built over the documents plus the query itself.
    sparse_embedding = BM25SparseEmbedding(corpus=DOCUMENTS + [query])
    dense_embedding = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    vector_store = Milvus(
        embedding_function=[
            sparse_embedding,
            dense_embedding,
        ],
        connection_args={"uri": "./milvus.db"},  # used for milvus-lite
        drop_old=True,
        auto_id=True,
    )
    vector_store.add_texts(DOCUMENTS)

    # Ranker weights are given in the same order as ``embedding_function``:
    # [first (sparse), second (dense)].
    weightings = (
        ("Sparse embeddings results", [1.0, 0.0]),  # first embedding only
        ("Dense embeddings results", [0.0, 1.0]),  # second embedding only
        ("Hybrid search results", [0.49, 0.51]),  # combine both results
    )

    # Run all searches before printing anything, one search per weighting.
    outputs = [
        (
            label,
            vector_store.similarity_search(
                query=query,
                k=1,
                ranker_type="weighted",
                ranker_params={"weights": weights},
            ),
        )
        for label, weights in weightings
    ]

    print("-" * 60)
    print(f"Query: {query}\n")
    for label, output in outputs:
        print(f"{label}:\n{output[0].page_content}\n")
# Script entry point: run the demo once for each example query.
if __name__ == "__main__":
    example(QUERY_1)
    example(QUERY_2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment