RAG with Ollama and LangChain
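A minimal RAG pipeline: load a PDF, split it into chunks, embed the chunks with a HuggingFace sentence-transformer, index them in FAISS, and answer queries with an Ollama-served model through a RetrievalQA chain.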
# https://medium.com/@danushidk507/rag-with-llama-using-ollama-a-deep-dive-into-retrieval-augmented-generation-c58b9a1cfcd3
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaLLM
from langchain.chains import RetrievalQA
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
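# Assumed setup (the gist does not pin versions): these imports need
#   pip install langchain langchain-community langchain-huggingface langchain-ollama faiss-cpu pypdf sentence-transformers
# plus a local Ollama server with the model pulled, e.g. `ollama pull deepseek-r1:8b`.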
# Load the document
loader = PyPDFLoader("rag.pdf")
documents = loader.load()

# Split the document into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30, separator="\n")
docs = text_splitter.split_documents(documents=documents)
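# CharacterTextSplitter measures chunk_size and chunk_overlap in characters,
# so ~1000-character chunks keep each piece small enough to stuff several
# into one prompt below, while the 30-character overlap preserves a little
# context across chunk boundaries.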
# Load embedding model
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cpu"}
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
)
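# all-mpnet-base-v2 produces 768-dimensional sentence embeddings and runs
# acceptably on CPU; set model_kwargs = {"device": "cuda"} if a GPU is available.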
# Create FAISS vector store
vectorstore = FAISS.from_documents(docs, embeddings)

# Save and reload the vector store
vectorstore.save_local("faiss_index_")
persisted_vectorstore = FAISS.load_local("faiss_index_", embeddings, allow_dangerous_deserialization=True)
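# load_local unpickles the saved docstore, hence allow_dangerous_deserialization;
# only enable it for index files you created yourself.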
# Create a retriever
retriever = persisted_vectorstore.as_retriever()
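# as_retriever() defaults to similarity search (top 4 chunks in current
# LangChain versions); pass search_kwargs={"k": ...} to change that.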
# Initialize the local model served by Ollama
llm = OllamaLLM(
    model="deepseek-r1:8b",
    base_url="http://127.0.0.1:11434",
    callbacks=[StreamingStdOutCallbackHandler()],
)
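# The streaming callback prints tokens to stdout as they are generated,
# so answers in the query loop below appear incrementally.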
# Test with a sample prompt
# response = llm.invoke("Tell me a joke")
# print(response)
# Create RetrievalQA
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)
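# chain_type="stuff" concatenates all retrieved chunks into a single prompt;
# simple and effective here, but it can overflow the model's context window
# if k or chunk_size grows.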
# Interactive query loop
while True:
    query = input("Type your query (or type 'Exit' to quit): \n")
    if query.lower() == "exit":
        break
    result = qa.invoke(query)
    # The answer has already been streamed to stdout by the callback handler,
    # so just print a blank line to separate it from the next prompt.
    print("")