RAG with Ollama and LangChain
# https://medium.com/@danushidk507/rag-with-llama-using-ollama-a-deep-dive-into-retrieval-augmented-generation-c58b9a1cfcd3
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaLLM
from langchain.chains import RetrievalQA
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# Load the document
loader = PyPDFLoader("rag.pdf")
documents = loader.load()
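# PyPDFLoader yields one Document per page of the PDF.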
# Split the document into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30, separator="\n")
docs = text_splitter.split_documents(documents=documents)
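# Optional sanity check (not in the original gist): see how many chunks were produced.
# print(f"Split {len(documents)} pages into {len(docs)} chunks")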
# Load embedding model
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cpu"}
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs
)
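# Assuming a CUDA-capable GPU is available, the embeddings can run on it instead:
# model_kwargs = {"device": "cuda"}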
# Create FAISS vector store
vectorstore = FAISS.from_documents(docs, embeddings)
# Save and reload the vector store
vectorstore.save_local("faiss_index_")
persisted_vectorstore = FAISS.load_local("faiss_index_", embeddings, allow_dangerous_deserialization=True)
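# load_local deserializes a pickle file, so allow_dangerous_deserialization
# should only be enabled for index files you created yourself.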
# Create a retriever
retriever = persisted_vectorstore.as_retriever()
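# By default the retriever returns the 4 most similar chunks; this is tunable, e.g.
# retriever = persisted_vectorstore.as_retriever(search_kwargs={"k": 2})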
# Initialize the model served by the local Ollama instance
llm = OllamaLLM(
    model="deepseek-r1:8b",
    base_url="http://127.0.0.1:11434",
    callbacks=[StreamingStdOutCallbackHandler()])
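# Any model already pulled into the local Ollama server should work here;
# "deepseek-r1:8b" is just the one this gist happens to use.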
# Test with a sample prompt
# response = llm.invoke("Tell me a joke")
# print(response)
# Create RetrievalQA
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever)
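# To also inspect which chunks were retrieved, one option (not used in this gist)
# is to pass return_source_documents=True to from_chain_type; the result dict
# then carries a "source_documents" key alongside "result".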
# Interactive query loop
while True:
    query = input("Type your query (or type 'Exit' to quit): \n")
    if query.lower() == "exit":
        break
    # The streaming callback prints the answer token by token as it is
    # generated; result["result"] also holds the full answer string.
    result = qa.invoke(query)
    print("")