Skip to content

Instantly share code, notes, and snippets.

@libcrack
Created July 24, 2025 15:23
Show Gist options
  • Select an option

  • Save libcrack/1a237924e7a672d945147721b63dcf8c to your computer and use it in GitHub Desktop.

Select an option

Save libcrack/1a237924e7a672d945147721b63dcf8c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3.11
# coding: utf-8
# Sun May 26 20:31:43 CEST 2024
"""
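Ask questions about a local text file with a LangChain RetrievalQA chain
backed by OpenAI embeddings, an OpenAI LLM, and a Chroma vector store.

Dependencies:
    pip3.11 install openai
    pip3.11 install langchain
    pip3.11 install langchain-community
    pip3.11 install langchain-openai
    pip3.11 install chromadb
    pip3.11 install tiktoken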
"""
import os
import sys
from typing import List

# from langchain.vectorstores import Chroma
# from langchain.embeddings import OpenAIEmbeddings
# from langchain.llms import OpenAI
# from langchain.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings
# from langchain_openai import OpenAIEmbeddings
from langchain_community.llms import OpenAI
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.schema import Document
# Abort early with exit status 1 when the OpenAI API key is not configured.
# NOTE(review): `openai_key` is not referenced again in the visible code;
# presumably the OpenAI wrappers read the variable themselves -- confirm.
try:
    openai_key = os.environ['OPENAI_API_KEY']
except KeyError:
    # Only a missing variable is expected here; anything else should surface.
    print("Error: no $OPENAI_API_KEY env var", file=sys.stderr)
    sys.exit(1)
class ChatGPT:
    """Answer questions about one text file through a RetrievalQA chain.

    On construction the file is loaded with ``TextLoader``, split into
    chunks, embedded with ``OpenAIEmbeddings`` into a ``Chroma`` store, and
    wired to an ``OpenAI`` LLM through ``RetrievalQA`` using the "stuff"
    chain type. Each stage is kept on the instance as ``loader``,
    ``documents``, ``texts``, ``vectordb`` and ``chatgpt``.
    """

    def __init__(self, file_path: str, chunk_size: int = 1000,
                 chunk_overlap: int = 0):
        """Build the whole retrieval pipeline for *file_path*.

        Args:
            file_path: Path of the text file to index (read as UTF-8).
            chunk_size: Maximum chunk size handed to the text splitter.
            chunk_overlap: Overlap between consecutive chunks.
        """
        self.file_path = file_path
        self.loader = TextLoader(self.file_path, encoding="utf8")
        self.documents = self.loader.load()
        self.texts = self._text_split(
            self.documents,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        self.vectordb = self._embed_texts(self.texts)
        self.chatgpt = RetrievalQA.from_chain_type(
            llm=OpenAI(),
            chain_type="stuff",
            retriever=self.vectordb.as_retriever(),
        )

    @staticmethod
    def _text_split(documents: List[Document], chunk_size: int = 1000,
                    chunk_overlap: int = 0) -> List[Document]:
        """Split *documents* into chunks with RecursiveCharacterTextSplitter.

        Args:
            documents: Documents to split.
            chunk_size: Maximum chunk size passed to the splitter.
            chunk_overlap: Overlap between consecutive chunks.

        Returns:
            The list produced by ``split_documents``.
        """
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        return text_splitter.split_documents(documents)

    @staticmethod
    def _embed_texts(texts: List[Document]) -> Chroma:
        """Embed *texts* with OpenAIEmbeddings and return the Chroma store."""
        embeddings = OpenAIEmbeddings()
        return Chroma.from_documents(texts, embeddings)

    def ask(self, query: str) -> str:
        """Run *query* through the retrieval chain and return the answer.

        Args:
            query: Question to put to the chain.

        Returns:
            The chain's ``"result"`` output.
        """
        # Chain.run() is deprecated; invoke() takes the input under the
        # chain's "query" key and returns a dict holding the answer under
        # "result".
        return self.chatgpt.invoke({"query": query})["result"]
if __name__ == "__main__":
    # Script entry point: index pentest-azure.md and print a single answer.
    file_path = "pentest-azure.md"
    # NOTE(review): "pentesing" looks like a typo for "pentesting"; kept
    # unchanged because the string is sent to the model verbatim.
    prompt = (
        "tell me the five most important surface discovery tools "
        "for pentesing Azure"
    )
    chatgpt = ChatGPT(file_path)
    answer = chatgpt.ask(prompt)
    print(answer)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment