Last active
June 4, 2025 21:15
-
-
Save aurotripathy/ff1d98aac12ec65aa47610769596505d to your computer and use it in GitHub Desktop.
Kotaemon flowsettings.py file for configuring the project
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
from importlib.metadata import version
from inspect import currentframe, getframeinfo
from pathlib import Path

from decouple import config
from ktem.utils.lang import SUPPORTED_LANGUAGE_MAP
from theflow.settings.default import *  # noqa

# Find this settings file through the frame that is currently executing it;
# the paths defined further down are anchored on its directory.
cur_frame = currentframe()
if cur_frame is None:
    # inspect.currentframe() returns None on interpreters without frame support.
    raise ValueError("Cannot get the current frame.")

this_file = getframeinfo(cur_frame).filename
this_dir = Path(this_file).parent
# Distribution name used for the version lookup below; change this if your app
# uses a different name.
KH_PACKAGE_NAME = "kotaemon_app"

# An explicitly configured KH_APP_VERSION wins.  Otherwise try the installed
# package metadata and, if that lookup fails for any reason, use "local".
KH_APP_VERSION = config("KH_APP_VERSION", default=None)
if not KH_APP_VERSION:
    try:
        # Caution: This might produce the wrong version
        # https://stackoverflow.com/a/59533071
        KH_APP_VERSION = version(KH_PACKAGE_NAME)
    except Exception:
        # Deliberately broad: the version is informational only.
        KH_APP_VERSION = "local"

KH_ENABLE_FIRST_SETUP = True
KH_DEMO_MODE = config("KH_DEMO_MODE", cast=bool, default=False)
KH_OLLAMA_URL = config("KH_OLLAMA_URL", default="http://localhost:11434/v1/")
# The app can be run from anywhere, so it is not trivial to decide where to store
# app data.  Use the directory that contains this flowsettings.py file.
KH_APP_DATA_DIR = this_dir / "ktem_app_data"
# Captured before any directory is created, so it records whether the app data
# directory was already present.
KH_APP_DATA_EXISTS = KH_APP_DATA_DIR.exists()

KH_USER_DATA_DIR = KH_APP_DATA_DIR / "user_data"  # user data
KH_MARKDOWN_OUTPUT_DIR = KH_APP_DATA_DIR / "markdown_cache_dir"  # markdown output
KH_CHUNKS_OUTPUT_DIR = KH_APP_DATA_DIR / "chunks_cache_dir"  # chunks output
KH_ZIP_OUTPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir"  # zip output
KH_ZIP_INPUT_DIR = KH_APP_DATA_DIR / "zip_cache_dir_in"  # zip input

# Create every directory up front (no-op for those that already exist).
for _data_dir in (
    KH_APP_DATA_DIR,
    KH_USER_DATA_DIR,
    KH_MARKDOWN_OUTPUT_DIR,
    KH_CHUNKS_OUTPUT_DIR,
    KH_ZIP_OUTPUT_DIR,
    KH_ZIP_INPUT_DIR,
):
    _data_dir.mkdir(parents=True, exist_ok=True)
del _data_dir
# HF models can be big, so keep them inside the app data directory where users
# can manage their storage more easily.  Both variables point at the same folder.
# ref: https://huggingface.co/docs/huggingface_hub/en/guides/manage-cache
os.environ.update(
    {
        env_name: str(KH_APP_DATA_DIR / "huggingface")
        for env_name in ("HF_HOME", "HF_HUB_CACHE")
    }
)
# doc directory
KH_DOC_DIR = this_dir / "docs"

KH_MODE = "dev"

# Feature toggles; each one can be overridden through `config`.
KH_FEATURE_CHAT_SUGGESTION = config(
    "KH_FEATURE_CHAT_SUGGESTION", cast=bool, default=False
)
KH_FEATURE_USER_MANAGEMENT = config(
    "KH_FEATURE_USER_MANAGEMENT", cast=bool, default=True
)
KH_USER_CAN_SEE_PUBLIC = None

# NOTE(review): both the admin user name and its password fall back to "admin"
# when not configured; set these explicitly for anything beyond local use.
KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
    config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
)
KH_FEATURE_USER_MANAGEMENT_PASSWORD = str(
    config("KH_FEATURE_USER_MANAGEMENT_PASSWORD", default="admin")
)

KH_ENABLE_ALEMBIC = False
# SQLite database and uploaded files both live under the user data directory.
KH_DATABASE = "sqlite:///" + str(KH_USER_DATA_DIR / "sql.db")
KH_FILESTORAGE_PATH = str(KH_USER_DATA_DIR / "files")
# Dotted path of the web search backend.
# Alternative: "kotaemon.indices.retrievers.jina_web_search.WebSearch"
KH_WEB_SEARCH_BACKEND = "kotaemon.indices.retrievers.tavily_web_search.WebSearch"

# Document store, persisted under the user data directory.
# Alternatives for "__type__":
#   "kotaemon.storages.ElasticsearchDocumentStore"
#   "kotaemon.storages.SimpleFileDocumentStore"
KH_DOCSTORE = {
    "__type__": "kotaemon.storages.LanceDBDocumentStore",
    "path": str(KH_USER_DATA_DIR / "docstore"),
}

# Vector store, persisted under the user data directory.
# Alternatives for "__type__":
#   "kotaemon.storages.LanceDBVectorStore"
#   "kotaemon.storages.MilvusVectorStore"
#   "kotaemon.storages.QdrantVectorStore"
KH_VECTORSTORE = {
    "__type__": "kotaemon.storages.ChromaVectorStore",
    "path": str(KH_USER_DATA_DIR / "vectorstore"),
}
# Model registries keyed by a short name.  Each entry is a dict with a "spec"
# (a "__type__" dotted path plus its options) and a "default" flag.
KH_LLMS = {}
KH_EMBEDDINGS = {}
KH_RERANKINGS = {}

# populate options from config
# Azure OpenAI: both the API key and the endpoint are required.
# NOTE(review): indentation was lost in the source; the embeddings check is
# assumed to be nested under the key/endpoint check like the chat one - confirm.
_azure_api_key = config("AZURE_OPENAI_API_KEY", default="")
_azure_endpoint = config("AZURE_OPENAI_ENDPOINT", default="")
if _azure_api_key and _azure_endpoint:
    # A blank OPENAI_API_VERSION also falls back to the pinned preview version.
    _azure_api_version = (
        config("OPENAI_API_VERSION", default="") or "2024-02-15-preview"
    )
    _azure_chat_deployment = config("AZURE_OPENAI_CHAT_DEPLOYMENT", default="")
    _azure_embeddings_deployment = config(
        "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT", default=""
    )

    if _azure_chat_deployment:
        KH_LLMS["azure"] = {
            "spec": {
                "__type__": "kotaemon.llms.AzureChatOpenAI",
                "temperature": 0,
                "azure_endpoint": _azure_endpoint,
                "api_key": _azure_api_key,
                "api_version": _azure_api_version,
                "azure_deployment": _azure_chat_deployment,
                "timeout": 20,
            },
            "default": False,
        }

    if _azure_embeddings_deployment:
        KH_EMBEDDINGS["azure"] = {
            "spec": {
                "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
                "azure_endpoint": _azure_endpoint,
                "api_key": _azure_api_key,
                "api_version": _azure_api_version,
                "azure_deployment": _azure_embeddings_deployment,
                "timeout": 10,
            },
            "default": False,
        }
# Register the OpenAI chat and embedding models when an API key is configured.
# NOTE(review): indentation was lost in the source; the embeddings entry is
# assumed to sit inside the same OPENAI_API_KEY check as the chat entry - confirm.
_openai_api_key = config("OPENAI_API_KEY", default="")
if _openai_api_key:
    # Use the public endpoint when OPENAI_API_BASE is unset *or* set to an empty
    # string.  `default=` alone only covers the unset case, so a blank
    # `OPENAI_API_BASE=` line used to give the embeddings an empty base URL
    # while the chat model still got the fallback.
    _openai_base_url = (
        config("OPENAI_API_BASE", default="") or "https://api.openai.com/v1"
    )

    KH_LLMS["openai"] = {
        "spec": {
            "__type__": "kotaemon.llms.ChatOpenAI",
            "temperature": 0,
            "base_url": _openai_base_url,
            "api_key": _openai_api_key,
            "model": config("OPENAI_CHAT_MODEL", default="gpt-3.5-turbo"),
            "timeout": 20,
        },
        "default": False,
    }
    KH_EMBEDDINGS["openai"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
            "base_url": _openai_base_url,
            "api_key": _openai_api_key,
            "model": config(
                "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-ada-002"
            ),
            "timeout": 10,
            "context_length": 8191,
        },
        "default": False,
    }
# Local models served through the OpenAI-compatible endpoint at KH_OLLAMA_URL.
# NOTE(review): indentation was lost in the source; the "fast_embed" entry is
# assumed to belong to the LOCAL_MODEL branch along with the two "ollama"
# entries - confirm against the intended layout.
_local_model = config("LOCAL_MODEL", default="")
if _local_model:
    KH_LLMS["ollama"] = {
        "spec": {
            "__type__": "kotaemon.llms.ChatOpenAI",
            "base_url": KH_OLLAMA_URL,
            # LOCAL_MODEL is known to be non-empty inside this branch, so the
            # configured value is used as-is.
            "model": _local_model,
            "api_key": "ollama",
        },
        "default": False,
    }
    KH_EMBEDDINGS["ollama"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
            "base_url": KH_OLLAMA_URL,
            "model": config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"),
            "api_key": "ollama",
        },
        "default": True,
    }
    KH_EMBEDDINGS["fast_embed"] = {
        "spec": {
            "__type__": "kotaemon.embeddings.FastEmbedEmbeddings",
            "model_name": "BAAI/bge-base-en-v1.5",
        },
        "default": False,
    }
# additional LLM configurations
# These entries are registered unconditionally; "RNGD" is the one flagged as
# default.
# NOTE(review): the "claude" and "groq" API keys are the literal placeholder
# "your-key" and are not read from config, unlike "google" and "cohere".
KH_LLMS.update(
    {
        "RNGD": {
            "spec": {
                "__type__": "kotaemon.llms.ChatOpenAI",
                "base_url": "http://eval.furiosa.ai:32553/v1/",
                "model": "EMPTY",
                "api_key": "EMPTY",
            },
            "default": True,
        },
        "claude": {
            "spec": {
                "__type__": "kotaemon.llms.chats.LCAnthropicChat",
                "model_name": "claude-3-5-sonnet-20240620",
                "api_key": "your-key",
            },
            "default": False,
        },
        "google": {
            "spec": {
                "__type__": "kotaemon.llms.chats.LCGeminiChat",
                "model_name": "gemini-1.5-flash",
                "api_key": config("GOOGLE_API_KEY", default="your-key"),
            },
            "default": False,
        },
        "groq": {
            "spec": {
                "__type__": "kotaemon.llms.ChatOpenAI",
                "base_url": "https://api.groq.com/openai/v1",
                "model": "llama-3.1-8b-instant",
                "api_key": "your-key",
            },
            "default": False,
        },
        "cohere": {
            "spec": {
                "__type__": "kotaemon.llms.chats.LCCohereChat",
                "model_name": "command-r-plus-08-2024",
                "api_key": config("COHERE_API_KEY", default="your-key"),
            },
            "default": False,
        },
    }
)
# additional embeddings configurations
# These entries are registered unconditionally and are not flagged as default.
KH_EMBEDDINGS["cohere"] = {
    "spec": {
        "__type__": "kotaemon.embeddings.LCCohereEmbeddings",
        "model": "embed-multilingual-v3.0",
        "cohere_api_key": config("COHERE_API_KEY", default="your-key"),
        "user_agent": "default",
    },
    "default": False,
}
KH_EMBEDDINGS["google"] = {
    "spec": {
        "__type__": "kotaemon.embeddings.LCGoogleEmbeddings",
        "model": "models/text-embedding-004",
        "google_api_key": config("GOOGLE_API_KEY", default="your-key"),
    },
    # Spelled out so this entry has the same "spec"/"default" shape as every
    # other registry entry in this file (the key was missing here).
    "default": False,
}
# Optional Hugging Face embeddings, disabled:
# KH_EMBEDDINGS["huggingface"] = {
#     "spec": {
#         "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",
#         "model_name": "sentence-transformers/all-mpnet-base-v2",
#     },
#     "default": False,
# }
# default reranking models
# The Cohere reranker is flagged as default; its API key falls back to an
# empty string when COHERE_API_KEY is not configured.
KH_RERANKINGS.update(
    cohere={
        "spec": {
            "__type__": "kotaemon.rerankings.CohereReranking",
            "model_name": "rerank-multilingual-v2.0",
            "cohere_api_key": config("COHERE_API_KEY", default=""),
        },
        "default": True,
    }
)
# Dotted paths of the available reasoning pipelines.
KH_REASONINGS = [
    "ktem.reasoning.simple.FullQAPipeline",
    "ktem.reasoning.simple.FullDecomposeQAPipeline",
    "ktem.reasoning.react.ReactAgentPipeline",
    "ktem.reasoning.rewoo.RewooAgentPipeline",
]
KH_REASONINGS_USE_MULTIMODAL = config("USE_MULTIMODAL", cast=bool, default=False)

# Chat-completions URL for the vision deployment, assembled from the Azure
# endpoint, the deployment name and the API version.  The endpoint and version
# default to "", so the URL is still built (with blank parts) when they are unset.
_vlm_endpoint_template = "{0}/openai/deployments/{1}/chat/completions?api-version={2}"
KH_VLM_ENDPOINT = _vlm_endpoint_template.format(
    config("AZURE_OPENAI_ENDPOINT", default=""),
    config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"),
    config("OPENAI_API_VERSION", default=""),
)
SETTINGS_APP: dict[str, dict] = {}

# Reasoning settings; every entry carries a display name, a value and the
# component used to edit it.
SETTINGS_REASONING = {
    "use": {
        "name": "Reasoning options",
        "value": None,
        "choices": [],
        "component": "radio",
    },
    "lang": {
        "name": "Language",
        "value": "en",
        # Each choice is a (map value, map key) pair from SUPPORTED_LANGUAGE_MAP.
        "choices": [
            (language_name, language_code)
            for language_code, language_name in SUPPORTED_LANGUAGE_MAP.items()
        ],
        "component": "dropdown",
    },
    "max_context_length": {
        "name": "Max context length (LLM)",
        "value": 32000,
        "component": "number",
    },
}
# Optional graph index implementations, switched on through config.
USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", cast=bool, default=False)
USE_LIGHTRAG = config("USE_LIGHTRAG", cast=bool, default=True)

# GraphRAG is always listed; the optional implementations follow when enabled.
GRAPHRAG_INDEX_TYPES = ["ktem.index.file.graph.GraphRAGIndex"]
if USE_NANO_GRAPHRAG:
    GRAPHRAG_INDEX_TYPES += ["ktem.index.file.graph.NanoGraphRAGIndex"]
if USE_LIGHTRAG:
    GRAPHRAG_INDEX_TYPES += ["ktem.index.file.graph.LightRAGIndex"]

# The plain file index comes first, then every enabled graph index.
KH_INDEX_TYPES = ["ktem.index.file.FileIndex"] + GRAPHRAG_INDEX_TYPES
# File extensions accepted by every collection defined below.
_supported_file_types = (
    ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
    ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
)

# One collection per enabled graph index type.  The name is the last dotted
# component with "Index" stripped out, followed by " Collection".
GRAPHRAG_INDICES = [
    {
        "name": index_path.rpartition(".")[2].replace("Index", "") + " Collection",
        "config": {
            "supported_file_types": _supported_file_types,
            "private": False,
        },
        "index_type": index_path,
    }
    for index_path in GRAPHRAG_INDEX_TYPES
]

# The plain file collection comes first, followed by the graph collections.
KH_INDICES = [
    {
        "name": "File Collection",
        "config": {
            "supported_file_types": _supported_file_types,
            "private": False,
        },
        "index_type": "ktem.index.file.FileIndex",
    }
] + GRAPHRAG_INDICES
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment