Created
December 11, 2023 01:08
-
-
Save bendangelo/53f36aff187e64fb0357f6f1c10223f7 to your computer and use it in GitHub Desktop.
Add Chromadb methods to your Rails Models. This is a model concern.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add to model:
#   include Chromable
#   chroma do
#     hnsw_space :cosine
#     embedding :name
#     document :label
#     metadata :name
#   end
# Usage:
#   Hint.chroma_upsert Hint.all
#   @hints = Hint.chroma_query "dogs", where: {name: {"$ne": "dogs"}}
# Implement your own embedding service: embeddings = EmbeddingsService.call texts
# I used https://github.com/michaelfeil/infinity
# Model concern that adds Chroma vector-store helpers to a Rails model.
#
# Including models declare what to index through the `chroma` DSL and get
# class-level query/get/upsert/delete helpers plus an instance-level
# `to_embedding` conversion. Text is turned into vectors by an application
# supplied `EmbeddingsService.call`.
module Chromable
  extend ActiveSupport::Concern

  included do
    # Per-model ChromaConfiguration, populated by the `chroma` DSL call.
    class_attribute :chroma_configuration
  end

  class_methods do
    # Declares the Chroma settings for this model. The block is evaluated
    # against a fresh ChromaConfiguration, so `embedding`, `document`,
    # `metadata` and `hnsw_space` can be called bare inside it.
    def chroma(&block)
      self.chroma_configuration = ChromaConfiguration.new
      chroma_configuration.instance_eval(&block) if block_given?
    end

    # Embeds +query+ with EmbeddingsService and searches this model's collection.
    #
    # @param query the text handed to EmbeddingsService.call
    # @param results [Integer] forwarded as `results:` to the collection query
    # @param where [Hash] metadata filter forwarded to the collection
    # @param where_document [Hash] document filter forwarded to the collection
    # @param include [Array<String>] fields requested from the collection
    # @return whatever the collection's `query` returns
    def chroma_query(query, results: 10, where: {}, where_document: {}, include: %w[documents])
      embedding = EmbeddingsService.call(query)

      # Only the first element of the service result is used as the query vector.
      chroma_collection.query(
        query_embeddings: [embedding[0]],
        results: results,
        where: where,
        where_document: where_document,
        include: include
      )
    end

    # Returns the collection for this model, creating it on first use with the
    # configured "hnsw:space" metadata.
    #
    # Memoized in a class-level instance variable so every model keeps its own
    # handle; a `@@` class variable is not per-model and would hand the first
    # model's collection to every other model that includes this concern.
    def chroma_collection
      @chroma_collection ||= Chroma::Resources::Collection.get_or_create(
        chroma_collection_name,
        { "hnsw:space": chroma_configuration.hnsw_space_param.to_s }
      )
    end

    # Deletes this model's whole collection and forgets the memoized handle so
    # the next `chroma_collection` call goes through get_or_create again.
    #
    # This body used to be a first `chroma_delete` definition that was silently
    # overridden by the keyword-argument version below and therefore unreachable.
    def chroma_delete_collection
      result = Chroma::Resources::Collection.delete(chroma_collection_name)
      @chroma_collection = nil
      result
    end

    # Collection name: the model's table name, suffixed with "_test" when
    # Rails.env.test? is true.
    def chroma_collection_name
      Rails.env.test? ? "#{table_name}_test" : table_name
    end

    # Number of entries reported by the collection.
    def chroma_count
      chroma_collection.count
    end

    # Deletes entries from the collection.
    #
    # @param ids a single id or a list of ids; converted to strings before the call
    # @param where [Hash] metadata filter forwarded to the collection
    # @param where_document [Hash] document filter forwarded to the collection
    def chroma_delete(ids: nil, where: {}, where_document: {})
      ids = Array(ids).map(&:to_s) if ids.present?
      chroma_collection.delete(ids: ids, where: where, where_document: where_document)
    end

    # Fetches entries from the collection. `ids` (a single id or a list) are
    # converted to strings; every other argument is forwarded unchanged.
    def chroma_get(ids: nil, where: {}, sort: nil, limit: nil, offset: nil, page: nil, page_size: nil, where_document: {}, include: %w[documents])
      ids = Array(ids).map(&:to_s) if ids.present?
      chroma_collection.get(
        ids: ids,
        where: where,
        sort: sort,
        limit: limit,
        offset: offset,
        page: page,
        page_size: page_size,
        where_document: where_document,
        include: include
      )
    end

    # Embeds the configured attribute of each record and upserts the results.
    #
    # @param items a relation or array of records of this model
    # @return the collection's `upsert` result, or nil when +items+ is empty
    def chroma_upsert(items)
      # Materialize once: reading the texts and building the embeddings from the
      # same array keeps text i and record i aligned (a separate `pluck` query on
      # a relation is not guaranteed to come back in the same order).
      records = items.to_a
      return if records.empty?

      attribute = chroma_configuration.embedding_name
      texts = records.map { |record| record.public_send(attribute) }
      embeddings = EmbeddingsService.call(texts)

      payload = records.zip(embeddings).map { |record, vector| record.to_embedding(vector) }
      chroma_collection.upsert(payload)
    end
  end

  # Builds a Chroma::Resources::Embedding for this record.
  #
  # @param embedding the vector to store (nil by default)
  # @return [Chroma::Resources::Embedding] with the record id as a string plus
  #   the document/metadata fields from the model's configuration, if any
  def to_embedding(embedding = nil)
    attributes = { id: id.to_s, embedding: embedding }

    config = self.class.chroma_configuration
    attributes.merge!(config.to_embedding_hash(self)) if config

    Chroma::Resources::Embedding.new(**attributes)
  end

  # Holds the per-model settings collected by the `chroma` DSL block.
  class ChromaConfiguration
    attr_accessor :embedding_name, :document_name, :metadata_names, :hnsw_space_param

    def initialize
      # Default "hnsw:space" value used when the DSL block does not set one.
      self.hnsw_space_param = :l2
    end

    # DSL: name of the attribute whose text is sent to the embedding service.
    def embedding(name)
      self.embedding_name = name
    end

    # DSL: name of the attribute stored as the entry's document.
    def document(name)
      self.document_name = name
    end

    # DSL: value used for the collection's "hnsw:space" metadata.
    def hnsw_space(name)
      self.hnsw_space_param = name
    end

    # DSL: names of the attributes copied into the entry's metadata.
    def metadata(*names)
      self.metadata_names = names
    end

    # Collects the configured document and metadata values from a record.
    #
    # @param model_instance the record to read from
    # @return [Hash] with :document and/or :metadata keys, each present only
    #   when the corresponding setting was configured; metadata keys are strings
    def to_embedding_hash(model_instance)
      result = {}
      result[:document] = model_instance.send(document_name) if document_name

      if metadata_names
        result[:metadata] = metadata_names.each_with_object({}) do |name, collected|
          collected[name.to_s] = model_instance.send(name)
        end
      end

      result
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I've found the issue. Searching takes a lot of CPU resources, so if the server is constrained for CPU time, searching will slow to a crawl (Postgres and other services don't have this issue). There must be some optimization on Chroma's side to fix this.