Skip to content

Instantly share code, notes, and snippets.

@gyk
Created June 4, 2025 02:55
Show Gist options
  • Save gyk/a6567879a728aa98b4c27bac5b3bb20a to your computer and use it in GitHub Desktop.
Save gyk/a6567879a728aa98b4c27bac5b3bb20a to your computer and use it in GitHub Desktop.
(ns ollama-embd
(:require
[babashka.http-client :as http]
[cheshire.core :as json]
[clojure.string :as str]))
; https://github.com/ggml-org/llama.cpp/issues/6980#issuecomment-2602196900
(defn embed-text
  "Request embeddings for `text` from the local Ollama server.

  POSTs a JSON body containing the model name and `text` (as `:input`) to
  http://localhost:11434/api/embed.

  Returns the response body decoded from JSON (keys keywordized) when the
  HTTP status is 200.

  Throws an `ex-info` with message \"Failed to embed text\" and a data map
  of `:status` and the raw `:body` for any other status."
  [text]
  (let [response (http/post
                  "http://localhost:11434/api/embed"
                  {:body (json/encode
                          ;; The #_#_ pair discards the alternative model
                          ;; entry (key and value) without deleting it.
                          {#_#_:model "EntropyYue/jina-embeddings-v2-base-zh"
                           :model "cwchang/jina-embeddings-v2-base-zh"
                           :input text})
                   :headers {:content-type "application/json"}
                   ;; The client throws on error statuses by default, which
                   ;; would bypass the status check below; disable that so
                   ;; every non-200 response is reported uniformly here.
                   :throw false})
        {:keys [status body]} response]
    (if (= 200 status)
      (json/decode body true)
      (throw (ex-info "Failed to embed text"
                      {:status status
                       :body body})))))
(defn embed-text*
  "Embed `text` via `embed-text` and return the element at index 0 of the
  response's `:embeddings` entry (nil when absent)."
  [text]
  (-> (embed-text text)
      (get :embeddings)
      (get 0)))
(defn dot-product
  "Return the sum of the pairwise products of the elements of `v1` and `v2`.

  Asserts that both collections contain the same number of elements."
  [v1 v2]
  (assert (= (count v1) (count v2))
          "Vectors must be of the same length")
  (let [pairwise-products (map * v1 v2)]
    (apply + pairwise-products)))
(defn cosine-similarity
  "Return the cosine similarity of `v1` and `v2`: their dot product divided
  by the product of their Euclidean norms."
  [v1 v2]
  (letfn [(magnitude [v]
            ;; Euclidean norm: square root of the vector dotted with itself.
            (Math/sqrt (dot-product v v)))]
    (/ (dot-product v1 v2)
       (* (magnitude v1) (magnitude v2)))))
(defn compute-similarity
  "Embed the texts `t1` and `t2` (in that order) with `embed-text*` and
  return the dot product of the two resulting embeddings."
  [t1 t2]
  (->> [t1 t2]
       (mapv embed-text*)
       (apply dot-product)))
(comment
  ;; Scratch demo: score one query against a handful of llama facts.
  (def documents
    ["Llamas are members of the camelid family meaning they're pretty closely related to vicuñas and camels"
     "Llamas were first domesticated and used as pack animals 4,000 to 5,000 years ago in the Peruvian highlands"
     "Llamas can grow as much as 6 feet tall though the average llama between 5 feet 6 inches and 5 feet 9 inches tall"
     "Llamas weigh between 280 and 450 pounds and can carry 25 to 30 percent of their body weight"
     "Llamas are vegetarians and have very efficient digestive systems"
     "Llamas live to be about 20 years old, though some only live for 15 years and others live to be 30 years old"])

  (def query "What animals are llamas related to?")

  ;; Print the query, the document, and their similarity for every document.
  (run! (fn [doc]
          (let [score (compute-similarity query doc)]
            (println "Query:" query)
            (println "Document:" doc)
            (println "Similarity:" score)
            (println "-------------------")))
        documents))
;; REPL scratchpad: nothing inside `comment` is evaluated on load; each form
;; is meant to be sent to the REPL by hand.
(comment
;; Embed five texts once, then print pairwise dot products between them.
;; e1 is a Chinese sentence and e2 its English counterpart; e3-e5 are other
;; Chinese sentences on different topics.
(let [e1 (embed-text* "今天天气怎么样?")
e2 (embed-text* "How is the weather today?")
e3 (embed-text* "M8.2 级太阳耀斑引发 G4 级地磁风暴")
e4 (embed-text* "美国的加密货币战略储备规模有多大")
e5 (embed-text* "AI 耗电量预计将在年底超过比特币")]
;; Self-product: equals 1 only if the returned embeddings are unit-length.
(println "e1~e1" (dot-product e1 e1))
;; Same pair in both argument orders.
(println "e1~e2" (dot-product e1 e2))
(println "e2~e1" (dot-product e2 e1))
;; The weather sentences against the other-topic sentences.
(println "e1~e3" (dot-product e1 e3))
(println "e1~e4" (dot-product e1 e4))
(println "e1~e5" (dot-product e1 e5))
(println "e2~e3" (dot-product e2 e3))
(println "e4~e5" (dot-product e4 e5)))
;; One-off comparisons; each call embeds both texts again.
(compute-similarity "今天天气怎么样?"
"How is the weather today?")
(compute-similarity "你好"
"Hello")
(compute-similarity "你好"
"Hi")
(compute-similarity "Hi"
"Hello")
;; Same greeting pair with each word repeated 5 and then 50 times.
(compute-similarity (str/join " " (repeat 5 "Hi"))
(str/join " " (repeat 5 "Hello")))
(compute-similarity (str/join " " (repeat 50 "Hi"))
(str/join " " (repeat 50 "Hello")))
;; NOTE(review): trailing nil is presumably just a placeholder final form.
nil)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment