Created
October 18, 2023 04:09
-
-
Save khubo/462d87d361530de8002dbe51c7db274d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fetch from 'node-fetch';
import mongoose from 'mongoose';
import { config } from 'dotenv';

// Load variables from a local .env file into process.env before they are read below.
config();

// Bearer token sent in the Authorization header of embedding requests.
const HF_TOKEN = process.env.HF_TOKEN;
// URL that embedding requests are POSTed to.
const EMBEDDING_URI = process.env.EMBEDDING_URI;
/**
 * Uses the Hugging Face API to generate an embedding: POSTs `text` to
 * `EMBEDDING_URI` as JSON, authenticating with `HF_TOKEN` as a bearer token.
 *
 * @param {string} text - Text sent as the `inputs` field of the request body.
 * @returns {Promise<*>} The parsed JSON response body. Callers in this file
 *   use it as an array of numbers; the exact shape depends on the endpoint.
 * @throws {Error} If the endpoint answers with a non-2xx status.
 */
export async function generateEmbedding(text) {
  const response = await fetch(EMBEDDING_URI, {
    method: 'post',
    body: JSON.stringify({
      inputs: text,
    }),
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${HF_TOKEN}`,
    },
  });

  // An error payload must never be mistaken for an embedding and then
  // stored in the database or used as a query vector.
  if (!response.ok) {
    throw new Error(
      `Embedding request failed: ${response.status} ${response.statusText}`,
    );
  }

  return response.json();
}
// Document shape: a label, the raw text, and the embedding vector stored
// alongside it as an array of numbers.
const textSchema = new mongoose.Schema({
  name: String,
  text: String,
  textEmbedding: [Number],
});

// Model built from `textSchema` and registered under the name 'text'.
const Text = mongoose.model('text', textSchema);
// Sample entries that `insert()` embeds and stores. Each entry has the raw
// `text` to embed and a short `name` used as a label in log output.
const data = [
  {
    text: 'this is just random text without much data in it',
    name: 'random',
  },
  {
    text: 'A very meaningful line of text taken from harry potter',
    name: 'harry potter',
  },
  {
    text: 'this is just hhello world text',
    name: 'hello',
  },
];
/**
 * Connects to the database at `process.env.MONGO_URI`, then, for each entry in
 * `data` in order, generates an embedding for its text and saves the entry
 * together with that embedding as a `Text` document.
 *
 * The connection is closed when the function finishes, whether it succeeds
 * or fails, so a one-off run does not leave the process hanging.
 *
 * @returns {Promise<void>}
 * @throws Rethrows any connection, embedding, or save error.
 */
async function insert() {
  await mongoose.connect(process.env.MONGO_URI);
  try {
    for (const entry of data) {
      const textEmbedding = await generateEmbedding(entry.text);
      const sampleText = new Text({
        ...entry,
        textEmbedding,
      });
      await sampleText.save();
      console.log('saved entry', entry.name);
    }
  } finally {
    // Release the connection even if an embedding request or a save failed.
    await mongoose.disconnect();
  }
}
/**
 * Connects to the database at `process.env.MONGO_URI`, generates an embedding
 * for `text`, runs a `$vectorSearch` aggregation against the `textEmbedding`
 * path using the `textInd` index, and logs the name and text of each
 * returned document (at most one, per `limit: 1`).
 *
 * The connection is closed when the function finishes, whether it succeeds
 * or fails, so a one-off run does not leave the process hanging.
 *
 * @param {string} text - Text to embed and search with.
 * @returns {Promise<void>}
 * @throws Rethrows any connection, embedding, or aggregation error.
 */
async function query(text) {
  await mongoose.connect(process.env.MONGO_URI);
  try {
    const embedding = await generateEmbedding(text);
    const result = await Text.aggregate([
      {
        $vectorSearch: {
          queryVector: embedding,
          path: 'textEmbedding',
          numCandidates: 100,
          limit: 1,
          index: 'textInd',
        },
      },
    ]);

    // `result` is already resolved here, so a plain loop is enough.
    for (const doc of result) {
      console.log('doc is', doc.name);
      console.log('doc text', doc.text);
      console.log('------------');
    }
  } finally {
    // Release the connection even if the embedding request or search failed.
    await mongoose.disconnect();
  }
}
// Script entry point: run a sample search. On failure, log the error and
// mark the process as failed instead of exiting with status 0.
query('something meaningful').catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment