Skip to content

Instantly share code, notes, and snippets.

@mizchi
Last active July 4, 2025 16:25
Show Gist options
  • Save mizchi/de8a19ef473c1fe881c4a541a415bcc2 to your computer and use it in GitHub Desktop.
Save mizchi/de8a19ef473c1fe881c4a541a415bcc2 to your computer and use it in GitHub Desktop.
My Portable RAG
/**
* My Portable RAG
* $ pnpm add sqlite-vec @ai-sdk/google ai
* SQLite Vector Search + Google AI Embeddings
*
* Required environment variables:
* GOOGLE_GENERATIVE_AI_API_KEY=your-api-key
*
* Usage:
* # Index text content
* node rag.ts index --db=index.db --text="Content to search" --title="Article Title" --url="https://example.com"
*
* # Index file content
* node rag.ts index --db=index.db --file=document.txt --title="Document"
*
* # Search
* node rag.ts query --db=index.db "search query"
*
* Options:
* --db: Database file path (default: index.db)
* -t, --text: Text to index
* -f, --file: File path to index
* --title: Document title (optional)
* --url: Document URL (optional)
* -k: Number of search results (default: 5)
*/
import { DatabaseSync } from "node:sqlite";
import * as sqliteVec from "sqlite-vec";
import { google } from "@ai-sdk/google";
import { embed, embedMany } from "ai";
/** Length of each embedding vector; sizes the `embedding float[...]` column of the `vec_items` table. */
const DIMENSION = 768; // Google Embedding dimension
/** Database file path the CLI falls back to when `--db` is not given. */
const DEFAULT_DB_PATH = "index.db";
/**
 * Split `text` into overlapping windows.
 *
 * Each window holds at most `size` UTF-16 code units and starts
 * `size - overlap` code units after the previous one, so consecutive chunks
 * share `overlap` code units.
 *
 * @param text - Text to split. An empty string yields an empty array.
 * @param size - Maximum chunk length. Must be greater than 0.
 * @param overlap - Code units shared by neighbouring chunks. Must satisfy
 *   `0 <= overlap < size`, otherwise the window would never advance.
 * @returns The chunks in document order. The last chunk ends at the end of
 *   `text`; no extra chunk consisting only of already-covered overlap is
 *   emitted.
 * @throws RangeError If `size` or `overlap` violate the constraints above.
 */
export function chunk(text: string, size = 1000, overlap = 100): string[] {
  // Written as negated comparisons so NaN is rejected as well.
  if (!(size > 0)) {
    throw new RangeError(`chunk size must be greater than 0, got ${size}`);
  }
  if (!(overlap >= 0 && overlap < size)) {
    throw new RangeError(
      `chunk overlap must satisfy 0 <= overlap < size, got overlap=${overlap}, size=${size}`
    );
  }

  const step = size - overlap;
  const chunks: string[] = [];
  for (let start = 0; start < text.length; start += step) {
    chunks.push(text.slice(start, start + size));
    // This window already reached the end of the text; another one would
    // contain nothing but the overlapping tail of this chunk.
    if (start + size >= text.length) break;
  }
  return chunks;
}
/**
 * Re-order search results by adding a keyword bonus to each score.
 *
 * The query is lower-cased and split on whitespace. Every distinct, non-empty
 * term that occurs (case-insensitively, as a substring) in a result's
 * `content` adds 0.1 to that result's score. Results are returned sorted by
 * the boosted score, highest first.
 *
 * @param query - Free-text query; blank queries leave all scores unchanged.
 * @param results - Results to score. The array and its items are not mutated.
 * @returns New array of shallow copies with the boosted `score`.
 */
export function rerank<T extends { content: string; score: number }>(
  query: string,
  results: T[]
): T[] {
  // Drop the empty tokens produced by leading/trailing whitespace (they would
  // match every document) and count each distinct term only once.
  const terms = Array.from(
    new Set(
      query
        .toLowerCase()
        .split(/\s+/)
        .filter((term) => term.length > 0)
    )
  );

  return results
    .map((result) => {
      // Lower-case the content once per result rather than once per term.
      const haystack = result.content.toLowerCase();
      const hits = terms.filter((term) => haystack.includes(term)).length;
      return { ...result, score: result.score + hits * 0.1 };
    })
    .sort((a, b) => b.score - a.score);
}
/**
 * Open a SQLite database and make sure the RAG schema exists.
 *
 * Loads the sqlite-vec extension into the connection, then creates (if
 * missing) the `items` table for chunk text and metadata and the `vec_items`
 * vec0 virtual table holding one `DIMENSION`-wide float vector per row.
 *
 * @param path - Database location passed to `DatabaseSync`; defaults to `":memory:"`.
 * @returns The opened connection. The caller is responsible for closing it.
 */
export function createDB(path = ":memory:"): DatabaseSync {
  // The connection is opened with extension loading allowed so that
  // sqlite-vec can be loaded into it on the next line.
  const connection = new DatabaseSync(path, { allowExtension: true });
  sqliteVec.load(connection);

  const schemaSql = `
CREATE TABLE IF NOT EXISTS items (
id INTEGER PRIMARY KEY,
content TEXT,
title TEXT,
url TEXT
);
CREATE VIRTUAL TABLE IF NOT EXISTS vec_items USING vec0(embedding float[${DIMENSION}]);
`;
  connection.exec(schemaSql);

  return connection;
}
/**
 * Store one chunk and its embedding.
 *
 * Inserts the text and metadata into `items`, then inserts the embedding into
 * `vec_items` under the same rowid. Both inserts run inside a SAVEPOINT so a
 * failing vector insert does not leave an `items` row without a vector. A
 * SAVEPOINT (rather than BEGIN) is used so this also works when the caller
 * already has a transaction open.
 *
 * @param db - Connection whose schema contains `items` and `vec_items`.
 * @param content - Chunk text.
 * @param embedding - Embedding values; serialised as a float32 blob.
 * @param metadata - Optional title/URL. Missing or empty values are stored as NULL.
 * @returns The rowid shared by the `items` and `vec_items` rows.
 * @throws Rethrows any error from the inserts after rolling both back.
 */
export function save(
  db: DatabaseSync,
  content: string,
  embedding: number[],
  metadata?: { title?: string; url?: string }
): number {
  db.exec("SAVEPOINT rag_save");
  try {
    const { lastInsertRowid } = db
      .prepare("INSERT INTO items (content, title, url) VALUES (?, ?, ?)")
      .run(content, metadata?.title || null, metadata?.url || null);

    // The vector is bound as the raw bytes of a Float32Array and the rowid as
    // a BigInt, exactly as before.
    db.prepare("INSERT INTO vec_items (rowid, embedding) VALUES (?, ?)").run(
      BigInt(lastInsertRowid),
      new Uint8Array(new Float32Array(embedding).buffer)
    );

    db.exec("RELEASE rag_save");
    return Number(lastInsertRowid);
  } catch (error) {
    // Undo the partial write, then pop the savepoint so it does not linger.
    db.exec("ROLLBACK TO rag_save");
    db.exec("RELEASE rag_save");
    throw error;
  }
}
/**
 * Run a nearest-neighbour query against `vec_items` and join the matching
 * `items` rows.
 *
 * @param db - Connection whose schema contains `items` and `vec_items`.
 * @param embedding - Query vector; serialised as a float32 blob.
 * @param k - Maximum number of neighbours to return. Must be a non-negative
 *   integer; `0` returns an empty array without querying.
 * @returns Matching rows ordered by ascending distance.
 * @throws RangeError If `k` is not a non-negative integer.
 */
export function search(
  db: DatabaseSync,
  embedding: number[],
  k = 5
): Array<{
  id: number;
  content: string;
  title: string | null;
  url: string | null;
  distance: number;
}> {
  // Shape of the columns selected below.
  type Row = {
    id: number;
    content: string;
    title: string | null;
    url: string | null;
    distance: number;
  };

  if (!Number.isInteger(k) || k < 0) {
    throw new RangeError(`k must be a non-negative integer, got ${k}`);
  }
  if (k === 0) return [];

  const rows = db
    .prepare(
      `
SELECT i.id, i.content, i.title, i.url, v.distance
FROM vec_items v
JOIN items i ON i.id = v.rowid
WHERE v.embedding MATCH ? AND k = ?
ORDER BY v.distance
`
    )
    .all(new Uint8Array(new Float32Array(embedding).buffer), k);

  // The driver returns loosely typed rows; the SELECT list above fixes their shape.
  return rows as Row[];
}
/**
 * Chunk a document, embed every chunk, and store the chunks with their
 * vectors.
 *
 * All chunks are embedded in a single `embedMany` call. The rows are then
 * written inside a SAVEPOINT so that a failure part-way through does not
 * leave a partially indexed document behind.
 *
 * @param db - Connection whose schema contains `items` and `vec_items`.
 * @param text - Document text. Empty text indexes nothing and returns `[]`
 *   without calling the embedding model.
 * @param metadata - Optional title/URL stored with every chunk.
 * @param chunkSize - Maximum chunk length passed to `chunk`. Must be > 0.
 * @returns Row ids of the stored chunks, in document order.
 * @throws RangeError If `chunkSize` is not greater than 0.
 * @throws Error If the model does not return exactly one embedding per chunk.
 */
export async function index(
  db: DatabaseSync,
  text: string,
  metadata?: { title?: string; url?: string },
  chunkSize = 1000
): Promise<number[]> {
  if (!(chunkSize > 0)) {
    throw new RangeError(`chunkSize must be greater than 0, got ${chunkSize}`);
  }
  // chunk() defaults to an overlap of 100, which never advances when
  // chunkSize <= 100. Keep 100 for larger sizes and scale it down otherwise.
  const overlap = chunkSize > 100 ? 100 : Math.floor(chunkSize / 10);
  const chunks = chunk(text, chunkSize, overlap);
  if (chunks.length === 0) return [];

  // Batchable
  const { embeddings } = await embedMany({
    model: google.textEmbeddingModel("text-embedding-004"),
    values: chunks,
  });
  // Storing a placeholder vector for a missing embedding would pollute the
  // index, so fail before anything is written.
  if (embeddings.length !== chunks.length) {
    throw new Error(
      `Expected ${chunks.length} embeddings but received ${embeddings.length}`
    );
  }

  db.exec("SAVEPOINT rag_index");
  try {
    const ids: number[] = [];
    for (let i = 0; i < chunks.length; i++) {
      const chunkText = chunks[i];
      const embedding = embeddings[i];
      if (chunkText === undefined || embedding === undefined) {
        throw new Error(`Missing embedding for chunk ${i}`);
      }
      ids.push(save(db, chunkText, embedding, metadata));
    }
    db.exec("RELEASE rag_index");
    return ids;
  } catch (error) {
    // Undo every chunk written so far, then pop the savepoint.
    db.exec("ROLLBACK TO rag_index");
    db.exec("RELEASE rag_index");
    throw error;
  }
}
/**
 * RAG search: embed the query, fetch nearest chunks, and rerank them.
 *
 * Retrieves `k * 2` nearest neighbours, converts each distance to a score
 * with `1 / (1 + distance)`, applies the keyword-based `rerank`, and returns
 * the best `k`.
 *
 * @param db - Connection whose schema contains `items` and `vec_items`.
 * @param query - Free-text query. A blank query returns `[]` without calling
 *   the embedding model.
 * @param k - Number of results to return. Must be a non-negative integer.
 * @returns Up to `k` results ordered by descending score.
 * @throws RangeError If `k` is not a non-negative integer.
 */
export async function ragSearch(
  db: DatabaseSync,
  query: string,
  k = 5
): Promise<
  Array<{
    content: string;
    title: string | null;
    url: string | null;
    score: number;
  }>
> {
  // A negative k would make slice(0, k) drop results from the end, and NaN
  // would be passed on to the vector query.
  if (!Number.isInteger(k) || k < 0) {
    throw new RangeError(`k must be a non-negative integer, got ${k}`);
  }
  if (k === 0 || query.trim() === "") return [];

  const { embedding } = await embed({
    model: google.textEmbeddingModel("text-embedding-004"),
    value: query,
  });
  // Over-fetch so the reranker has candidates to promote.
  const candidates = search(db, embedding, k * 2);
  // Convert distance to score
  const scored = candidates.map((candidate) => ({
    content: candidate.content,
    title: candidate.title,
    url: candidate.url,
    score: 1 / (1 + candidate.distance),
  }));
  // Rerank and return top k results
  return rerank(query, scored).slice(0, k);
}
// CLI usage example
// Runs only when this file is the script passed to node. The script path is
// converted with pathToFileURL so that paths needing URL encoding (spaces,
// non-ASCII characters) and Windows paths compare equal to import.meta.url.
const { pathToFileURL } = await import("node:url");
if (
  process.argv[1] !== undefined &&
  import.meta.url === pathToFileURL(process.argv[1]).href
) {
  const { parseArgs } = await import("node:util");
  const fs = await import("node:fs/promises");
  const cmd = process.argv[2];
  switch (cmd) {
    case "q":
    case "query": {
      const parsed = parseArgs({
        args: process.argv.slice(3),
        options: {
          db: { type: "string" },
          k: { type: "string", short: "k" },
        },
        allowPositionals: true,
      });
      const dbPath = parsed.values.db || DEFAULT_DB_PATH;
      // Join all positionals so an unquoted multi-word query is not truncated
      // to its first word.
      const query = parsed.positionals.join(" ").trim();
      if (!query) {
        console.error("Please specify a search query");
        process.exit(1);
      }
      const k = parsed.values.k ? Number(parsed.values.k) : 5;
      if (!Number.isInteger(k) || k <= 0) {
        console.error(`Invalid -k value: ${parsed.values.k}`);
        process.exit(1);
      }
      const db = createDB(dbPath);
      try {
        const results = await ragSearch(db, query, k);
        // Display results
        results.forEach((r, i) => {
          console.log(`\n${i + 1}. ${r.title || "(Untitled)"}`);
          if (r.url) console.log(` URL: ${r.url}`);
          console.log(` Score: ${r.score.toFixed(3)}`);
          console.log(` Content: ${r.content.substring(0, 100)}...`);
        });
      } finally {
        // Close the database even when the search fails.
        db.close();
      }
      break;
    }
    case "i":
    case "index": {
      const parsed = parseArgs({
        args: process.argv.slice(3),
        options: {
          db: { type: "string" },
          text: { type: "string", short: "t" },
          file: { type: "string", short: "f" },
          title: { type: "string" },
          url: { type: "string" },
        },
        allowPositionals: true,
      });
      const dbPath = parsed.values.db || DEFAULT_DB_PATH;
      // --text wins over --file when both are given.
      const content =
        parsed.values.text ||
        (parsed.values.file
          ? await fs.readFile(parsed.values.file, "utf-8")
          : "");
      if (!content) {
        console.error("Please specify text or file path");
        process.exit(1);
      }
      const metadata = {
        title: parsed.values.title,
        url: parsed.values.url,
      };
      const db = createDB(dbPath);
      try {
        const ids = await index(db, content, metadata);
        console.log(`Indexed ${ids.length} chunks`);
        if (metadata.title) console.log(`Title: ${metadata.title}`);
        if (metadata.url) console.log(`URL: ${metadata.url}`);
      } finally {
        // Close the database even when indexing fails.
        db.close();
      }
      break;
    }
    default: {
      console.error(`Unknown command: ${cmd || "(none)"}`);
      console.error("\nUsage:");
      console.error(
        " node rag.ts index --db=index.db --text='text' --title='Title' --url='URL'"
      );
      console.error(
        " node rag.ts index --db=index.db --file=file.txt --title='Title'"
      );
      console.error(" node rag.ts query --db=index.db 'search query'");
      console.error("\nCommands:");
      console.error(" i, index Index text content");
      console.error(" q, query Search indexed content");
      process.exit(1);
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment