My awesome script
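A small TypeScript script that calls the Ollama /api/generate endpoint and memoizes responses in an in-memory Map, so repeated identical requests are answered from the cache instead of re-running the model.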
interface OllamaGenerateRequest {
  model: string;
  prompt: string;
  stream?: boolean;
  options?: Record<string, any>; // For Ollama-specific options
}

interface OllamaGenerateResponse {
  model: string;
  created_at: string;
  response: string;
  done: boolean;
  done_reason?: string;
  context?: number[];
  total_duration?: number;
  load_duration?: number;
  prompt_eval_count?: number;
  prompt_eval_duration?: number;
  eval_count?: number;
  eval_duration?: number;
}

// Simple in-memory cache
const cache = new Map<string, OllamaGenerateResponse>();

async function generateWithCache(
  request: OllamaGenerateRequest,
  baseUrl: string = 'http://localhost:11434'
): Promise<OllamaGenerateResponse> {
  const cacheKey = JSON.stringify(request);

  // Check if response is in cache
  if (cache.has(cacheKey)) {
    console.log('Serving from cache:', cacheKey);
    return cache.get(cacheKey)!;
  }

  const url = `${baseUrl}/api/generate`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(request),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data: OllamaGenerateResponse = await response.json();

    // Store response in cache
    cache.set(cacheKey, data);

    return data;
  } catch (error) {
    console.error('Error querying Ollama API:', error);
    throw error;
  }
}

// Example usage:
async function main() {
  const prompt1 = "Tell me a short story about a cat.";
  const model = "llama2"; // Replace with your desired Ollama model

  // First call - will fetch from Ollama API
  console.log('First call for prompt 1...');
  const result1 = await generateWithCache({ model, prompt: prompt1, stream: false });
  console.log('Result 1:', result1.response);

  // Second call with same prompt - will be served from cache
  console.log('\nSecond call for prompt 1...');
  const result1Cached = await generateWithCache({ model, prompt: prompt1, stream: false });
  console.log('Result 1 (cached):', result1Cached.response);

  const prompt2 = "What is the capital of France?";

  // New prompt - will fetch from Ollama API
  console.log('\nFirst call for prompt 2...');
  const result2 = await generateWithCache({ model, prompt: prompt2, stream: false });
  console.log('Result 2:', result2.response);
}
main().catch(console.error);
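To run this, an Ollama server must be listening on http://localhost:11434 (the default) with the referenced model pulled, e.g. by running "ollama pull llama2". The global fetch used above is available in Node 18+, Bun, and Deno, so the file can be executed with a TypeScript runner such as tsx. Note that the cache is keyed on the exact JSON serialization of the request, lives only for the lifetime of the process, and grows without bound.

If cached answers should eventually expire, the same pattern extends to a TTL cache. The sketch below reuses the interfaces defined above; the CacheEntry type, the ttlMs parameter, and its 5-minute default are illustrative assumptions, not part of the original script.

interface CacheEntry {
  value: OllamaGenerateResponse;
  storedAt: number; // epoch milliseconds when the entry was cached
}

const ttlCache = new Map<string, CacheEntry>();

async function generateWithTtlCache(
  request: OllamaGenerateRequest,
  ttlMs: number = 5 * 60_000, // illustrative default: 5 minutes
  baseUrl: string = 'http://localhost:11434'
): Promise<OllamaGenerateResponse> {
  const cacheKey = JSON.stringify(request);
  const entry = ttlCache.get(cacheKey);

  // Serve from the cache only while the entry is still fresh
  if (entry && Date.now() - entry.storedAt < ttlMs) {
    return entry.value;
  }

  const response = await fetch(`${baseUrl}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(request),
  });
  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  const data: OllamaGenerateResponse = await response.json();
  ttlCache.set(cacheKey, { value: data, storedAt: Date.now() });
  return data;
}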