@mingder78
Created August 14, 2025 10:03
My awesome script — a small TypeScript client for the Ollama /api/generate endpoint with a simple in-memory response cache.
interface OllamaGenerateRequest {
  model: string;
  prompt: string;
  stream?: boolean;
  options?: Record<string, any>; // For Ollama-specific options
}

interface OllamaGenerateResponse {
  model: string;
  created_at: string;
  response: string;
  done: boolean;
  done_reason?: string;
  context?: number[];
  total_duration?: number;
  load_duration?: number;
  prompt_eval_count?: number;
  prompt_eval_duration?: number;
  eval_count?: number;
  eval_duration?: number;
}

// Simple in-memory cache
const cache = new Map<string, OllamaGenerateResponse>();

async function generateWithCache(
  request: OllamaGenerateRequest,
  baseUrl: string = 'http://localhost:11434'
): Promise<OllamaGenerateResponse> {
  const cacheKey = JSON.stringify(request);

  // Check if response is in cache
  if (cache.has(cacheKey)) {
    console.log('Serving from cache:', cacheKey);
    return cache.get(cacheKey)!;
  }

  const url = `${baseUrl}/api/generate`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(request),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data: OllamaGenerateResponse = await response.json();

    // Store response in cache
    cache.set(cacheKey, data);

    return data;
  } catch (error) {
    console.error('Error querying Ollama API:', error);
    throw error;
  }
}

// Example usage:
async function main() {
  const prompt1 = "Tell me a short story about a cat.";
  const model = "llama2"; // Replace with your desired Ollama model

  // First call - will fetch from Ollama API
  console.log('First call for prompt 1...');
  const result1 = await generateWithCache({ model, prompt: prompt1, stream: false });
  console.log('Result 1:', result1.response);

  // Second call with same prompt - will be served from cache
  console.log('\nSecond call for prompt 1...');
  const result1Cached = await generateWithCache({ model, prompt: prompt1, stream: false });
  console.log('Result 1 (cached):', result1Cached.response);

  const prompt2 = "What is the capital of France?";

  // New prompt - will fetch from Ollama API
  console.log('\nFirst call for prompt 2...');
  const result2 = await generateWithCache({ model, prompt: prompt2, stream: false });
  console.log('Result 2:', result2.response);
}
main().catch(console.error); // surface errors from the async entry point instead of an unhandled rejection
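The Map above grows without bound and keys are built with JSON.stringify, so identical requests with a different property order miss the cache. Below is a minimal sketch of a TTL-bounded variant, assuming the same interfaces and endpoint as the script above; the names generateWithTtlCache, CacheEntry, and TTL_MS are illustrative, not part of any Ollama API.

// Sketch: same request flow, but cached entries expire after a TTL.
interface CacheEntry {
  value: OllamaGenerateResponse;
  expiresAt: number; // epoch milliseconds
}

const ttlCache = new Map<string, CacheEntry>();
const TTL_MS = 5 * 60 * 1000; // keep cached generations for 5 minutes (arbitrary choice)

async function generateWithTtlCache(
  request: OllamaGenerateRequest,
  baseUrl: string = 'http://localhost:11434'
): Promise<OllamaGenerateResponse> {
  const cacheKey = JSON.stringify(request);
  const entry = ttlCache.get(cacheKey);

  // Serve from cache only while the entry is still fresh
  if (entry && entry.expiresAt > Date.now()) {
    return entry.value;
  }
  ttlCache.delete(cacheKey); // drop stale entries lazily

  const response = await fetch(`${baseUrl}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(request),
  });
  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  const data: OllamaGenerateResponse = await response.json();
  ttlCache.set(cacheKey, { value: data, expiresAt: Date.now() + TTL_MS });
  return data;
}

For heavier use you would likely also cap the number of entries (e.g. LRU eviction) and normalize the request before serializing it into a cache key.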