@mingder78
Created August 14, 2025 10:03
My awesome script — a small TypeScript client for the Ollama /api/generate endpoint with a simple in-memory response cache.
interface OllamaGenerateRequest {
  model: string;
  prompt: string;
  stream?: boolean;
  options?: Record<string, any>; // For Ollama-specific options
}

interface OllamaGenerateResponse {
  model: string;
  created_at: string;
  response: string;
  done: boolean;
  done_reason?: string;
  context?: number[];
  total_duration?: number;
  load_duration?: number;
  prompt_eval_count?: number;
  prompt_eval_duration?: number;
  eval_count?: number;
  eval_duration?: number;
}

// Simple in-memory cache
const cache = new Map<string, OllamaGenerateResponse>();

async function generateWithCache(
  request: OllamaGenerateRequest,
  baseUrl: string = 'http://localhost:11434'
): Promise<OllamaGenerateResponse> {
  const cacheKey = JSON.stringify(request);

  // Check if response is in cache
  if (cache.has(cacheKey)) {
    console.log('Serving from cache:', cacheKey);
    return cache.get(cacheKey)!;
  }

  const url = `${baseUrl}/api/generate`;

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(request),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const data: OllamaGenerateResponse = await response.json();

    // Store response in cache
    cache.set(cacheKey, data);

    return data;
  } catch (error) {
    console.error('Error querying Ollama API:', error);
    throw error;
  }
}

// Example usage:
async function main() {
  const prompt1 = "Tell me a short story about a cat.";
  const model = "llama2"; // Replace with your desired Ollama model

  // First call - will fetch from Ollama API
  console.log('First call for prompt 1...');
  const result1 = await generateWithCache({ model, prompt: prompt1, stream: false });
  console.log('Result 1:', result1.response);

  // Second call with same prompt - will be served from cache
  console.log('\nSecond call for prompt 1...');
  const result1Cached = await generateWithCache({ model, prompt: prompt1, stream: false });
  console.log('Result 1 (cached):', result1Cached.response);

  const prompt2 = "What is the capital of France?";

  // New prompt - will fetch from Ollama API
  console.log('\nFirst call for prompt 2...');
  const result2 = await generateWithCache({ model, prompt: prompt2, stream: false });
  console.log('Result 2:', result2.response);
}
main().catch(console.error); // surface errors from the async entry point instead of an unhandled rejection
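The Map above grows without bound and keys are built with JSON.stringify, so identical requests with a different property order miss the cache. Below is a minimal sketch of a TTL-bounded variant, assuming the same interfaces and endpoint as the script above; the names generateWithTtlCache, CacheEntry, and TTL_MS are illustrative, not part of any Ollama API.

// Sketch: same request flow, but cached entries expire after a TTL.
interface CacheEntry {
  value: OllamaGenerateResponse;
  expiresAt: number; // epoch milliseconds
}

const ttlCache = new Map<string, CacheEntry>();
const TTL_MS = 5 * 60 * 1000; // keep cached generations for 5 minutes (arbitrary choice)

async function generateWithTtlCache(
  request: OllamaGenerateRequest,
  baseUrl: string = 'http://localhost:11434'
): Promise<OllamaGenerateResponse> {
  const cacheKey = JSON.stringify(request);
  const entry = ttlCache.get(cacheKey);

  // Serve from cache only while the entry is still fresh
  if (entry && entry.expiresAt > Date.now()) {
    return entry.value;
  }
  ttlCache.delete(cacheKey); // drop stale entries lazily

  const response = await fetch(`${baseUrl}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(request),
  });
  if (!response.ok) {
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  const data: OllamaGenerateResponse = await response.json();
  ttlCache.set(cacheKey, { value: data, expiresAt: Date.now() + TTL_MS });
  return data;
}

For heavier use you would likely also cap the number of entries (e.g. LRU eviction) and normalize the request before serializing it into a cache key.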