Created
March 14, 2024 06:07
-
-
Save mrdjohnson/4c9873a702a7a8f6e0d604ba1622e2fc to your computer and use it in GitHub Desktop.
Basic Ollama streaming example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// more info on the message type can be found here: https://github.com/ollama/ollama/blob/main/docs/api.md#parameters-1

/** A single chat message exchanged with the Ollama chat endpoint. */
type Message = {
  /** Who authored the message. */
  role: 'assistant' | 'user' | 'system'
  /** The text of the message. */
  content: string
  /** Optional images attached to the message (see the API docs linked above for the expected encoding). */
  images?: string[]
}

/** One newline-delimited JSON object emitted by the chat endpoint while streaming. */
type OllamaResponse = {
  /** Name of the model that produced this object. */
  model: string
  /** Creation timestamp, as sent by the server. */
  created_at: string
  /** The (partial) assistant message carried by this object. */
  message: Message
  /** True on the last object of a stream. */
  done: boolean
}
/**
 * Minimal streaming client for the Ollama `/api/chat` endpoint.
 *
 * All you need to create an instance is the host and the model name; the chat
 * history and a system message are optional.
 */
export class BasicOllamaChatExample {
  /** Aborts the in-flight request. Only set while a stream is active. */
  private onAbort?: () => void

  /**
   * @param host - Base URL of the Ollama server, e.g. `http://localhost:11434`.
   * @param modelName - Name of the model to chat with.
   * @param previousMessages - The chat's history, sent before the new user message.
   * @param systemMessage - Optional system prompt, always sent first.
   */
  constructor(
    private host: string,
    private modelName: string,
    private previousMessages?: Message[],
    private systemMessage?: string,
  ) {}

  /**
   * Builds the full message list for a request: the system message (if any),
   * then the history (if any), then the new user message.
   *
   * @param content - Text of the new user message.
   * @param images - Optional images to attach to the new user message.
   * @returns A new array; the stored history is not mutated.
   */
  makeMessages(content: string, images?: string[]): Message[] {
    const messages: Message[] = []

    // the system message should be first
    if (this.systemMessage) {
      messages.push({ role: 'system', content: this.systemMessage })
    }

    if (this.previousMessages) {
      messages.push(...this.previousMessages)
    }

    // send at least one message
    messages.push({ role: 'user', content, images })

    return messages
  }

  /**
   * Sends `content` to the model and yields the reply piece by piece as it
   * streams in.
   *
   * This is a generator function: each `yield` hands one part of the reply to
   * the caller and then keeps going. (Alternatively one could pass a callback
   * and skip the use of `yield`.)
   *
   * @param content - Text of the new user message.
   * @param images - Optional images to attach to the new user message.
   * @returns An async generator of reply text fragments.
   * @throws If the request fails, the server answers with a non-2xx status,
   *   the stream contains an `error` object, or a line is not valid JSON.
   *   When cancelled through {@link cancelStream}, the abort reason is thrown.
   */
  async *streamChat(content: string, images?: string[]): AsyncGenerator<string> {
    // if we want to cancel the chat at any time, this will allow us to do so
    const abortController = new AbortController()
    const onAbort = () => abortController.abort('Stream ended manually')
    this.onAbort = onAbort

    let reader: ReadableStreamDefaultReader<Uint8Array> | undefined

    try {
      const messages = this.makeMessages(content, images)

      // tolerate a host that was configured with a trailing slash
      const url = this.host.replace(/\/+$/, '') + '/api/chat'

      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        // More options could go in here, but we just need the model name and messages.
        // Note: `format: 'json'` is deliberately NOT sent. It constrains the *model's answer*
        // to be JSON; the streamed transport is newline-delimited JSON either way.
        body: JSON.stringify({ model: this.modelName, messages, stream: true }),
        signal: abortController.signal,
      })

      if (!response.ok) {
        // error responses carry a JSON body like {"error": "..."}; surface it instead of
        // failing later with an unrelated TypeError
        const detail = await response.text().catch(() => '')
        throw new Error(
          `Ollama request failed with status ${response.status}` + (detail ? `: ${detail}` : ''),
        )
      }

      if (!response.body) return

      reader = response.body.getReader()

      // One decoder for the whole stream: with `stream: true` it keeps the bytes of a
      // multi-byte character that was split across two chunks until the rest arrives.
      const decoder = new TextDecoder()

      // Holds the trailing, not yet newline-terminated part of the output. A network chunk
      // can end in the middle of a JSON object, so only complete lines are parsed.
      let pending = ''

      while (true) {
        const { done, value } = await reader.read()

        pending += done ? decoder.decode() : decoder.decode(value, { stream: true })

        // the output is newline-delimited JSON: a series of JSON objects, one per line
        const lines = pending.split('\n')

        // the last element is an incomplete line unless the stream has ended
        pending = done ? '' : (lines.pop() ?? '')

        for (const line of lines) {
          const data = this.parseLine(line)

          if (!data) continue

          // the final object marks the end of the reply: stop reading altogether
          if (data.done) return

          yield data.message.content
        }

        if (done) return
      }
    } finally {
      // Runs on normal completion, on errors, and when the consumer stops iterating early.
      if (reader) {
        // release the underlying connection; a stream that already errored rejects here
        await reader.cancel().catch(() => undefined)
      }

      // only clear our own handler, a newer stream may have replaced it
      if (this.onAbort === onAbort) {
        this.onAbort = undefined
      }
    }
  }

  /** Cancels the stream that is currently running, if there is one. */
  cancelStream() {
    if (!this.onAbort) return

    this.onAbort()
    this.onAbort = undefined
  }

  /** Parses one line of the stream; returns `undefined` for blank lines. */
  private parseLine(line: string): OllamaResponse | undefined {
    const jsonString = line.trim()

    // skip empty strings
    if (!jsonString) return undefined

    let parsed: unknown

    try {
      parsed = JSON.parse(jsonString)
    } catch (error) {
      console.error('Failed to parse JSON:', jsonString)
      throw error
    }

    if (typeof parsed !== 'object' || parsed === null) {
      throw new Error(`Unexpected Ollama response: ${jsonString}`)
    }

    // the server reports failures that happen mid-stream as {"error": "..."}
    const { error } = parsed as { error?: unknown }

    if (error !== undefined) {
      throw new Error(`Ollama error: ${typeof error === 'string' ? error : JSON.stringify(error)}`)
    }

    return parsed as OllamaResponse
  }
}
// Example usage:

// default Ollama host
const host = 'http://localhost:11434'
const modelName = 'gemma:latest' // llava series are the ones to use for images as of this writing

const ollamaChat = new BasicOllamaChatExample(host, modelName)

/** Streams one reply from the model, logs the accumulated text when the stream ends or fails. */
const startStream = async () => {
  const userMessage = 'What is a good message to send to a bot?'
  const stream = ollamaChat.streamChat(userMessage)

  let incomingMessage = ''

  try {
    for await (const messagePart of stream) {
      // do something with part of the message here
      incomingMessage += messagePart
    }
  } catch (error: unknown) {
    console.error(error)
  } finally {
    // the final result (or whatever arrived before an error or cancellation)
    console.log(incomingMessage)
  }
}

// startStream handles its own errors, so the promise is intentionally not awaited
void startStream()

// you can call ollamaChat.cancelStream() at any time and it will stop the message
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment