Skip to content

Instantly share code, notes, and snippets.

@mrdjohnson
Created March 14, 2024 06:07
Show Gist options
  • Select an option

  • Save mrdjohnson/4c9873a702a7a8f6e0d604ba1622e2fc to your computer and use it in GitHub Desktop.

Select an option

Save mrdjohnson/4c9873a702a7a8f6e0d604ba1622e2fc to your computer and use it in GitHub Desktop.
Basic Ollama streaming example
/**
 * One chat message sent to, or received from, the Ollama chat endpoint.
 *
 * Field reference:
 * https://github.com/ollama/ollama/blob/main/docs/api.md#parameters-1
 */
type Message = {
  /** Author of the message. */
  role: 'system' | 'user' | 'assistant'
  /** Text of the message. */
  content: string
  /** Optional images attached to the message (see the API reference above for the expected encoding). */
  images?: Array<string>
}
/**
 * Shape of one newline-delimited JSON object streamed back by the chat endpoint.
 * Property names mirror the wire format, hence the snake_case `created_at`.
 */
type OllamaResponse = {
  /** Name of the model that produced this piece of the reply. */
  model: string
  /** Creation timestamp, kept as the raw string that was received. */
  created_at: string
  /**
   * The piece of the reply carried by this object.
   * (Previously `Omit<Message, 'botName'>`; `Message` has no `botName` key, so
   * the `Omit` was a no-op and only obscured the real type.)
   */
  message: Message
  /** True on the last object of the stream. */
  done: boolean
}
/**
 * Minimal client that streams a chat reply from an Ollama server.
 *
 * Construct it with the server host and the model name (optionally with the
 * chat history and a system prompt), then iterate `streamChat()` to receive the
 * reply piece by piece. `cancelStream()` aborts the request currently in flight.
 */
export class BasicOllamaChatExample {
  /** Aborts the active request; `undefined` whenever no stream is running. */
  private onAbort?: () => void

  /**
   * All you need to create this class is the host and the model name.
   *
   * @param host Base URL of the Ollama server, e.g. `http://localhost:11434`.
   * @param modelName Name of the model to chat with.
   * @param previousMessages The chat's history, sent ahead of the new user message.
   * @param systemMessage Optional system prompt, sent as the very first message.
   */
  constructor(
    private host: string,
    private modelName: string,
    private previousMessages?: Message[],
    private systemMessage?: string,
  ) {}

  /**
   * Builds the message list for one request: system message (if any), then the
   * previous messages (if any), then the new user message.
   *
   * @param content Text of the new user message.
   * @param images Optional images to attach to the new user message.
   * @returns A fresh array; the stored history is not mutated.
   */
  makeMessages(content: string, images?: string[]): Message[] {
    const messages: Message[] = []

    // the system message should be first
    if (this.systemMessage) {
      messages.push({
        role: 'system',
        content: this.systemMessage,
      })
    }

    if (this.previousMessages) {
      messages.push(...this.previousMessages)
    }

    // send at least one message
    messages.push({
      role: 'user',
      content,
      images,
    })

    return messages
  }

  /**
   * Sends the message and yields the reply's text pieces as they arrive.
   *
   * This is an async generator: each `yield` hands one piece to the consumer and
   * then keeps going. The generator finishes when the server marks the reply as
   * done, when the body ends, or when the consumer stops iterating.
   *
   * @param content Text of the new user message.
   * @param images Optional images to attach to the new user message.
   * @throws Error when the server answers with a non-2xx status or reports an
   *   error inside the stream; rethrows JSON parse errors and fetch/abort errors.
   */
  async *streamChat(content: string, images?: string[]): AsyncGenerator<string> {
    // if we want to cancel the chat at any time, this will allow us to do so
    const abortController = new AbortController()
    const abort = (): void => abortController.abort('Stream ended manually')
    this.onAbort = abort

    try {
      const messages = this.makeMessages(content, images)

      // Tolerate a trailing slash on the host so the URL never contains `//api/chat`.
      const url = this.host.replace(/\/+$/, '') + '/api/chat'

      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        // More options could go in here, but we just need the model name and messages.
        // `format: 'json'` is deliberately NOT sent: per the Ollama API docs it puts the
        // model into JSON mode (constraining what the model writes); it has no effect on
        // how the stream is framed, which is newline-delimited JSON either way.
        body: JSON.stringify({ model: this.modelName, messages }),
        signal: abortController.signal,
      })

      if (!response.ok) {
        // Surface HTTP failures directly instead of failing later on a missing `message` field.
        let detail = ''
        try {
          detail = await response.text()
        } catch {
          // The body is only extra context for the error message; ignore read failures.
        }
        const suffix = detail ? `: ${detail}` : ''
        throw new Error(
          `Ollama request failed with status ${response.status} ${response.statusText}${suffix}`,
        )
      }

      if (!response.body) return

      const reader = response.body.getReader()
      // One decoder for the whole stream: with `stream: true` it keeps the bytes of a
      // multi-byte character that was cut in half by a chunk boundary.
      const decoder = new TextDecoder()
      // Text received so far that does not yet end in a newline (an incomplete JSON line).
      let pending = ''

      try {
        while (true) {
          const { done, value } = await reader.read()

          // On the final read, flush whatever the decoder is still holding.
          pending += done ? decoder.decode() : decoder.decode(value, { stream: true })

          // The output is newline-delimited JSON. Network chunks do not respect line
          // boundaries, so only complete lines are parsed; the tail waits for more data.
          const lines = pending.split('\n')
          pending = done ? '' : (lines.pop() ?? '')

          for (const line of lines) {
            const data = this.parseLine(line)
            if (data === undefined) continue

            // If the response is done, stop streaming entirely (not just this chunk).
            if (data.done) return

            // this "returns" part of the data, then keeps going
            yield data.message.content
          }

          if (done) return
        }
      } finally {
        // Runs on completion, on error, and when the consumer stops early: releases the
        // connection. Cancelling an already errored/aborted stream rejects; that is fine.
        try {
          await reader.cancel()
        } catch {
          // nothing left to clean up
        }
      }
    } finally {
      // Only clear our own handle: a newer streamChat call may have installed its own.
      if (this.onAbort === abort) {
        this.onAbort = undefined
      }
    }
  }

  /** Aborts the request currently in flight; does nothing when no stream is active. */
  cancelStream() {
    if (!this.onAbort) return

    this.onAbort()
    this.onAbort = undefined
  }

  /**
   * Parses one line of the newline-delimited JSON stream.
   *
   * @returns The parsed object, or `undefined` for a blank line.
   * @throws The JSON parse error for malformed input, or an Error when the line
   *   is an object carrying a string `error` field instead of a message.
   */
  private parseLine(line: string): OllamaResponse | undefined {
    // Skip empty strings
    if (!line.trim()) return undefined

    let parsed: unknown
    try {
      parsed = JSON.parse(line)
    } catch (error) {
      console.error('Failed to parse JSON:', line)
      throw error
    }

    const reportedError = (parsed as { error?: unknown } | null)?.error
    if (typeof reportedError === 'string') {
      throw new Error(`Ollama returned an error: ${reportedError}`)
    }

    return parsed as OllamaResponse
  }
}
// Example usage:
// Default host; the example expects an Ollama server to be reachable at this address.
const host = 'http://localhost:11434'
// Model the example chats with.
const modelName = 'gemma:latest' // llava series are the ones to use for images as of this writing
// No chat history and no system message are passed, so only the user message is sent.
const ollamaChat = new BasicOllamaChatExample(host, modelName)
/**
 * Demo driver: sends one fixed question through `ollamaChat`, gathers the
 * streamed pieces, logs any error, and always prints whatever text arrived.
 */
const startStream = async (): Promise<void> => {
  const question = 'What is a good message to send to a bot?'
  const replyStream = ollamaChat.streamChat(question)

  // Pieces are kept in arrival order and joined once at the end.
  const receivedParts: string[] = []

  try {
    for await (const part of replyStream) {
      // do something with part of the message here
      receivedParts.push(part)
    }
  } catch (error: unknown) {
    console.error(error)
  } finally {
    // the final result (possibly partial if the stream failed midway)
    console.log(receivedParts.join(''))
  }
}
// Kick off the example; startStream logs its own errors, so the returned promise is not awaited here.
startStream()
// you can call ollamaChat.cancelStream() at any time and it will stop the message
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment