@luiscape
Last active August 5, 2024 14:20
Ollama with memory snapshots
#!/usr/bin/env bash
# entrypoint.sh: pass-through entrypoint that overrides the ollama/ollama
# image's default entrypoint so the container can run arbitrary commands.
exec "$@"

import subprocess
import sys
import time

import modal

# Model to pull and serve, plus the default Ollama server port.
MODEL_ID: str = "llama3.1:8b"
OLLAMA_PORT: int = 11434
OLLAMA_URL: str = f"http://localhost:{OLLAMA_PORT}"

def _run_subprocess(cmd: list[str], block: bool = True) -> None:
    if block:
        subprocess.run(
            cmd,
            stdout=sys.stdout,
            stderr=sys.stderr,
            check=True,
        )
    else:
        subprocess.Popen(
            cmd,
            stdout=sys.stdout,
            stderr=sys.stderr,
        )

def _is_server_healthy() -> bool:
    import requests

    try:
        response = requests.get(OLLAMA_URL)
        if response.ok:
            print(f"ollama server running => {OLLAMA_URL}")
            return True
        else:
            print(f"ollama server not running => {OLLAMA_URL}")
            return False
    except requests.RequestException:
        return False

def download_model():
    """Start the Ollama server and pull the model weights at image build time."""
    _run_subprocess(["ollama", "serve"], block=False)
    while not _is_server_healthy():
        print("waiting for server to start ...")
        time.sleep(1)

    _run_subprocess(["ollama", "pull", MODEL_ID])

image = (
    modal.Image.from_registry(
        "ollama/ollama:0.3.3",
        add_python="3.11",
    )
    .pip_install("requests")
    .copy_local_file("./entrypoint.sh", "/opt/entrypoint.sh")
    .dockerfile_commands(
        [
            "RUN chmod a+x /opt/entrypoint.sh",
            'ENTRYPOINT ["/opt/entrypoint.sh"]',
        ]
    )
    # Download the model weights during the image build so they are baked in.
    .run_function(download_model)
)

app = modal.App("ollama-server", image=image)

@app.cls(max_inputs=1, enable_memory_snapshot=True)
class Ollama:
    @modal.enter(snap=True)
    def load(self):
        # Start the Ollama server during the snapshotting stage so restored
        # containers come up with the server process already running.
        self._ollama_proc = subprocess.Popen("ollama serve", shell=True)

    @modal.method()
    def wait_for_start(self):
        # Block until the server responds to HTTP requests.
        while not _is_server_healthy():
            print("waiting for server to start ...")
            time.sleep(1)

@app.function(enable_memory_snapshot=True)
@modal.web_server(OLLAMA_PORT)
def main():
    ollama = Ollama()
    ollama.wait_for_start.remote()
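
Once the app is deployed (for example with modal deploy), Modal prints a URL for the
web server, and the standard Ollama HTTP API is reachable at that address. A minimal
client sketch follows; the base URL below is a hypothetical placeholder, so substitute
the URL Modal prints for your own deployment:

import requests

# Hypothetical endpoint; use the URL Modal prints when the app is deployed.
BASE_URL = "https://my-workspace--ollama-server-main.modal.run"

# Query the served llama3.1:8b model through Ollama's /api/generate endpoint.
resp = requests.post(
    f"{BASE_URL}/api/generate",
    json={"model": "llama3.1:8b", "prompt": "Why is the sky blue?", "stream": False},
    timeout=300,
)
resp.raise_for_status()
print(resp.json()["response"])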