Last active
August 5, 2024 14:20
-
-
Save luiscape/3abb14c008566106a62905e82c34aee4 to your computer and use it in GitHub Desktop.
Ollama with memory snapshots
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Container entrypoint shim.
# Needed in order to support the ollama/ollama image.
# `exec` replaces this shell with the given command, so the container's
# main process (PID 1) is the command itself and receives signals directly.
exec "$@"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import sys | |
import time | |
import modal | |
# Model tag that `ollama pull` downloads and the server serves.
MODEL_ID: str = "llama3.1:8b"
# Default server port.
OLLAMA_PORT: int = 11434
# Base URL of the local ollama server, used by the health check.
OLLAMA_URL: str = f"http://localhost:{OLLAMA_PORT}"
def _run_subprocess(cmd: list[str], block: bool = True) -> None: | |
if block: | |
subprocess.run( | |
cmd, | |
stdout=sys.stdout, | |
stderr=sys.stderr, | |
check=True, | |
) | |
else: | |
subprocess.Popen( | |
cmd, | |
stdout=sys.stdout, | |
stderr=sys.stderr, | |
) | |
def _is_server_healthy() -> bool:
    """Probe the local ollama HTTP endpoint; True when it responds OK."""
    # Imported lazily: `requests` is only installed inside the image.
    import requests

    try:
        ok = requests.get(OLLAMA_URL).ok
    except requests.RequestException:
        # Connection refused, timeout, etc. — the server is not up yet.
        return False
    if ok:
        print(f"ollama server running => {OLLAMA_URL}")
        return True
    print(f"ollama server not running => {OLLAMA_URL}")
    return False
def download_model(timeout: float = 300.0) -> None:
    """Start a background ollama server and pull ``MODEL_ID`` into the image.

    Intended to run at image-build time so the model weights are baked in.

    Args:
        timeout: Maximum seconds to wait for the server to become
            healthy before giving up (new, defaulted — existing callers
            are unaffected).

    Raises:
        TimeoutError: If the server is not healthy within ``timeout``.
        subprocess.CalledProcessError: If ``ollama pull`` exits non-zero.
    """
    _run_subprocess(["ollama", "serve"], block=False)
    deadline = time.monotonic() + timeout
    while not _is_server_healthy():
        if time.monotonic() > deadline:
            # Fail loudly instead of spinning forever when the server
            # never comes up (e.g. a bad image or port conflict).
            raise TimeoutError(f"ollama server not healthy after {timeout}s")
        print("waiting for server to start ...")
        time.sleep(1)
    _run_subprocess(["ollama", "pull", MODEL_ID])
# Build the container image: start from the official ollama image, add a
# Python 3.11 runtime, install `requests` (used by the health check),
# install the entrypoint shim, and run `download_model` at build time so
# the model weights are baked into the image.
image = (
    modal.Image.from_registry(
        "ollama/ollama:0.3.3",
        add_python="3.11",
    )
    .pip_install("requests")
    # Copy the local entrypoint script into the image...
    .copy_local_file("./entrypoint.sh", "/opt/entrypoint.sh")
    # ...then make it executable and set it as the image ENTRYPOINT.
    .dockerfile_commands(
        [
            "RUN chmod a+x /opt/entrypoint.sh",
            'ENTRYPOINT ["/opt/entrypoint.sh"]',
        ]
    )
    .run_function(download_model)
)
# Modal app; every function/class below runs in the image built above.
app = modal.App("ollama-server", image=image)
@app.cls(max_inputs=1, enable_memory_snapshot=True)
class Ollama:
    """Runs an ollama server inside the Modal container.

    NOTE(review): `enable_memory_snapshot=True` together with
    `@modal.enter(snap=True)` suggests the started server process is
    captured in a memory snapshot and restored on later cold starts —
    confirm against Modal's snapshot documentation.
    """

    @modal.enter(snap=True)
    def load(self):
        """Launch the ollama server in the background."""
        # Use an argument list (no shell) so the stored handle is the
        # ollama process itself, not an intermediate `sh -c` wrapper;
        # terminate()/wait() on the handle then act on the real server.
        self._ollama_proc = subprocess.Popen(["ollama", "serve"])

    @modal.method()
    def wait_for_start(self):
        """Block until the server answers the HTTP health check."""
        while not _is_server_healthy():
            print("waiting for server to start ...")
            time.sleep(1)
@app.function(enable_memory_snapshot=True)
@modal.web_server(OLLAMA_PORT)
def main():
    # Web-server entry point: exposes OLLAMA_PORT, then remotely invokes
    # the Ollama class's health-wait so traffic is only served once the
    # ollama server is up.
    ollama = Ollama()
    ollama.wait_for_start.remote()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment