Last active
August 5, 2024 14:20
-
-
Save luiscape/3abb14c008566106a62905e82c34aee4 to your computer and use it in GitHub Desktop.
Ollama with memory snapshots
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Container entrypoint shim.
# Needed in order to support the ollama/ollama image.
# `exec` replaces this shell with the given command, so the container's
# main process (PID 1) is the command itself and receives signals directly.
exec "$@"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import sys | |
import time | |
import modal | |
# Model tag that `ollama pull` downloads and the server serves.
MODEL_ID: str = "llama3.1:8b"
# Default server port.
OLLAMA_PORT: int = 11434
# Base URL of the local ollama server, used by the health check.
OLLAMA_URL: str = f"http://localhost:{OLLAMA_PORT}"
def _run_subprocess(cmd: list[str], block: bool = True) -> None: | |
if block: | |
subprocess.run( | |
cmd, | |
stdout=sys.stdout, | |
stderr=sys.stderr, | |
check=True, | |
) | |
else: | |
subprocess.Popen( | |
cmd, | |
stdout=sys.stdout, | |
stderr=sys.stderr, | |
) | |
def _is_server_healthy() -> bool:
    """Probe the local ollama HTTP endpoint; True when it responds OK."""
    # Imported lazily: `requests` is only installed inside the image.
    import requests

    try:
        ok = requests.get(OLLAMA_URL).ok
    except requests.RequestException:
        # Connection refused, timeout, etc. — the server is not up yet.
        return False
    if ok:
        print(f"ollama server running => {OLLAMA_URL}")
        return True
    print(f"ollama server not running => {OLLAMA_URL}")
    return False
def download_model(timeout: float = 300.0) -> None:
    """Start a background ollama server and pull ``MODEL_ID`` into the image.

    Intended to run at image-build time so the model weights are baked in.

    Args:
        timeout: Maximum seconds to wait for the server to become
            healthy before giving up (new, defaulted — existing callers
            are unaffected).

    Raises:
        TimeoutError: If the server is not healthy within ``timeout``.
        subprocess.CalledProcessError: If ``ollama pull`` exits non-zero.
    """
    _run_subprocess(["ollama", "serve"], block=False)
    deadline = time.monotonic() + timeout
    while not _is_server_healthy():
        if time.monotonic() > deadline:
            # Fail loudly instead of spinning forever when the server
            # never comes up (e.g. a bad image or port conflict).
            raise TimeoutError(f"ollama server not healthy after {timeout}s")
        print("waiting for server to start ...")
        time.sleep(1)
    _run_subprocess(["ollama", "pull", MODEL_ID])
# Build the container image: start from the official ollama image, add a
# Python 3.11 runtime, install `requests` (used by the health check),
# install the entrypoint shim, and run `download_model` at build time so
# the model weights are baked into the image.
image = (
    modal.Image.from_registry(
        "ollama/ollama:0.3.3",
        add_python="3.11",
    )
    .pip_install("requests")
    # Copy the local entrypoint script into the image...
    .copy_local_file("./entrypoint.sh", "/opt/entrypoint.sh")
    # ...then make it executable and set it as the image ENTRYPOINT.
    .dockerfile_commands(
        [
            "RUN chmod a+x /opt/entrypoint.sh",
            'ENTRYPOINT ["/opt/entrypoint.sh"]',
        ]
    )
    .run_function(download_model)
)
# Modal app; every function/class below runs in the image built above.
app = modal.App("ollama-server", image=image)
@app.cls(max_inputs=1, enable_memory_snapshot=True)
class Ollama:
    """Runs an ollama server inside the Modal container.

    NOTE(review): `enable_memory_snapshot=True` together with
    `@modal.enter(snap=True)` suggests the started server process is
    captured in a memory snapshot and restored on later cold starts —
    confirm against Modal's snapshot documentation.
    """

    @modal.enter(snap=True)
    def load(self):
        """Launch the ollama server in the background."""
        # Use an argument list (no shell) so the stored handle is the
        # ollama process itself, not an intermediate `sh -c` wrapper;
        # terminate()/wait() on the handle then act on the real server.
        self._ollama_proc = subprocess.Popen(["ollama", "serve"])

    @modal.method()
    def wait_for_start(self):
        """Block until the server answers the HTTP health check."""
        while not _is_server_healthy():
            print("waiting for server to start ...")
            time.sleep(1)
@app.function(enable_memory_snapshot=True)
@modal.web_server(OLLAMA_PORT)
def main():
    # Web-server entry point: exposes OLLAMA_PORT, then remotely invokes
    # the Ollama class's health-wait so traffic is only served once the
    # ollama server is up.
    ollama = Ollama()
    ollama.wait_for_start.remote()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment