Skip to content

Instantly share code, notes, and snippets.

@kellemar
Last active January 14, 2024 08:39
Show Gist options
  • Save kellemar/f2ca4c4afec8f3fcaf27390ad3334783 to your computer and use it in GitHub Desktop.
Save kellemar/f2ca4c4afec8f3fcaf27390ad3334783 to your computer and use it in GitHub Desktop.
Common.py for Mistral
from modal import Stub, Image, Volume, Secret
import os
# Name of the Modal app/stub defined at the bottom of this module.
APP_NAME = "example-axolotl"

# Image digest for winglian/axolotl:main-py3.10-cu118-2.0.1 (2023-12-11);
# pinning by digest keeps builds reproducible even if the tag moves.
AXOLOTL_REGISTRY_SHA = "59ec55880c8870147150dfb37646fb405d8926ea39d0ba3999ccdcdd79f968af"

# transformers is pinned to an older commit to avoid checkpointing errors.
TRANSFORMERS_SHA = "5324bf9c07c318015eccc5fba370a81368a8df28"
# Training image: the upstream axolotl container (pinned by digest), with the
# axolotl repo checked out at a fixed commit and a few pinned Python packages
# layered on top. Install order matters: the transformers pin is applied last
# with --force-reinstall so it wins over whatever the base image ships.
axolotl_image = (
    Image.from_registry(f"winglian/axolotl@sha256:{AXOLOTL_REGISTRY_SHA}")
    .run_commands(
        "git clone https://github.com/OpenAccess-AI-Collective/axolotl /root/axolotl",
        "cd /root/axolotl && git checkout a581e9f8f66e14c22ec914ee792dd4fe073e62f6",
    )
    .pip_install("huggingface_hub==0.19.4", "hf-transfer==0.1.4", "trl==0.7.7")
    .pip_install("datasets==2.16.1", "-U")
    .pip_install(
        f"transformers @ git+https://github.com/huggingface/transformers.git@{TRANSFORMERS_SHA}",
        "--force-reinstall",
    )
    # Cache HF downloads under /pretrained (backed by a volume below) and
    # enable hf-transfer for faster model downloads.
    .env({"HUGGINGFACE_HUB_CACHE": "/pretrained", "HF_HUB_ENABLE_HF_TRANSFER": "1"})
)
# Inference image: plain CUDA 12.1 base with Python 3.10 and a pinned vLLM release.
vllm_image = Image.from_registry(
    "nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10"
).pip_install("vllm==0.2.5")
# DPO training image: same CUDA base plus git and the TRL/PEFT training stack.
# NOTE(review): unlike the other two images, none of these packages are pinned,
# so this image can drift between builds — consider pinning versions.
dpo_image = (
    Image.from_registry("nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10")
    .run_commands("apt update", "apt install git -y")
    .pip_install(
        "datasets",
        "trl",
        "peft",
        "bitsandbytes",
        "sentencepiece",
        "wandb",
        "huggingface_hub",
        "torch",
    )
)
# App stub; the "huggingface" Modal secret is attached to every function on it
# (presumably it carries an HF token for gated model downloads — verify in the
# Modal dashboard).
stub = Stub(APP_NAME, secrets=[Secret.from_name("huggingface")])

# Persisted volumes: pretrained-model cache, training-run outputs, and datasets.
pretrained_volume = Volume.persisted("example-pretrained-vol")
runs_volume = Volume.persisted("example-runs-vol")
datasets_volume = Volume.persisted("datasets-volume")

# Mount-point -> Volume mapping to pass to Modal functions. The paths must
# match what the images above expect (HUGGINGFACE_HUB_CACHE is /pretrained).
VOLUME_CONFIG: dict[str | os.PathLike, Volume] = {
    "/pretrained": pretrained_volume,
    "/runs": runs_volume,
    "/data": datasets_volume,
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment