- Install Ubuntu 24.04 in WSL2
- Install CUDA inside WSL2:
# --- CUDA 12.8 toolkit inside WSL2 (Ubuntu) ---
# Pin NVIDIA's repo so its packages take precedence over Ubuntu's own.
wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-wsl-ubuntu.pin
sudo mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600
# Local-repo installer for CUDA 12.8, WSL-specific build (toolkit only — the
# Windows-side NVIDIA driver provides GPU access to WSL2, so no Linux driver here).
wget https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda-repo-wsl-ubuntu-12-8-local_12.8.0-1_amd64.deb
sudo dpkg -i cuda-repo-wsl-ubuntu-12-8-local_12.8.0-1_amd64.deb
# Register the repo's signing key so apt trusts the local repository.
sudo cp /var/cuda-repo-wsl-ubuntu-12-8-local/cuda-*-keyring.gpg /usr/share/keyrings/
sudo apt-get update
sudo apt-get -y install cuda-toolkit-12-8
# Make nvcc and the CUDA runtime libraries visible in this and future shells.
# NOTE(review): these appends duplicate on re-run — harmless but untidy.
echo 'export PATH=/usr/local/cuda-12.8/bin:$PATH' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-12.8/lib64:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc
- Install vLLM (the wheel must match the installed CUDA version — verify with `nvcc --version`):
# Create an isolated Python environment and install vLLM with CUDA 12.8 wheels.
# python3-venv is required on Ubuntu for `python3 -m venv` to work at all;
# python3-dev provides headers in case pip has to build a native extension.
sudo apt install -y python3-dev python3-venv
python3 -m venv vllm-env
source vllm-env/bin/activate
# Install PyTorch built against CUDA 12.8 first, from the matching wheel index.
pip install --index-url https://download.pytorch.org/whl/cu128 torch torchvision
# Prebuilt vLLM wheel (abi3: one wheel covers CPython >= 3.8); keep the cu128
# index as a fallback so dependency resolution stays CUDA-consistent.
pip install \
  https://github.com/vllm-project/vllm/releases/download/v0.10.1.1/vllm-0.10.1.1-cp38-abi3-manylinux1_x86_64.whl \
  --extra-index-url https://download.pytorch.org/whl/cu128
- Install Docker
- Install Open-WebUI:
# Open-WebUI in Docker: UI exposed on host port 8000 (container listens on 8080).
# The OpenAI-compatible backend is the vLLM server running on the HOST at port
# 7000 (see `vllm serve ... --port 7000` below). Inside the container,
# 0.0.0.0/127.0.0.1 would refer to the container itself, so map the host
# gateway to `host.docker.internal` and point the API base URL at it.
docker run -d \
  --name open-webui \
  -p 8000:8080 \
  -v open-webui:/app/backend/data \
  --add-host=host.docker.internal:host-gateway \
  -e OPENAI_API_BASE_URL=http://host.docker.internal:7000/v1 \
  --restart always \
  ghcr.io/open-webui/open-webui:main
- Run vllm:
vllm serve ./Goedel-Prover-V2-32B.Q4_K_S.gguf --dtype bfloat16 --kv-cache-dtype fp8 --max-model-len 4096 --gpu-memory-utilization 0.99 --swap-space 16 --port 7000
- Optional: install the time/token usage tracker filter in Open-WebUI (Admin → Functions) from https://raw.githubusercontent.com/owndev/Open-WebUI-Functions/refs/heads/master/filters/time_token_tracker.py