This article is based on the following article:
curl -LsSf https://astral.sh/uv/install.sh | sh

See Installing uv for details.
wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get -y install cuda-toolkit-13-3

Based on the tutorial here:
git clone https://github.com/ggml-org/llama.cpp
cd llama.cpp
gh pr checkout 24423
# NOTE: Need to specify nvcc path explicitly
cmake -B build -DGGML_CUDA=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda
cmake --build build -j --config Release --target llama-diffusion-cli

uvx hf auth login
uvx hf download unsloth/diffusiongemma-26B-A4B-it-GGUF \
--local-dir unsloth/diffusiongemma-26B-A4B-it-GGUF \
--include "*Q4_K_M*"

See here for more details.