Last active
April 13, 2026 16:05
-
-
Save dpaluy/657d3ed3a23fb99ff8b6172a4767a51f to your computer and use it in GitHub Desktop.
Mac Mini M4 16GB llama.cpp for Hermes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?xml version="1.0" encoding="UTF-8"?> | |
| <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | |
| <!-- launchd job definition: starts llama-server when the job is loaded (RunAtLoad) and keeps it running (KeepAlive). --> | |
| <!-- All paths are absolute and hard-coded for the "clawbot" account; replace "clawbot" everywhere in this file with your own macOS user name. --> | |
| <plist version="1.0"> | |
| <dict> | |
| <key>Label</key> | |
| <string>com.clawbot.llama-server</string> | |
| <!-- Command line for llama-server; every argument is its own string element, in order. --> | |
| <key>ProgramArguments</key> | |
| <array> | |
| <string>/opt/homebrew/bin/llama-server</string> | |
| <string>-m</string> | |
| <string>/Users/clawbot/models/Carnice-9b-Q6_K.gguf</string> | |
| <string>-ngl</string> | |
| <string>99</string> | |
| <string>-c</string> | |
| <string>65536</string> | |
| <string>-np</string> | |
| <string>1</string> | |
| <string>-fa</string> | |
| <string>on</string> | |
| <string>--cache-type-k</string> | |
| <string>q4_0</string> | |
| <string>--cache-type-v</string> | |
| <string>q4_0</string> | |
| <string>--jinja</string> | |
| <!-- Host 0.0.0.0 listens on every network interface; use 127.0.0.1 instead for local-only access. --> | |
| <string>--host</string> | |
| <string>0.0.0.0</string> | |
| <string>--port</string> | |
| <string>8080</string> | |
| </array> | |
| <key>RunAtLoad</key> | |
| <true/> | |
| <key>KeepAlive</key> | |
| <true/> | |
| <!-- Log files for the server's stdout and stderr, under the same home directory as the model path above. --> | |
| <!-- NOTE(review): the models/.cache directory presumably has to exist before the job is loaded; confirm. --> | |
| <key>StandardOutPath</key> | |
| <string>/Users/clawbot/models/.cache/llama-server.log</string> | |
| <key>StandardErrorPath</key> | |
| <string>/Users/clawbot/models/.cache/llama-server.err</string> | |
| </dict> | |
| </plist> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # Start llama-server for a Carnice-9b GGUF model. | |
| # Usage: pass "q6" or "q8" to pick a quantization from $MODEL_DIR, or pass a path to any model file. | |
| # With no argument the Q8_0 model is served. | |
| MODEL_DIR="$HOME/models" | |
| PORT=8080 | |
| # Context size handed to llama-server via -c; the smaller value is kept as a commented alternative. | |
| # CTX_SIZE=32768 | |
| CTX_SIZE=65536 | |
| # Default to Q8, pass "q6" as argument for Q6_K; any other argument is used verbatim as the model path. | |
| case "${1:-q8}" in | |
| q6) MODEL="$MODEL_DIR/Carnice-9b-Q6_K.gguf" ;; | |
| q8) MODEL="$MODEL_DIR/Carnice-9b-Q8_0.gguf" ;; | |
| *) MODEL="$1" ;; | |
| esac | |
| echo "Serving: $MODEL" | |
| # The variable name must match the assignment above; a mismatch expands to an empty -c value. | |
| llama-server \ | |
| -m "$MODEL" \ | |
| -ngl 99 \ | |
| -c "$CTX_SIZE" \ | |
| -np 1 \ | |
| -fa on \ | |
| --cache-type-k q4_0 \ | |
| --cache-type-v q4_0 \ | |
| --host 0.0.0.0 \ | |
| --jinja \ | |
| --port "$PORT" | |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://huggingface.co/kai-os/Carnice-9b-GGUF
Download models:
Test
llama-cli -m ~/models/Carnice-9b-Q6_K.gguf -p "Hello" -c 4096