Created January 9, 2026 05:02
run llama-server on a CPU-only box with a bunch of RAM
File: llama-server.sh

#!/usr/bin/env bash

# Paths
LLAMA_BIN="./llama.cpp/build/bin/llama-server"
MODELS_DIR="./models"

# Network
HOST="127.0.0.1"
PORT="8081"

# Global performance settings
THREADS=4
BATCH_SIZE=256
CTX_SIZE=16384

# Pin the server to cores 0-3 (matching THREADS), keep all layers on the
# CPU, lock model weights in RAM, and quantize the KV cache to q4_0 to
# fit a 16K context in less memory.
taskset -c 0,1,2,3 \
  "$LLAMA_BIN" \
  --host "$HOST" \
  --port "$PORT" \
  --cpu-moe \
  --n-gpu-layers 0 \
  --mlock \
  --threads "$THREADS" \
  --batch-size "$BATCH_SIZE" \
  --ctx-size "$CTX_SIZE" \
  --cache-type-k q4_0 \
  --cache-type-v q4_0 \
  --models-dir "$MODELS_DIR" \
  --models-max 1 \
  --temp 0.3 \
  --top-p 0.85 \
  --api-prefix /v1
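Since the script serves an OpenAI-compatible API under the `/v1` prefix, a client can talk to it with plain `curl`. A minimal sketch, assuming the server above is already running on the same `HOST`/`PORT` (the request body here is illustrative, not taken from the gist):

```shell
#!/usr/bin/env bash
# Must match the values in llama-server.sh above.
HOST="127.0.0.1"
PORT="8081"

# Build a chat-completion request body. temperature/max_tokens are
# per-request overrides; the server-side --temp/--top-p act as defaults.
PAYLOAD='{
  "messages": [
    {"role": "user", "content": "Say hello in one word."}
  ],
  "temperature": 0.3,
  "max_tokens": 16
}'

# Sanity-check that the payload is valid JSON before sending it.
echo "$PAYLOAD" | python3 -m json.tool > /dev/null && echo "payload OK"

# Send the request (uncomment once llama-server is up):
# curl -s "http://${HOST}:${PORT}/v1/chat/completions" \
#   -H "Content-Type: application/json" \
#   -d "$PAYLOAD"
```

Because `--api-prefix /v1` maps the endpoints to the paths the OpenAI SDKs expect, the same server also works with those clients by pointing their base URL at `http://127.0.0.1:8081/v1`.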