Skip to content

Instantly share code, notes, and snippets.

@tkafka
Created April 22, 2026 19:59
Show Gist options
  • Select an option

  • Save tkafka/9b7c66775e8755647196b011a43600eb to your computer and use it in GitHub Desktop.

Select an option

Save tkafka/9b7c66775e8755647196b011a43600eb to your computer and use it in GitHub Desktop.
Convert whisper-large-v3-czech-cv13 to ggml for VoiceInk
#!/usr/bin/env bash
#
# Fetches mikr/whisper-large-v3-czech-cv13 (HuggingFace Safetensors) and
# converts it into a GGML .bin file that VoiceInk can import.
#
# Set up by this script on first run:
#   - shallow clone of ggml-org/whisper.cpp (supplies the conversion script)
#   - shallow clone of openai/whisper (vocab assets used by the conversion)
#   - Python venv with torch, transformers and numpy
#
# Expected on PATH beforehand: git and python3. The model is fetched through
# git-lfs when that is available, otherwise through the `hf` CLI.
#
# Usage: bash convert-to-voiceink.sh

set -euo pipefail

# HuggingFace repository to convert, and the file name of the finished model.
MODEL_ID='mikr/whisper-large-v3-czech-cv13'
OUTPUT_NAME='whisper-large-v3-czech-cv13.bin'

# Working directory for all clones, the venv and the output file.
STAGING=~/Downloads/whisper

# Sub-directory of $STAGING that receives the HuggingFace model files.
MODEL_DIR='model'

# Files requested explicitly when the model is fetched with `hf download`.
MODEL_FILES=(
  added_tokens.json config.json generation_config.json
  merges.txt model.safetensors normalizer.json
  preprocessor_config.json special_tokens_map.json
  tokenizer.json tokenizer_config.json vocab.json
)
#######################################
# Ensures $MODEL_DIR is a Git checkout of the HuggingFace repo $MODEL_ID and
# then fetches model.safetensors into it through Git LFS.
# Globals:   MODEL_DIR (read), MODEL_ID (read)
# Arguments: none
# Outputs:   progress messages on stdout
#######################################
clone_or_update_model_repo() {
  local weights="model.safetensors"

  if [[ -d "$MODEL_DIR/.git" ]]; then
    echo "==> Model Git repo already exists, refreshing..."
    git -C "$MODEL_DIR" pull --ff-only
  else
    # Anything at $MODEL_DIR that is not a Git checkout is discarded first.
    rm -rf "$MODEL_DIR"
    echo "==> Cloning $MODEL_ID via Git..."
    # LFS smudging is skipped for the clone; the weights are pulled
    # explicitly below.
    GIT_LFS_SKIP_SMUDGE=1 git clone --depth=1 "https://huggingface.co/$MODEL_ID" "$MODEL_DIR"
  fi

  echo "==> Downloading model.safetensors via Git LFS..."
  git -C "$MODEL_DIR" lfs pull --include="$weights"
  git -C "$MODEL_DIR" lfs checkout "$weights"
}
#######################################
# Deletes *.lock files under <dir>/.cache/huggingface, but only when no
# process whose command line looks like `hf` or `huggingface-cli` is running.
# Arguments: $1 - directory that contains the .cache/huggingface folder
# Returns:   0 when there is nothing to do or a matching process is running;
#            otherwise the exit status of find
#######################################
clear_stale_hf_locks() {
  local hf_cache="$1/.cache/huggingface"
  local running_cli="(^|/)hf($| )|(^|/)huggingface-cli($| )"

  # No cache folder means there are no lock files to remove.
  [[ -d "$hf_cache" ]] || return 0

  # Lock files are left alone while a matching CLI process is running.
  if ! pgrep -f "$running_cli" >/dev/null 2>&1; then
    find "$hf_cache" -name '*.lock' -type f -delete
  fi
}
# ── Staging area: every later step runs relative to $STAGING ─────────────────
echo "==> Staging folder: $STAGING"
mkdir -p "$STAGING"
cd "$STAGING"

# ── whisper.cpp checkout (ships models/convert-h5-to-ggml.py) ────────────────
if [[ -d "whisper.cpp" ]]; then
  echo "==> whisper.cpp already cloned, skipping."
else
  echo "==> Cloning ggml-org/whisper.cpp..."
  git clone --depth=1 https://github.com/ggml-org/whisper.cpp
fi

# ── openai/whisper checkout (vocab assets read by the conversion script) ─────
if [[ -d "openai-whisper" ]]; then
  echo "==> openai-whisper already cloned, skipping."
else
  echo "==> Cloning openai/whisper..."
  git clone --depth=1 https://github.com/openai/whisper openai-whisper
fi

# ── Python virtual environment ───────────────────────────────────────────────
if [[ ! -d "venv" ]]; then
  echo "==> Creating Python venv..."
  python3 -m venv venv
fi
# shellcheck disable=SC1091
source venv/bin/activate

# Dependencies are (re)installed on every run, inside the activated venv.
echo "==> Installing Python dependencies (torch, transformers, numpy)..."
pip install --quiet --upgrade pip
pip install --quiet torch transformers numpy

# ── Fetch the HuggingFace model (only when the final output is missing) ──────
if [[ -f "$OUTPUT_NAME" ]]; then
  echo "==> Output already exists, skipping model download."
else
  echo "==> Ensuring $MODEL_ID is fully downloaded..."
  if ! command -v git-lfs >/dev/null 2>&1; then
    # Without git-lfs, fall back to the hf CLI and request each file by name.
    mkdir -p "$MODEL_DIR"
    clear_stale_hf_locks "$MODEL_DIR"
    hf_download_args=(
      download "$MODEL_ID"
      --local-dir "./$MODEL_DIR"
      --max-workers 4
      "${MODEL_FILES[@]}"
    )
    hf "${hf_download_args[@]}"
  else
    clone_or_update_model_repo
  fi
fi

# ── Convert HF Safetensors → GGML ────────────────────────────────────────────
if [[ -f "$OUTPUT_NAME" ]]; then
  echo "==> Output already exists, skipping conversion."
else
  echo "==> Converting to GGML format..."
  # Remove converter output left over from an earlier run before regenerating.
  rm -f ggml-model.bin ggml-model-f32.bin
  # Converter arguments: model directory, openai-whisper checkout, output dir.
  python3 ./whisper.cpp/models/convert-h5-to-ggml.py "./$MODEL_DIR/" ./openai-whisper .
  # Give the generated file its final name.
  mv ggml-model.bin "$OUTPUT_NAME"
fi

# ── Summary and VoiceInk import instructions ─────────────────────────────────
printf '%s\n' \
  "" \
  "✓ Done! Model saved at:" \
  " $STAGING/$OUTPUT_NAME" \
  "" \
  "To import into VoiceInk:" \
  " AI Models → Local tab → Import Local Model… → select $OUTPUT_NAME" \
  " Then: Set as Default"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment