jatinkrmalik · February 3, 2026 08:14
diff --git a/README.md b/README.md
diff --git a/fix-audio-binary.patch b/fix-audio-binary.patch
 # =====================================
 # OpenClaw Audio Binary Injection Fix
 # =====================================
 #
 # WHAT THIS FIXES:
 # ----------------
 # Voice messages (especially from Telegram) were being treated as text files
 # instead of audio. This caused the raw audio binary to be dumped into the
 # chat context, wasting thousands of tokens and breaking conversations.
 #
 # SYMPTOMS:
 # - Bot stops responding after a few voice messages
 # - Session token count explodes to 200K+ with just a few voice notes
 # - Voice messages show as "text/tab-separated-values" in logs
 #
 # THE FIX:
 # --------
 # Skips audio files from being embedded as text. Only the transcription
 # (the converted text) goes to the AI, not the raw audio file.
 #
 # APPLICABLE TO:
 # --------------
 # - OpenClaw v2026.1.30 and earlier
 # - Affects: Telegram voice messages, Discord audio, all voice notes
 #
 # ============================================================================
 # INSTALLATION INSTRUCTIONS
 # ============================================================================
 #
 # STEP 1: Find your OpenClaw installation directory
 #
 #   macOS (Homebrew):
 #     /opt/homebrew/lib/node_modules/openclaw/
 #
 #   Linux (nvm):
 #     ~/.nvm/versions/node/v22.22.0/lib/node_modules/openclaw/
 #
 #   Linux (npm global):
 #     /usr/lib/node_modules/openclaw/
 #
 #   Windows:
 #     %APPDATA%\npm\node_modules\openclaw\
 #
 # STEP 2: Apply this patch
 #
 #   From the directory that contains fix-audio-binary.patch, run:
 #
 #     patch -p1 < fix-audio-binary.patch /path/to/openclaw/dist/media-understanding/apply.js
 #
 #   Or edit the file manually:
 #   - Open: dist/media-understanding/apply.js
 #   - Apply the changes shown in the "Patch" section below
 #
 # STEP 3: Restart OpenClaw
 #
 #     openclaw gateway restart
 #
 # VERIFICATION:
 #   Send a voice note. The bot should respond normally, and you should NOT
 #   see <file name="...ogg" mime="text/tab-separated-values"> in the session.
 #
 # ============================================================================
 # PATCH START - Changes to dist/media-understanding/apply.js
 # ============================================================================

 --- a/dist/media-understanding/apply.js
 +++ b/dist/media-understanding/apply.js
 @@ -97,6 +97,36 @@ function resolveUtf16Charset(buffer) {
     }
     return undefined;
 }
 +
 +/**
 + * Reports whether `buffer` begins with a known binary-audio file signature,
 + * so OGG/Opus and ID3-tagged MP3 data is not mistaken for UTF-8 text.
 + * Signatures checked: "OggS" (RFC 3533 Section 6), "ID3" (id3.org Section 3.1).
 + *
 + * @param {*} buffer - byte-indexable data (assumed Buffer/Uint8Array); may be null/undefined
 + * @returns {boolean} true on a signature match; false otherwise or if under 3 bytes
 + */
 +function hasBinaryAudioMagic(buffer) {
 +    if (!buffer || buffer.length < 3) {
 +        return false;
 +    }
 +    // OGG container: "OggS" (4 bytes; the guard above only guarantees 3)
 +    if (buffer.length >= 4 &&
 +        buffer[0] === 0x4F && // O
 +        buffer[1] === 0x67 && // g
 +        buffer[2] === 0x67 && // g
 +        buffer[3] === 0x53) { // S
 +        return true;
 +    }
 +    // MP3 with ID3v2: "ID3" signature (3 bytes)
 +    if (buffer[0] === 0x49 && // I
 +        buffer[1] === 0x44 && // D
 +        buffer[2] === 0x33) { // 3
 +        return true;
 +    }
 +    return false;
 +}
 +
 function looksLikeUtf8Text(buffer) {
     if (!buffer || buffer.length === 0) {
         return false;
 @@ -177,7 +207,7 @@ async function extractFileBlocks(params) {
         }
         const forcedTextMime = resolveTextMimeFromName(attachment.path ?? attachment.url ?? "");
         const kind = forcedTextMime ? "document" : resolveAttachmentKind(attachment);
 -        if (!forcedTextMime && (kind === "image" || kind === "video")) {
 +        if (!forcedTextMime && (kind === "image" || kind === "audio" || kind === "video")) {
             continue;
         }
         if (!limits.allowUrl && attachment.url && !attachment.path) {
 @@ -199,10 +229,21 @@ async function extractFileBlocks(params) {
             continue;
         }
         const nameHint = bufferResult?.fileName ?? attachment.path ?? attachment.url;
 +
 +        // PATCH: Skip audio files by extension (defense-in-depth)
 +        // Even if kind detection fails, audio extensions should be skipped
 +        // to prevent OGG/Opus binary from being embedded as text.
 +        const _patchAudioExts = new Set(['.ogg', '.opus', '.mp3', '.wav', '.aac', '.flac', '.m4a', '.oga', '.webm']);
 +        const _patchNameExt = nameHint ? path.extname(nameHint).toLowerCase() : '';
 +        if (_patchAudioExts.has(_patchNameExt)) {
 +            continue;
 +        }
 +
         const forcedTextMimeResolved = forcedTextMime ?? resolveTextMimeFromName(nameHint ?? "");
         const utf16Charset = resolveUtf16Charset(bufferResult?.buffer);
         const textSample = decodeTextSample(bufferResult?.buffer);
 -        const textLike = Boolean(utf16Charset) || looksLikeUtf8Text(bufferResult?.buffer);
 +        const textLike = (Boolean(utf16Charset) || looksLikeUtf8Text(bufferResult?.buffer)) &&
 +            !hasBinaryAudioMagic(bufferResult?.buffer);
         if (!forcedTextMimeResolved && kind === "audio" && !textLike) {
             continue;
         }

 # ============================================================================
 # END OF PATCH
 # ============================================================================
 # 
 # ADDITIONAL NOTES:
 # -----------------
 # - This patch is temporary and will be overwritten when you update OpenClaw
 # - Re-apply after updates until the official fix is released
 # - Tracking issues: #1989, #4197 | PRs: #3904, #4235
 # - Works for Telegram, Discord, and all voice message platforms
 #
 # ============================================================================
diff --git a/optional-apply.sh b/optional-apply.sh
 #!/bin/bash
 # OpenClaw Audio Binary Fix - Auto-apply script
 #
 # Usage: bash optional-apply.sh [/path/to/openclaw]
 #
 # Applies fix-audio-binary.patch (expected to sit next to this script) to
 # dist/media-understanding/apply.js of an OpenClaw installation, keeping a
 # timestamped backup, and then tries to restart the gateway.
 # With no argument, a few common global install locations are probed.
 # This is optional as you can also just follow the comments in the patch file.

 set -e

 echo "======================================"
 echo "  OpenClaw Audio Binary Injection Fix"
 echo "======================================="
 echo ""

 # Resolve the patch relative to this script so the script can be run from
 # any working directory.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 PATCH_FILE="$SCRIPT_DIR/fix-audio-binary.patch"

 if [ ! -f "$PATCH_FILE" ]; then
     echo "❌ Could not find the patch file at expected location:"
     echo "   $PATCH_FILE"
     exit 1
 fi

 # An installation path given as the first argument takes precedence over
 # auto-detection.
 OPENCLAW_PATH="${1:-}"

 if [ -n "$OPENCLAW_PATH" ] && [ ! -d "$OPENCLAW_PATH" ]; then
     echo "❌ Not a directory: $OPENCLAW_PATH"
     exit 1
 fi

 if [ -z "$OPENCLAW_PATH" ]; then
     # Common install locations. The nvm glob is expanded here, yielding one
     # array entry per installed Node version (or the literal pattern when
     # nothing matches, which then fails the -d test below).
     CANDIDATES=(
         "$HOME/.nvm/versions/node"/v*/lib/node_modules/openclaw
         /opt/homebrew/lib/node_modules/openclaw
         /usr/local/lib/node_modules/openclaw
         /usr/lib/node_modules/openclaw
     )

     for candidate in "${CANDIDATES[@]}"; do
         if [ -d "$candidate" ]; then
             OPENCLAW_PATH="$candidate"
             break
         fi
     done
 fi

 if [ -z "$OPENCLAW_PATH" ]; then
     echo "❌ Could not find OpenClaw installation."
     echo ""
     echo "Please specify the path manually:"
     echo "  bash $0 /path/to/openclaw"
     exit 1
 fi

 echo "✓ Found OpenClaw at: $OPENCLAW_PATH"
 echo ""

 TARGET_FILE="$OPENCLAW_PATH/dist/media-understanding/apply.js"

 if [ ! -f "$TARGET_FILE" ]; then
     echo "❌ Could not find apply.js at expected location:"
     echo "   $TARGET_FILE"
     exit 1
 fi

 # The patch introduces hasBinaryAudioMagic(); if it is already present the
 # file has been patched (or fixed upstream). Stop before patch(1) would ask
 # interactively about a reversed/previously applied patch.
 if grep -q "hasBinaryAudioMagic" "$TARGET_FILE"; then
     echo "✓ Patch already applied to: $TARGET_FILE"
     exit 0
 fi

 # Backup original file
 BACKUP_FILE="$TARGET_FILE.backup-$(date +%Y%m%d-%H%M%S)"
 cp "$TARGET_FILE" "$BACKUP_FILE"
 echo "✓ Backed up original to: $BACKUP_FILE"
 echo ""

 # Apply patch. Running it inside `if` keeps `set -e` from aborting silently,
 # so a failed or partial application can be rolled back from the backup.
 echo "Applying patch..."
 if ! patch -p1 "$TARGET_FILE" < "$PATCH_FILE"; then
     echo ""
     echo "❌ Patch failed. Restoring original from backup..."
     cp "$BACKUP_FILE" "$TARGET_FILE"
     exit 1
 fi

 echo ""
 echo "✓ Patch applied successfully!"
 echo ""
 echo "Restarting OpenClaw gateway..."
 if command -v openclaw &> /dev/null; then
     openclaw gateway restart
 elif systemctl --user is-active --quiet openclaw-gateway; then
     systemctl --user restart openclaw-gateway
 else
     echo "⚠️  Please restart OpenClaw manually:"
     echo "   openclaw gateway restart"
 fi

 echo ""
 echo "✓ Done! Send a voice note to test."
Voice Length	Wasted Tokens	Effect
10-15 seconds	500-2,000	Annoying
30-60 seconds	2,000-8,000+	Session becomes unusable
2+ minutes	15,000-50,000+	Immediate crash
Before Fix	After Fix
Raw binary embedded	Only transcription text
500-8000+ tokens per voice	~500-1000 tokens (transcription only)
Session crashes after 3-5 voices	Session handles many voices normally
	# =====================================
	# OpenClaw Audio Binary Injection Fix
	# =====================================
	#
	# WHAT THIS FIXES:
	# ----------------
	# Voice messages (especially from Telegram) were being treated as text files
	# instead of audio. This caused the raw audio binary to be dumped into the
	# chat context, wasting thousands of tokens and breaking conversations.
	#
	# SYMPTOMS:
	# - Bot stops responding after a few voice messages
	# - Session token count explodes to 200K+ with just a few voice notes
	# - Voice messages show as "text/tab-separated-values" in logs
	#
	# THE FIX:
	# --------
	# Skips audio files from being embedded as text. Only the transcription
	# (the converted text) goes to the AI, not the raw audio file.
	#
	# APPLICABLE TO:
	# --------------
	# - OpenClaw v2026.1.30 and earlier
	# - Affects: Telegram voice messages, Discord audio, all voice notes
	#
	# ============================================================================
	# INSTALLATION INSTRUCTIONS
	# ============================================================================
	#
	# STEP 1: Find your OpenClaw installation directory
	#
	# macOS (Homebrew):
	# /opt/homebrew/lib/node_modules/openclaw/
	#
	# Linux (nvm):
	# ~/.nvm/versions/node/v22.22.0/lib/node_modules/openclaw/
	#
	# Linux (npm global):
	# /usr/lib/node_modules/openclaw/
	#
	# Windows:
	# %APPDATA%\npm\node_modules\openclaw\
	#
	# STEP 2: Apply this patch
	#
	# From this directory, run:
	#
	# patch -p1 < fix-audio-binary.patch /path/to/openclaw/dist/media-understanding/apply.js
	#
	# Or edit the file manually:
	# - Open: dist/media-understanding/apply.js
	# - Apply the changes shown in the "Patch" section below
	#
	# STEP 3: Restart OpenClaw
	#
	# openclaw gateway restart
	#
	# VERIFICATION:
	# Send a voice note. The bot should respond normally, and you should NOT
	# see <file name="...ogg" mime="text/tab-separated-values"> in the session.
	#
	# ============================================================================
	# PATCH START - Changes to dist/media-understanding/apply.js
	# ============================================================================

	--- a/dist/media-understanding/apply.js
	+++ b/dist/media-understanding/apply.js
	@@ -97,6 +97,36 @@ function resolveUtf16Charset(buffer) {
	}
	return undefined;
	}
	+
	+/**
	+ * Detects binary audio files by their magic bytes (file signatures).
	+ * This prevents OGG/Opus and MP3 files from being misidentified as text
	+ * due to their ASCII-heavy headers passing the utf8 text check.
	+ *
	+ * OGG container: "OggS" (RFC 3533 Section 6)
	+ * MP3 with ID3v2: "ID3" (id3.org spec Section 3.1)
	+ */
	+function hasBinaryAudioMagic(buffer) {
	+ if (!buffer \|\| buffer.length < 3) {
	+ return false;
	+ }
	+ // OGG container: "OggS" signature (4 bytes)
	+ if (buffer.length >= 4 &&
	+ buffer[0] === 0x4F && // O
	+ buffer[1] === 0x67 && // g
	+ buffer[2] === 0x67 && // g
	+ buffer[3] === 0x53) { // S
	+ return true;
	+ }
	+ // MP3 with ID3v2: "ID3" signature (3 bytes)
	+ if (buffer[0] === 0x49 && // I
	+ buffer[1] === 0x44 && // D
	+ buffer[2] === 0x33) { // 3
	+ return true;
	+ }
	+ return false;
	+}
	+
	function looksLikeUtf8Text(buffer) {
	if (!buffer \|\| buffer.length === 0) {
	return false;
	@@ -177,7 +207,7 @@ async function extractFileBlocks(params) {
	}
	const forcedTextMime = resolveTextMimeFromName(attachment.path ?? attachment.url ?? "");
	const kind = forcedTextMime ? "document" : resolveAttachmentKind(attachment);
	- if (!forcedTextMime && (kind === "image" \|\| kind === "video")) {
	+ if (!forcedTextMime && (kind === "image" \|\| kind === "audio" \|\| kind === "video")) {
	continue;
	}
	if (!limits.allowUrl && attachment.url && !attachment.path) {
	@@ -199,10 +229,21 @@ async function extractFileBlocks(params) {
	continue;
	}
	const nameHint = bufferResult?.fileName ?? attachment.path ?? attachment.url;
	+
	+ // PATCH: Skip audio files by extension (defense-in-depth)
	+ // Even if kind detection fails, audio extensions should be skipped
	+ // to prevent OGG/Opus binary from being embedded as text.
	+ const _patchAudioExts = new Set(['.ogg', '.opus', '.mp3', '.wav', '.aac', '.flac', '.m4a', '.oga', '.webm']);
	+ const _patchNameExt = nameHint ? path.extname(nameHint).toLowerCase() : '';
	+ if (_patchAudioExts.has(_patchNameExt)) {
	+ continue;
	+ }
	+
	const forcedTextMimeResolved = forcedTextMime ?? resolveTextMimeFromName(nameHint ?? "");
	const utf16Charset = resolveUtf16Charset(bufferResult?.buffer);
	const textSample = decodeTextSample(bufferResult?.buffer);
	- const textLike = Boolean(utf16Charset) \|\| looksLikeUtf8Text(bufferResult?.buffer);
	+ const textLike = (Boolean(utf16Charset) \|\| looksLikeUtf8Text(bufferResult?.buffer)) &&
	+ !hasBinaryAudioMagic(bufferResult?.buffer);
	if (!forcedTextMimeResolved && kind === "audio" && !textLike) {
	continue;
	}

	# ============================================================================
	# END OF PATCH
	# ============================================================================
	#
	# ADDITIONAL NOTES:
	# -----------------
	# - This patch is temporary and will be overwritten when you update OpenClaw
	# - Re-apply after updates until the official fix is released
	# - Tracking issues: #1989, #4197 \| PRs: #3904, #4235
	# - Works for Telegram, Discord, and all voice message platforms
	#
	# ============================================================================
	#!/bin/bash
	# OpenClaw Audio Binary Fix - Auto-apply script
	# Run this script to automatically patch your OpenClaw installation
	# This is optional as you can also just follow the comments above in the patch file.

	set -e

	echo "======================================"
	echo " OpenClaw Audio Binary Injection Fix"
	echo "======================================="
	echo ""

	# Find OpenClaw installation
	OPENCLAW_PATH=""

	# Check common paths
	PATHS=(
	"$HOME/.nvm/versions/node"/v*/lib/node_modules/openclaw
	/opt/homebrew/lib/node_modules/openclaw
	/usr/local/lib/node_modules/openclaw
	/usr/lib/node_modules/openclaw
	)

	for path in "${PATHS[@]}"; do
	expanded=$(echo $path 2>/dev/null \| head -1)
	if [ -d "$expanded" ]; then
	OPENCLAW_PATH="$expanded"
	break
	fi
	done

	if [ -z "$OPENCLAW_PATH" ]; then
	echo "❌ Could not find OpenClaw installation."
	echo ""
	echo "Please specify the path manually:"
	echo " bash apply.sh /path/to/openclaw"
	exit 1
	fi

	echo "✓ Found OpenClaw at: $OPENCLAW_PATH"
	echo ""

	TARGET_FILE="$OPENCLAW_PATH/dist/media-understanding/apply.js"

	if [ ! -f "$TARGET_FILE" ]; then
	echo "❌ Could not find apply.js at expected location:"
	echo " $TARGET_FILE"
	exit 1
	fi

	# Backup original file
	BACKUP_FILE="$TARGET_FILE.backup-$(date +%Y%m%d-%H%M%S)"
	cp "$TARGET_FILE" "$BACKUP_FILE"
	echo "✓ Backed up original to: $BACKUP_FILE"
	echo ""

	# Apply patch
	echo "Applying patch..."
	patch -p1 < fix-audio-binary.patch "$TARGET_FILE"

	echo ""
	echo "✓ Patch applied successfully!"
	echo ""
	echo "Restarting OpenClaw gateway..."
	if command -v openclaw &> /dev/null; then
	openclaw gateway restart
	elif systemctl --user is-active --quiet openclaw-gateway; then
	systemctl --user restart openclaw-gateway
	else
	echo "⚠️ Please restart OpenClaw manually:"
	echo " openclaw gateway restart"
	fi

	echo ""
	echo "✓ Done! Send a voice note to test."