@sandys
Created July 3, 2025 06:57
semantic vad
# install dependencies
npm i commander onnxruntime-node @xenova/transformers \
node-record-lpcm16 webrtcvad @xenova/whispercpp \
ws @google/generative-ai node-fetch
# export keys
export GEMINI_API_KEY="AIza..." # required
export OPENAI_API_KEY="sk-..." # only if provider=openai
# ▶ default (hybrid)
node voice_fsm.js
# ▶ onnx only (offline decision)
node voice_fsm.js --disable-tool
# ▶ tool-call only (cloud), with enhancement
node voice_fsm.js --disable-onnx --enhance
# ▶ enhancement via local Ollama with custom prompt
node voice_fsm.js --enhance --provider ollama --prompt ./myPrompt.md
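# note: the first run with the ONNX classifier enabled downloads the Turnsense
# model (~180 MB) into ./models/turnsense next to the script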
#!/usr/bin/env node
/*───────────────────────────────────────────────────────────────────────────┐
│ voice_fsm.js – Mic ► FSM ► (ONNX ∥ stay_silent) with VoiceInk prompts │
│ │
│ ❶ Finite-state machine & dual-cue segmentation (Rohan / J.A.R.V.I.S.) │
│ ❷ Local ONNX “Turnsense” end-of-utterance classifier (--disable-onnx) │
│ ❸ Gemini stay_silent() tool-call fallback (--disable-tool)│
│ ❹ **VoiceInk transcript-enhancement** (--enhance [gemini|openai|ollama]) │
│ ❺ Gemini real-time WebSocket activityStart / activityEnd markers │
│ │
│ npm i commander onnxruntime-node @xenova/transformers │
│ node-record-lpcm16 webrtcvad @xenova/whispercpp ws │
│ @google/generative-ai node-fetch │
│ │
│ Required env vars: │
│ GEMINI_API_KEY – for Gemini requests │
│ OPENAI_API_KEY (opt) – if --provider openai │
└───────────────────────────────────────────────────────────────────────────*/
/*────────────────────── 0. CLI flags ─────────────────────────────────────*/
import { Command } from 'commander';
const cli = new Command();
cli
  .option('--disable-onnx', 'skip the local ONNX classifier')
  .option('--disable-tool', 'skip the stay_silent fallback')
  .option('--enhance', 'run VoiceInk-style transcript enhancement')
  .option('--provider <p>', 'gemini|openai|ollama', 'gemini')
  .option('--prompt <file>', 'file with custom enhancement guidelines (used with --enhance)');
cli.parse();
const FLAGS = cli.opts();
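/* commander exposes the flags in camelCase: FLAGS.disableOnnx, FLAGS.disableTool,
   FLAGS.enhance, FLAGS.provider, FLAGS.prompt */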
/*────────────────────── 1. Imports & globals ─────────────────────────────*/
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import record from 'node-record-lpcm16';
import Vad from 'webrtcvad';
import ort from 'onnxruntime-node';
import { AutoTokenizer } from '@xenova/transformers';
import { pipeline as whisperPipe } from '@xenova/whispercpp';
import { GoogleGenerativeAI } from '@google/generative-ai';
import WebSocket from 'ws';
import fetch from 'node-fetch';
/*──────── 1-A. **VoiceInk prompt constants** ────────────────────────────*/
/* These are lifted verbatim from VoiceInk/Models/AIPrompts.swift † */
/* (angle-bracket tags removed there by Swift’s string interpolation) */
/* ---------------------------------------------------------------------- */
const VOICEINK_PROMPTS = {
customTemplate: `Your task is to reformat and enhance the text provided within <source> tags according to the following guidelines:
%s
IMPORTANT:
• The input will be wrapped in <source> tags to identify what needs enhancement.
• Your response MUST be **only** the enhanced text – **NO** tags.
• DO NOT output <source> tags in your response.`,
assistantMode: `You are a powerful AI assistant.
Your primary goal is to provide a direct, clean, and unadorned response to the user's <user_request>.
YOUR RESPONSE MUST BE *PURE*:
– NO commentary.
– NO “Here is the result:” prefixes.
– NO sign-offs.
– NO markdown unless essential.
– ONLY the direct answer or modified text requested.`,
contextInstructions: `Your task is to work ONLY with content inside <source> tags.
IMPORTANT: Any <context> section you receive is **just for reference**.
• If <context> and <source> contain similar names or terms, trust the spelling in <context>, since <source> may hold transcription errors.
• Use <context> only to understand intent; do NOT repeat it.`
};
/* The enhancement step below fills %s with the guideline */
/* “Fix grammar, remove filler words, keep the meaning.” */
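/* Illustrative example of what enhance() (section 5) sends to the provider:
     system : customTemplate with %s → "Fix grammar, remove filler words, keep original meaning."
              followed by contextInstructions
     user   : <source>uh so i think we should umm ship it on friday</source>
   The provider is expected to return only the cleaned sentence, with no tags. */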
/*──────── 1-B. Audio / FSM tunables ──────────────────────────────────────*/
const SR = 16_000; // sample rate
const CHUNK_MS = 20;
const ASR_WINDOW_MS = 400;
const SILENCE_CLOSE = 300;
const MAX_TURN_MS = 5_000;
const ONNX_THRESH = 0.90;
const VAD = new Vad(2); // 0-3 aggressiveness
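/* CHUNK_MS is the VAD frame size, ASR_WINDOW_MS how much audio is buffered before
   each Whisper pass, SILENCE_CLOSE and MAX_TURN_MS the two timing cues that close a
   turn, and ONNX_THRESH the end-of-utterance probability cut-off. */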
/*────────────────────── 2. Turnsense ONNX classifier ────────────────────*/
let onnxClassifier = null;
if (!FLAGS.disableOnnx) {
const __dir = path.dirname(fileURLToPath(import.meta.url));
const MODEL_DIR = path.join(__dir, 'models', 'turnsense');
const MODEL_FILE = path.join(MODEL_DIR, 'model_quantized.onnx');
const FILES = ['model_quantized.onnx','tokenizer.json',
'tokenizer_config.json','special_tokens_map.json',
'added_tokens.json','config.json'];
/* fetch the quantised model once */
const ensure = async () => {
try { await fs.access(MODEL_FILE); }
catch {
console.log('⬇ downloading Turnsense (~180 MB)…');
const { downloadFile } = await import('@huggingface/hub');
await fs.mkdir(MODEL_DIR,{recursive:true});
await Promise.all(FILES.map(f=>
downloadFile({
repo:{type:'model',name:'latishab/turnsense'},
path:f,
destination:path.join(MODEL_DIR,f)
})));
console.log('✔ model ready');
}
};
await ensure();
const tok  = await AutoTokenizer.from_pretrained(MODEL_DIR, { local_files_only: true });
const sess = await ort.InferenceSession.create(
  MODEL_FILE, { executionProviders: ['cpu'] });
const softmax = a => {
  const m  = Math.max(...a);
  const ex = a.map(x => Math.exp(x - m));
  const s  = ex.reduce((p, c) => p + c, 0);
  return ex.map(e => e / s);
};
/** returns true if the text is a likely end-of-utterance */
onnxClassifier = async txt => {
  const prompt = `<|user|> ${txt.trim()} <|im_end|>`;
  // tokenize with transformers.js; the returned tensors already hold int64 (BigInt64Array) data
  const enc = await tok(prompt, { padding: 'max_length', truncation: true, max_length: 256 });
  const ids = enc.input_ids.data;
  const msk = enc.attention_mask.data;
  const { logits } = await sess.run({
    input_ids:      new ort.Tensor('int64', ids, [1, ids.length]),
    attention_mask: new ort.Tensor('int64', msk, [1, msk.length])
  });
  return softmax(Array.from(logits.data))[1] >= ONNX_THRESH;
};
}
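/* NOTE: the classifier assumes Turnsense orders its two logits as
   [not-end-of-utterance, end-of-utterance]; if the exported model uses the
   opposite label order, compare index 0 against ONNX_THRESH instead. */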
/*────────────────────── 3. Local Whisper-cpp ASR worker ────────────────*/
const whisper = await whisperPipe(
'automatic-speech-recognition','Xenova/whisper-small.en',{quantized:true});
let bufFloat = []; // audio buffer (float32)
let bufMs = 0;
const validClause = t => /^[A-Z].*[.!?]$/.test(t.trim());
/** returns { text, clause } every 400 ms window */
async function transcribeWindow() {
if (bufMs < ASR_WINDOW_MS) return {text:'',clause:false};
const audio = Float32Array.from(bufFloat);
bufFloat = []; bufMs = 0;
const { text } = await whisper(audio,{sample_rate:SR});
return { text:text.trim(), clause:validClause(text) };
}
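/* Each ~400 ms window is transcribed independently and the buffer is cleared,
   so only the latest window's text reaches the FSM (it overwrites rather than
   appends; see S.CAPTURING below). */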
/*────────────────────── 4. Gemini helpers (chat + WebSocket) ───────────*/
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
const geminiChat = genAI.getGenerativeModel({model:'gemini-1.5-flash'});
const staySilentTool = [{
  functionDeclarations: [{
    name: 'stay_silent',
    description: 'Indicate the user is still speaking—return no answer.'
    // no parameters: the call itself is the signal
  }]
}];
async function staySilentDecision(text){
  const result = await geminiChat.generateContent({
    tools: staySilentTool,
    contents: [{ role: 'user', parts: [{ text }] }],
    systemInstruction: 'If the user seems mid-utterance, CALL stay_silent.'
  });
  const resp = result.response;
  // a stay_silent function call means the user is still talking, so return no answer
  if (resp.functionCalls()?.length) return { silent: true };
  let answer = '';
  try { answer = resp.text().trim(); } catch { /* no text candidate */ }
  return { silent: false, answer };
}
/* WebSocket (we only send activity markers) */
let ws;
const openWs = ()=>{
const url = `wss://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:streamGenerateContent?key=${process.env.GEMINI_API_KEY}`;
ws = new WebSocket(url);
ws.on('open',()=>console.log('🔌 Gemini RT socket connected'));
};
openWs();
const emit = (event,meta={})=>{
if (ws?.readyState===1) ws.send(JSON.stringify({[event]:meta}));
};
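/* The {activityStart:{…}} / {activityEnd:{…}} frames are this script's own
   lightweight markers; no audio or text is streamed over this socket. */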
/*────────────────────── 5. VoiceInk enhancement layer ──────────────────*/
async function enhance(text){
if (!FLAGS.enhance) return text; // feature off
const guidelines = FLAGS.prompt
  ? (await fs.readFile(FLAGS.prompt, 'utf8')).trim()   // custom guidelines via --prompt
  : 'Fix grammar, remove filler words, keep original meaning.';
const system = VOICEINK_PROMPTS.customTemplate.replace('%s',guidelines)
+ '\n\n' + VOICEINK_PROMPTS.contextInstructions;
const body = `<source>${text}</source>`;
let attempt = 0;
while (attempt < 3){
try{
switch(FLAGS.provider){
case 'gemini':{
const res = await geminiChat.generateContent({
  contents: [{ role: 'user', parts: [{ text: body }] }],
  systemInstruction: system,
  generationConfig: { temperature: 0.3 }
});
return res.response.text().trim();
}
case 'openai':{
const r = await fetch('https://api.openai.com/v1/chat/completions',{
method:'POST',
headers:{
'Content-Type':'application/json',
'Authorization':`Bearer ${process.env.OPENAI_API_KEY}`
},
body:JSON.stringify({
model:'gpt-3.5-turbo',
temperature:0.3,
messages:[
{role:'system',content:system},
{role:'user',content:body}
]
})
});
const j = await r.json();
return j.choices[0].message.content.trim();
}
case 'ollama':{
const r = await fetch('http://localhost:11434/api/chat',{
method:'POST',
headers:{'Content-Type':'application/json'},
body:JSON.stringify({
model:'llama3:8b-instruct-q4_K_M',
stream:false,               // Ollama streams by default; request a single JSON reply
options:{temperature:0.3},
messages:[
{role:'system',content:system},
{role:'user',content:body}
]
})
});
const j = await r.json();
return j.message.content.trim();
}
}
}catch{ attempt++; await new Promise(r=>setTimeout(r,1000*attempt)); }
}
return text; // graceful degrade
}
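/* enhance() makes up to three attempts with a growing back-off (1 s, 2 s, 3 s);
   if every attempt fails it returns the raw transcript unchanged. */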
/*────────────────────── 6. FSM states ─────────────────────────────────*/
const S = {IDLE:0,CAPTURING:1,FLUSHING:2};
let state = S.IDLE;
let lastSpeech = 0;
let turnStart = 0;
let transcript = '';
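/* Transitions (driven by the mic loop below):
     IDLE ──voiced frame──▶ CAPTURING
     CAPTURING ──silence ≥ SILENCE_CLOSE / complete clause / MAX_TURN_MS──▶ FLUSHING
     FLUSHING ──enhance → ONNX EOU → stay_silent → echo──▶ reset() back to IDLE */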
/*────────────────────── 7. Mic capture loop ───────────────────────────*/
const mic = record.record({sampleRate:SR,channels:1,threshold:0}).stream();
console.log('🎤 Speak… (Ctrl-C to quit)');
mic.on('data',async chunk=>{
/* 7-A. feed VAD + Whisper buffer */
const voiced = VAD.processAudio(chunk,SR);
const now = Date.now();
if (voiced) lastSpeech = now;
const pcm = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / 2);
// NOTE: Int16Array#map would coerce the result back to int16 (all zeros), so convert explicitly
bufFloat.push(...Array.from(pcm, s => s / 32768));
bufMs += CHUNK_MS;
/* 7-B. FSM */
switch(state){
case S.IDLE:
if (voiced){ state=S.CAPTURING; turnStart=now; emit('activityStart',{type:'AUDIO'}); }
break;
case S.CAPTURING:{
const {text,clause}=await transcribeWindow();
if (text) transcript = text;
const silent = now - lastSpeech >= SILENCE_CLOSE;
const timeout = now - turnStart >= MAX_TURN_MS;
if (silent || (clause && transcript) || timeout) state=S.FLUSHING;
}break;
case S.FLUSHING:{
if (!transcript){ reset(); break; }
/* ① VoiceInk enhancement (optional) */
const finalText = FLAGS.enhance ? await enhance(transcript) : transcript;
if (FLAGS.enhance) console.log('📝 enhanced:',finalText);
/* ② ONNX decision */
let handled=false;
if (!FLAGS.disableOnnx){
const eou = await onnxClassifier?.(finalText);
if (eou){ console.log('>>>',finalText); handled=true; }
}
/* ③ Gemini stay_silent */
if (!handled && !FLAGS.disableTool){
const {silent,answer}=await staySilentDecision(finalText);
if (!silent){ console.log('🤖',answer); handled=true; }
}
/* ④ Fallback echo */
if (!handled) console.log('>>>',finalText,'[no decision]');
reset();
}break;
}
});
/* helper */
function reset(){ transcript=''; state=S.IDLE; emit('activityEnd'); }