Last active
November 30, 2025 22:21
-
-
Save christopherwoodall/5f0d0af900b184e3ba965ca479e1b563 to your computer and use it in GitHub Desktop.
arXiv TTS UserScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript==
// @name         ArXiv HTML TTS Reader (Refactored)
// @namespace    http://tampermonkey.net/
// @version      2.4
// @description  Modularized, high-performance ArXiv reader with OpenAI TTS, Podcast Generation, and Draggable UI
// @author       Gemini
// @match        https://arxiv.org/html/*
// @grant        none
// ==/UserScript==
| (function() { | |
| 'use strict'; | |
// ==========================================
// 1. CONSTANTS & CONFIGURATION
// ==========================================

/**
 * Static configuration shared by the whole script.
 *
 * - DEFAULT_PROMPT: instruction text prepended to the paper content when a
 *   podcast script is requested. Kept flush-left because every character of
 *   the template literal is sent to the LLM.
 * - PRICING: dollar rates that are multiplied by (count / 1,000,000) when
 *   estimating cost: tokens for `llm` (`in`/`out`), characters for `tts`.
 * - SELECTORS: CSS selectors for elements considered readable blocks.
 * - STYLES: stylesheet injected once for the widget and the highlight class.
 */
const CONSTANTS = {
    DEFAULT_PROMPT: `You are a science communicator creating a podcast script.
Summarize the following academic paper as an engaging conversation between two hosts: "Host" (enthusiastic, explains context) and "Guest" (curious, asks clarifying questions, expert details).
STRICT FORMAT:
Host: [Text]
Guest: [Text]
Host: [Text]
...
Keep it under 800 words. Focus on the main contribution, the "why it matters", and the results.
Avoid latex symbols. Use plain English text for math concepts.`,
    PRICING: {
        llm: {
            'google/gemini-2.0-flash-001': { in: 0.10, out: 0.40 },
            'anthropic/claude-3.5-haiku': { in: 0.80, out: 4.00 },
            'openai/gpt-4o-mini': { in: 0.15, out: 0.60 }
        },
        tts: {
            'tts-1': 15.00
        }
    },
    SELECTORS: ['h1', 'h2', 'h3', 'h4', 'p', 'li', '.ltx_title', '.ltx_abstract'],
    // The scrollbar rules are scoped to the widget: unscoped ::-webkit-scrollbar
    // selectors would restyle every scrollbar of the host page as well.
    STYLES: `
        #arxiv-tts-widget { position: fixed; bottom: 20px; left: 20px; width: 320px; background: #fff; border: 1px solid #ccc; box-shadow: 0 4px 12px rgba(0,0,0,0.15); border-radius: 8px; padding: 10px; z-index: 9999; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; font-size: 13px; color: #333; transition: opacity 0.2s ease; }
        #tts-drag-handle { height: 14px; background: #f5f5f5; border-radius: 6px 6px 0 0; cursor: move; margin: -10px -10px 8px -10px; border-bottom: 1px solid #e0e0e0; display: flex; justify-content: center; align-items: center; }
        #tts-drag-handle:hover { background: #eeeeee; }
        .tts-grip-line { width: 40px; height: 3px; background: #d0d0d0; border-radius: 2px; }
        .tts-header-row { display: flex; justify-content: space-around; margin-bottom: 10px; border-bottom: 1px solid #eee; padding-bottom: 5px; }
        .tts-tab { cursor: pointer; padding: 5px 10px; font-weight: bold; color: #888; font-size: 12px; flex: 1; text-align: center; }
        .tts-tab:hover { background: #f9f9f9; border-radius:4px; }
        .tts-tab.active { color: #b31b1b; border-bottom: 2px solid #b31b1b; }
        .tts-main-row { display: flex; gap: 5px; margin-bottom: 10px; }
        .tts-main-btn { background-color: #b31b1b; color: white; border: none; padding: 8px 15px; border-radius: 4px; font-weight: bold; cursor: pointer; flex-grow: 1; display: flex; align-items: center; justify-content: center; gap: 8px; }
        .tts-main-btn:disabled { background-color: #ccc; cursor: not-allowed; }
        .tts-main-btn:hover:not(:disabled) { background-color: #9e1717; }
        #arxiv-tts-controls-expanded { margin-top: 5px; padding-top: 5px; }
        .tts-sub-controls { display: flex; gap: 5px; margin-bottom: 10px; }
        .tts-btn { flex: 1; padding: 5px; cursor: pointer; background: #f0f0f0; border: 1px solid #ddd; border-radius: 4px; text-align: center; user-select: none; font-size: 12px; }
        .tts-btn:hover { background: #e0e0e0; }
        .tts-select, .tts-input { width: 100%; margin-bottom: 5px; padding: 6px; border: 1px solid #ddd; border-radius: 4px; font-size: 12px; box-sizing: border-box; }
        .tts-textarea { width: 100%; height: 100px; margin-bottom: 10px; padding: 6px; border: 1px solid #ddd; border-radius: 4px; font-size: 11px; font-family: monospace; box-sizing: border-box; resize: vertical; }
        .tts-label { font-size: 10px; font-weight: bold; color: #666; display: block; margin-bottom: 2px; margin-top: 8px; }
        .tts-slider-container { display: flex; align-items: center; gap: 10px; margin-top: 5px; margin-bottom: 5px; background: #f9f9f9; padding: 5px; border-radius: 4px; }
        .podcast-script-preview { max-height: 80px; overflow-y: auto; background: #f4f4f4; padding: 5px; font-size: 10px; border: 1px solid #ddd; margin-bottom: 5px; white-space: pre-wrap; display: none; }
        .podcast-status { font-size: 11px; color: #b31b1b; text-align: center; margin-top: 5px; font-weight: bold; }
        .podcast-casting { display: flex; gap: 5px; }
        .cost-badge { font-size: 10px; background: #e8f5e9; color: #2e7d32; padding: 2px 6px; border-radius: 4px; border: 1px solid #c8e6c9; white-space: nowrap; }
        .reader-cost-est { font-size: 10px; color: #666; margin-left: auto; align-self: center; }
        .active-read-block { background-color: rgba(255, 235, 59, 0.2) !important; border-left: 4px solid #b31b1b !important; padding-left: 4px !important; transition: all 0.3s ease; }
        #arxiv-tts-widget ::-webkit-scrollbar { width: 6px; height: 6px; }
        #arxiv-tts-widget ::-webkit-scrollbar-track { background: #f1f1f1; }
        #arxiv-tts-widget ::-webkit-scrollbar-thumb { background: #ccc; border-radius: 3px; }
        #arxiv-tts-widget ::-webkit-scrollbar-thumb:hover { background: #bbb; }
    `
};
/**
 * Centralized configuration manager.
 * Encapsulates all LocalStorage operations.
 *
 * Logical setting names (e.g. `rate`) are mapped to namespaced storage keys
 * (e.g. `arxiv_tts_rate`) through `this.keys`.
 */
class ConfigManager {
    constructor() {
        this.keys = {
            rate: 'arxiv_tts_rate',
            openAIKey: 'arxiv_tts_openai_key',
            openRouterKey: 'arxiv_tts_openrouter_key',
            // Voice Settings
            readerNativeVoice: 'arxiv_tts_reader_native_voice',
            openAIVoice: 'arxiv_tts_openai_voice',
            podcastHost: 'arxiv_tts_podcast_host',
            podcastGuest: 'arxiv_tts_podcast_guest',
            // Podcast Config
            podcastModel: 'arxiv_tts_podcast_model',
            podcastPrompt: 'arxiv_tts_prompt'
        };
    }

    /**
     * Translate a logical setting name into its storage key.
     * @param {string} key - One of the property names of `this.keys`.
     * @returns {string} The namespaced storage key.
     * @throws {Error} When `key` is not a known setting; without this check
     *   the lookup would yield `undefined` and touch the literal storage key
     *   "undefined".
     */
    _storageKey(key) {
        if (!Object.prototype.hasOwnProperty.call(this.keys, key)) {
            throw new Error(`Unknown config key: ${key}`);
        }
        return this.keys[key];
    }

    /**
     * Read a setting.
     * @param {string} key - Logical setting name.
     * @param {*} defaultVal - Returned when nothing (or an empty string) is
     *   stored, or when storage cannot be read.
     * @returns {*} The stored string, or `defaultVal`.
     */
    get(key, defaultVal) {
        const storageKey = this._storageKey(key);
        let stored = null;
        try {
            stored = localStorage.getItem(storageKey);
        } catch (e) {
            // Storage may be unavailable; fall back to the default.
            console.warn(`ArXiv TTS: could not read setting "${key}"`, e);
        }
        return stored || defaultVal;
    }

    /**
     * Persist a setting. Persistence is best-effort: a storage failure is
     * logged instead of thrown so that the calling UI handler keeps running.
     * @param {string} key - Logical setting name.
     * @param {*} val - Value to store (localStorage stores it as a string).
     */
    set(key, val) {
        const storageKey = this._storageKey(key);
        try {
            localStorage.setItem(storageKey, val);
        } catch (e) {
            console.warn(`ArXiv TTS: could not persist setting "${key}"`, e);
        }
    }

    /**
     * Snapshot of every setting with defaults applied. `rate` is parsed to a
     * number and falls back to 1.0 when the stored value is not a positive
     * finite number.
     */
    get settings() {
        const parsedRate = parseFloat(this.get('rate', '1.0'));
        const rate = Number.isFinite(parsedRate) && parsedRate > 0 ? parsedRate : 1.0;
        return {
            rate,
            apiKey: this.get('openAIKey', ''),
            openRouterKey: this.get('openRouterKey', ''),
            readerNativeVoice: this.get('readerNativeVoice', ''),
            openAIVoice: this.get('openAIVoice', 'alloy'),
            podcastHost: this.get('podcastHost', 'onyx'),
            podcastGuest: this.get('podcastGuest', 'nova'),
            podcastModel: this.get('podcastModel', 'google/gemini-2.0-flash-001'),
            podcastPrompt: this.get('podcastPrompt', CONSTANTS.DEFAULT_PROMPT)
        };
    }
}
// ==========================================
// 2. DOM & PARSING SERVICES
// ==========================================

/**
 * Finds the readable elements of the page and flattens them to text.
 */
class ScraperService {
    /**
     * Collect the readable blocks in document order.
     *
     * An element qualifies when it matches one of CONSTANTS.SELECTORS, has
     * more than one character of text, is not inside the TTS widget, is not a
     * script/style/math-only element, and has a non-null `offsetParent`.
     *
     * The selectors can match nested elements (for example a `p` inside an
     * `li` or inside `.ltx_abstract`). Only the outermost qualifying element
     * is kept; otherwise the same text would be read twice and counted twice
     * in the full-text/cost estimate.
     *
     * @returns {Element[]} Outermost qualifying elements.
     */
    static scan() {
        const container = document.querySelector('.ltx_page_content') ||
            document.querySelector('.ltx_page_main') ||
            document.body;
        const candidates = Array.from(container.querySelectorAll(CONSTANTS.SELECTORS.join(',')));

        const readable = candidates.filter((el) => {
            const hasText = el.innerText.trim().length > 1;
            const isWidget = el.closest('#arxiv-tts-widget');
            const isScript = ['SCRIPT', 'STYLE'].includes(el.tagName);
            const isMathOnly = el.classList.contains('ltx_equation') || el.classList.contains('ltx_Math');
            return hasText && !isWidget && !isScript && !isMathOnly && el.offsetParent !== null;
        });

        // Drop every element that sits inside another accepted element.
        const accepted = new Set(readable);
        return readable.filter((el) => {
            for (let ancestor = el.parentElement; ancestor; ancestor = ancestor.parentElement) {
                if (accepted.has(ancestor)) return false;
            }
            return true;
        });
    }

    /**
     * Join the text of all blocks with blank lines, capped at 150000 characters.
     * @param {Element[]} blocks - Elements as returned by `scan()`.
     * @returns {string} Concatenated text.
     */
    static getFullText(blocks) {
        return blocks.map(b => b.innerText).join('\n\n').slice(0, 150000);
    }
}
/**
 * Rough cost estimates derived from CONSTANTS.PRICING.
 */
class CostService {
    /**
     * Estimate the cost of reading the text aloud and of generating and
     * voicing a podcast about it.
     *
     * @param {number} textLength - Character count of the source text.
     *   Non-finite or negative values are treated as 0.
     * @param {string} modelName - Key into CONSTANTS.PRICING.llm; unknown
     *   names fall back to 'google/gemini-2.0-flash-001'.
     * @param {Object} [estimates] - Optional overrides for the assumed size
     *   of the generated script.
     * @param {number} [estimates.outputTokens=1100] - Assumed LLM output tokens.
     * @param {number} [estimates.outputChars=4400] - Assumed script length in
     *   characters, used for the podcast TTS cost.
     * @returns {{reader: number, podcastTotal: number,
     *   details: {llmCost: number, podcastTtsCost: number}}}
     */
    static calculate(textLength, modelName, estimates = {}) {
        // Podcast Estimates (Output ~800 words = ~1100 tokens = ~4400 chars)
        const { outputTokens = 1100, outputChars = 4400 } = estimates;
        const chars = Number.isFinite(textLength) && textLength > 0 ? textLength : 0;
        const perMillion = (count, rate) => (count / 1000000) * rate;

        const ttsRate = CONSTANTS.PRICING.tts['tts-1'];
        const modelRates = CONSTANTS.PRICING.llm[modelName] ||
            CONSTANTS.PRICING.llm['google/gemini-2.0-flash-001'];

        // Input tokens are approximated as one token per four characters.
        const inputTokens = chars / 4;
        const llmCost = perMillion(inputTokens, modelRates.in) + perMillion(outputTokens, modelRates.out);
        const podcastTtsCost = perMillion(outputChars, ttsRate);

        return {
            reader: perMillion(chars, ttsRate),
            podcastTotal: llmCost + podcastTtsCost,
            details: { llmCost, podcastTtsCost }
        };
    }
}
// ==========================================
// 3. AUDIO ENGINE (Abstracts Native vs OpenAI)
// ==========================================

/**
 * Plays one piece of text at a time, either through the browser's speech
 * synthesis or through OpenAI's speech endpoint.
 *
 * Contract of `play()`: the returned promise resolves when the audio has
 * finished, and rejects when playback fails or is interrupted by `stop()`
 * (or by a newer `play()` call). Interruptions reject with an Error whose
 * `name` is 'AbortError', so callers never wait on a promise that cannot
 * settle and can tell "stopped" apart from "finished".
 */
class AudioEngine {
    constructor() {
        this.audioElement = null; // For OpenAI
        this.nativeUtterance = null; // For Browser TTS
        this.nativeVoices = [];
        this.currentPlayId = 0; // Token to prevent race conditions
        this._cancelPending = null; // Rejects the in-flight play() promise, if any
    }

    /**
     * Build the error used to signal an interrupted playback.
     * @returns {Error} Error with `name === 'AbortError'`.
     */
    static createAbortError() {
        const err = new Error('Playback stopped');
        err.name = 'AbortError';
        return err;
    }

    /**
     * Load the browser's speech synthesis voices.
     * @returns {Promise<Array>} Resolves with the voice list: immediately when
     *   voices are already available, otherwise on `voiceschanged`. Resolves
     *   with the current (empty) list when speech synthesis is unavailable.
     */
    loadNativeVoices() {
        return new Promise((resolve) => {
            const synth = window.speechSynthesis;
            if (!synth) {
                resolve(this.nativeVoices);
                return;
            }
            const voices = synth.getVoices();
            if (voices.length > 0) {
                this.nativeVoices = voices;
                resolve(voices);
                return;
            }
            synth.onvoiceschanged = () => {
                this.nativeVoices = synth.getVoices();
                resolve(this.nativeVoices);
            };
        });
    }

    /**
     * Stop whatever is playing or loading and reject its pending promise
     * with an AbortError.
     */
    stop() {
        this.currentPlayId++; // Invalidate pending fetches
        if (window.speechSynthesis) {
            window.speechSynthesis.cancel();
        }
        if (this.audioElement) {
            this.audioElement.pause();
            this.audioElement = null;
        }
        const cancel = this._cancelPending;
        this._cancelPending = null;
        if (cancel) cancel();
    }

    /**
     * Speak `text`, stopping anything already playing.
     * @param {string} text - Text to speak.
     * @param {Object} options
     * @param {boolean} [options.useOpenAI] - Use OpenAI when an apiKey is also given.
     * @param {string} [options.apiKey] - OpenAI API key.
     * @param {string} [options.aiVoice] - OpenAI voice name (default 'alloy').
     * @param {string} [options.nativeVoiceName] - Name of a loaded native voice.
     * @param {number} [options.rate] - Speech rate (default 1.0).
     * @returns {Promise<void>} See the class description for the contract.
     */
    async play(text, options) {
        this.stop(); // Ensure clean slate, increment ID
        const myPlayId = this.currentPlayId;
        if (options.useOpenAI && options.apiKey) {
            return this._playOpenAI(text, options, myPlayId);
        }
        return this._playNative(text, options, myPlayId);
    }

    /** Speak through the browser's speech synthesis. */
    _playNative(text, options, playId) {
        return new Promise((resolve, reject) => {
            if (this.currentPlayId !== playId) {
                reject(AudioEngine.createAbortError());
                return;
            }
            const utterance = new SpeechSynthesisUtterance(text);
            utterance.rate = options.rate || 1.0;
            if (options.nativeVoiceName) {
                const voice = this.nativeVoices.find(v => v.name === options.nativeVoiceName);
                if (voice) utterance.voice = voice;
            }

            // Registered so that stop() settles this promise deterministically,
            // whether or not the browser emits an error event for the cancel.
            const cancel = () => reject(AudioEngine.createAbortError());
            const release = () => {
                if (this._cancelPending === cancel) this._cancelPending = null;
            };
            utterance.onend = () => {
                release();
                resolve();
            };
            utterance.onerror = (event) => {
                release();
                const reason = event && event.error;
                // 'interrupted' / 'canceled' are what a cancel() call reports.
                const wasCancelled = reason === 'interrupted' || reason === 'canceled';
                reject(wasCancelled ? AudioEngine.createAbortError() : event);
            };

            this._cancelPending = cancel;
            this.nativeUtterance = utterance;
            window.speechSynthesis.speak(utterance);
        });
    }

    /** Fetch speech audio from OpenAI and play it through an Audio element. */
    async _playOpenAI(text, options, playId) {
        const isStale = () => this.currentPlayId !== playId;
        const aborter = typeof AbortController === 'function' ? new AbortController() : null;
        let rejectPlayback = null;
        const cancel = () => {
            if (aborter) aborter.abort();
            if (rejectPlayback) rejectPlayback(AudioEngine.createAbortError());
        };
        this._cancelPending = cancel;

        let objectUrl = null;
        try {
            let blob;
            try {
                const response = await fetch("https://api.openai.com/v1/audio/speech", {
                    method: "POST",
                    headers: {
                        "Authorization": `Bearer ${options.apiKey}`,
                        "Content-Type": "application/json"
                    },
                    body: JSON.stringify({
                        model: "tts-1",
                        input: text,
                        voice: options.aiVoice || 'alloy',
                        speed: options.rate || 1.0
                    }),
                    signal: aborter ? aborter.signal : undefined
                });
                if (isStale()) throw AudioEngine.createAbortError(); // Stopped during fetch
                if (!response.ok) throw new Error(`OpenAI Error: ${response.status}`);
                blob = await response.blob();
                if (isStale()) throw AudioEngine.createAbortError(); // Stopped during blob processing
            } catch (e) {
                // A stop is not a failure: no alert, just signal the abort.
                if (isStale() || (e && e.name === 'AbortError')) {
                    throw AudioEngine.createAbortError();
                }
                console.error(e);
                alert("Audio Generation Failed. Check API Key.");
                throw e;
            }

            objectUrl = URL.createObjectURL(blob);
            const audio = new Audio(objectUrl);
            this.audioElement = audio;
            await new Promise((resolve, reject) => {
                rejectPlayback = reject;
                audio.onended = () => resolve();
                audio.onerror = reject;
                // play() returns a promise in current browsers; a rejection
                // (e.g. blocked autoplay) must fail the playback instead of
                // leaving it pending.
                const started = audio.play();
                if (started && typeof started.catch === 'function') {
                    started.catch(reject);
                }
            });
        } finally {
            // Runs on finish, failure and stop alike, so the blob URL never leaks.
            if (objectUrl) URL.revokeObjectURL(objectUrl);
            if (this._cancelPending === cancel) this._cancelPending = null;
        }
    }
}
// ==========================================
// 4. LOGIC CONTROLLERS
// ==========================================

/**
 * Generates a two-speaker script through OpenRouter and plays it segment by
 * segment, using a different OpenAI voice per speaker.
 */
class PodcastController {
    constructor(config, audioEngine) {
        this.config = config;
        this.audio = audioEngine;
        this.queue = [];
        this.currentIndex = 0;
        this.isPlaying = false;
        this._session = 0; // Incremented per play() so a stale loop can detect it was superseded
    }

    /**
     * Ask the configured model for a podcast script and load it into the queue.
     * @param {string} fullText - Paper text appended to the prompt.
     * @param {function(string): void} [onStatusUpdate] - Receives progress messages.
     * @returns {Promise<string>} The raw script text.
     * @throws {Error} On a non-OK HTTP response or a response without script text.
     */
    async generateScript(fullText, onStatusUpdate) {
        const settings = this.config.settings;
        const prompt = `${settings.podcastPrompt}\n\nPAPER CONTENT:\n${fullText}`;
        if (typeof onStatusUpdate === 'function') onStatusUpdate("Prompting LLM...");

        const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
            method: "POST",
            headers: {
                "Authorization": `Bearer ${settings.openRouterKey}`,
                "Content-Type": "application/json",
                "HTTP-Referer": window.location.href
            },
            body: JSON.stringify({
                model: settings.podcastModel,
                messages: [
                    { role: "system", content: "You are a podcast script writer." },
                    { role: "user", content: prompt }
                ]
            })
        });
        if (!response.ok) throw new Error(`OpenRouter API Error (${response.status})`);

        const data = await response.json();
        // Guard the shape: a body without choices would otherwise surface as
        // an opaque TypeError.
        const choice = data && Array.isArray(data.choices) ? data.choices[0] : null;
        const script = choice && choice.message ? choice.message.content : null;
        if (typeof script !== 'string' || !script.trim()) {
            throw new Error("OpenRouter returned no script content");
        }
        this._parseScript(script);
        return script;
    }

    /**
     * Split a script into `{ speaker, text }` segments and reset the position.
     *
     * A line starts a segment when it begins with "Host:" or "Guest:",
     * case-insensitively and optionally wrapped in markdown emphasis
     * (e.g. "**Host:**"). Any other non-empty line is appended to the
     * current segment, so multi-line turns are not lost; lines before the
     * first speaker label are ignored. Segments without text are dropped
     * because there is nothing to synthesize for them.
     */
    _parseScript(scriptText) {
        const speakerLine = /^[*_#>\s]*(host|guest)[*_\s]*:[*_\s]*(.*)$/i;
        const segments = [];
        for (const rawLine of String(scriptText).split('\n')) {
            const line = rawLine.trim();
            if (!line) continue;
            const match = speakerLine.exec(line);
            if (match) {
                const speaker = match[1].toLowerCase() === 'host' ? 'Host' : 'Guest';
                segments.push({ speaker, text: match[2].trim() });
            } else if (segments.length > 0) {
                const current = segments[segments.length - 1];
                current.text = current.text ? `${current.text} ${line}` : line;
            }
        }
        this.queue = segments.filter(segment => segment.text.length > 0);
        this.currentIndex = 0;
    }

    /**
     * Play the queue from the current position; after a completed run the
     * next call starts again from the beginning.
     * @param {function(Object, number): void} [onUpdate] - Called with the
     *   segment and its index before each segment is spoken.
     * @returns {Promise<void>} Resolves when playback ends for any reason
     *   (finished, paused, failed or superseded).
     */
    async play(onUpdate) {
        if (this.queue.length === 0) return;
        if (this.currentIndex >= this.queue.length) this.currentIndex = 0;

        const session = ++this._session;
        this.isPlaying = true;
        const isCurrent = () => this.isPlaying && this._session === session;

        while (isCurrent() && this.currentIndex < this.queue.length) {
            const segment = this.queue[this.currentIndex];
            const settings = this.config.settings;
            // Determine voice based on speaker
            const voice = segment.speaker === 'Host' ? settings.podcastHost : settings.podcastGuest;
            if (onUpdate) onUpdate(segment, this.currentIndex);
            try {
                await this.audio.play(segment.text, {
                    useOpenAI: true,
                    apiKey: settings.apiKey,
                    aiVoice: voice,
                    rate: settings.rate
                });
            } catch (e) {
                if (!(e && e.name === 'AbortError')) console.error(e);
                break;
            }
            // Advance only when the segment really finished: after a pause the
            // interrupted segment must be replayed, not skipped.
            if (!isCurrent()) break;
            this.currentIndex++;
        }
        if (this._session === session) this.isPlaying = false;
    }

    /** Stop playback, keeping the current position for a later `play()`. */
    pause() {
        this.isPlaying = false;
        this.audio.stop();
    }
}
/**
 * Reads the scanned page blocks aloud one after another, highlighting the
 * block that is currently being read.
 */
class ReaderController {
    constructor(config, audioEngine) {
        this.config = config;
        this.audio = audioEngine;
        this.blocks = [];
        this.currentIndex = 0;
        this.isPlaying = false;
        this.selectedNativeVoice = null;
        this._session = 0; // Incremented per play() so a stale loop can detect it was superseded
        this._onUpdate = null; // Callback of the latest play(), reused when skip() restarts playback
    }

    /** Replace the list of readable blocks. */
    setBlocks(blocks) {
        this.blocks = blocks;
    }

    /**
     * Mark one block as active and scroll it into view; clears every other
     * block. An index without a block (e.g. -1) just clears the highlight.
     */
    highlight(index) {
        this.blocks.forEach(b => b.classList.remove('active-read-block'));
        if (this.blocks[index]) {
            this.blocks[index].classList.add('active-read-block');
            this.blocks[index].scrollIntoView({ behavior: 'smooth', block: 'center' });
        }
    }

    /**
     * Whether a rejection means "playback was stopped" rather than "playback
     * failed": an AbortError, or a speech synthesis error event whose `error`
     * is 'interrupted' or 'canceled'.
     */
    static _isCancellation(e) {
        return Boolean(e) &&
            (e.name === 'AbortError' || e.error === 'interrupted' || e.error === 'canceled');
    }

    /**
     * Read from the current position to the end; when the position is already
     * past the last block, start over from the first.
     * @param {function(number, number, boolean=): void} [onUpdate] - Called
     *   as `(index, total)` before each block and as `(index, total, false)`
     *   once when this run ends. A run superseded by a newer `play()` does
     *   not send the final call.
     * @returns {Promise<void>} Resolves when this run has ended.
     */
    async play(onUpdate) {
        const session = ++this._session;
        this._onUpdate = onUpdate;
        this.isPlaying = true;
        if (this.currentIndex >= this.blocks.length) this.currentIndex = 0;
        const isCurrent = () => this.isPlaying && this._session === session;

        while (isCurrent() && this.currentIndex < this.blocks.length) {
            const index = this.currentIndex;
            const block = this.blocks[index];
            const settings = this.config.settings;
            this.highlight(index);
            if (onUpdate) onUpdate(index, this.blocks.length);
            try {
                await this.audio.play(block.innerText, {
                    useOpenAI: !!settings.apiKey,
                    apiKey: settings.apiKey,
                    aiVoice: settings.openAIVoice,
                    nativeVoiceName: this.selectedNativeVoice,
                    rate: settings.rate
                });
            } catch (e) {
                if (!ReaderController._isCancellation(e)) console.error(e);
                break;
            }
            if (!isCurrent()) break;
            // Advance only if nothing moved the position while the block was read.
            if (this.currentIndex === index) this.currentIndex++;
        }

        // A newer play() owns the state now; leave it alone.
        if (this._session !== session) return;
        this.isPlaying = false;
        if (onUpdate) onUpdate(this.currentIndex, this.blocks.length, false); // Run ended
    }

    /** Stop reading, keeping the position and the highlight. */
    pause() {
        this.isPlaying = false;
        this.audio.stop();
    }

    /** Stop reading and clear the highlight. */
    stop() {
        this.pause();
        this.highlight(-1); // Clear all highlights
    }

    /**
     * Move the position by `direction` blocks, clamped to the block list.
     * While playing, reading continues immediately at the new position;
     * otherwise only the highlight moves. Does nothing without blocks.
     * @param {number} direction - Offset, e.g. -1 or 1.
     */
    skip(direction) {
        if (this.blocks.length === 0) return;
        const lastIndex = this.blocks.length - 1;
        const target = Math.min(Math.max(this.currentIndex + direction, 0), lastIndex);
        this.currentIndex = target;
        if (this.isPlaying) {
            // Restart the loop at the new position. Merely stopping the audio
            // would end (or stall) the running loop instead of moving it on.
            this.play(this._onUpdate).catch(e => console.error(e));
        } else {
            this.highlight(target);
        }
    }
}
// ==========================================
// 5. UI MANAGER (The View)
// ==========================================

/**
 * Builds the floating widget (Reader / Podcast / Settings tabs), wires its
 * controls to the reader, podcast and audio services, and keeps the cost
 * estimates and status texts up to date.
 *
 * Note: the API keys entered in the Settings tab are persisted through the
 * config object given to the constructor.
 */
class UIManager {
    constructor(config, reader, podcast, audio) {
        this.config = config;
        this.reader = reader;
        this.podcast = podcast;
        this.audio = audio;
        this.activeTab = 'read';
        this.init();
    }

    /**
     * Escape a value for use inside HTML text or a double-quoted attribute.
     * @param {*} value - Converted to a string first.
     * @returns {string} The escaped text.
     */
    static escapeHTML(value) {
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(value).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /** Build the `<option>` list of OpenAI voices with `selected` preselected. */
    static voiceOptions(selected) {
        return ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
            .map(v => `<option value="${v}" ${selected === v ? 'selected' : ''}>${v}</option>`)
            .join('');
    }

    /** Inject the stylesheet, create the widget and wire everything up. */
    init() {
        // Inject Styles. textContent (not innerText) so that the CSS is stored
        // verbatim instead of being run through line-break conversion.
        const styleSheet = document.createElement("style");
        styleSheet.textContent = CONSTANTS.STYLES;
        document.head.appendChild(styleSheet);

        // Create Container
        this.widget = document.createElement('div');
        this.widget.id = 'arxiv-tts-widget';
        this.widget.innerHTML = this.getHTML();
        document.body.appendChild(this.widget);

        this.makeDraggable();
        this.bindEvents();
        this.updateEstimates();

        // Populate Native Voices when ready
        this.audio.loadNativeVoices().then(voices => {
            this.populateNativeVoices(voices);
        });
    }

    /**
     * Render the widget markup from the current settings. Stored values that
     * can contain arbitrary text (API keys, prompt) are escaped so that they
     * cannot break out of their attribute or element.
     * @returns {string} HTML for the inside of the widget container.
     */
    getHTML() {
        const s = this.config.settings;
        const esc = UIManager.escapeHTML;
        const rate = esc(s.rate);
        const models = [
            ['google/gemini-2.0-flash-001', 'Gemini 2.0 Flash'],
            ['anthropic/claude-3.5-haiku', 'Claude 3.5 Haiku'],
            ['openai/gpt-4o-mini', 'GPT-4o Mini']
        ];
        const modelOpts = models
            .map(([value, label]) => `<option value="${value}" ${s.podcastModel === value ? 'selected' : ''}>${label}</option>`)
            .join('');

        return `
            <div id="tts-drag-handle" title="Drag to move"><div class="tts-grip-line"></div></div>
            <div class="tts-header-row">
                <div id="tab-read" class="tts-tab active">Reader</div>
                <div id="tab-podcast" class="tts-tab">Podcast</div>
                <div id="tab-settings" class="tts-tab">Settings</div>
            </div>
            <!-- READER -->
            <div id="view-read">
                <div class="tts-main-row">
                    <button id="tts-main-toggle" class="tts-main-btn"><span>▶</span> Read</button>
                </div>
                <div id="arxiv-tts-controls-expanded">
                    <div class="tts-sub-controls">
                        <button id="tts-prev" class="tts-btn">⏮</button>
                        <button id="tts-next" class="tts-btn">⏭</button>
                    </div>
                    <div id="tts-native-controls" style="${s.apiKey ? 'display:none' : ''}">
                        <div class="tts-label">System Voice</div>
                        <select id="tts-voice-select" class="tts-select"></select>
                    </div>
                    <div id="tts-ai-controls" style="${s.apiKey ? '' : 'display:none'}">
                        <div class="tts-label">AI Voice</div>
                        <select id="tts-ai-main-voice" class="tts-select">${UIManager.voiceOptions(s.openAIVoice)}</select>
                    </div>
                    <div class="tts-slider-container">
                        <label>Speed:</label>
                        <input type="range" id="tts-rate" min="0.5" max="2.5" step="0.1" value="${rate}" style="flex:1">
                        <span id="tts-rate-val">${rate}x</span>
                    </div>
                    <div style="display:flex; justify-content:space-between; margin-top:5px;">
                        <div id="tts-status" style="font-size:10px; color:#666;">Ready</div>
                        <div id="reader-cost-display" class="reader-cost-est" style="display:none;"></div>
                    </div>
                </div>
            </div>
            <!-- PODCAST -->
            <div id="view-podcast" style="display:none;">
                <div style="display:flex; gap:5px; align-items:center; margin-bottom:10px;">
                    <button id="podcast-gen-btn" class="tts-main-btn" style="flex:2;"><span>✨</span> Generate Script</button>
                    <div id="cost-badge" class="cost-badge" title="Est Cost">Est: ...</div>
                </div>
                <div class="podcast-casting">
                    <div style="flex:1"><div class="tts-label">Host</div><select id="podcast-host-select" class="tts-select">${UIManager.voiceOptions(s.podcastHost)}</select></div>
                    <div style="flex:1"><div class="tts-label">Guest</div><select id="podcast-guest-select" class="tts-select">${UIManager.voiceOptions(s.podcastGuest)}</select></div>
                </div>
                <div class="tts-slider-container">
                    <label>Speed:</label>
                    <input type="range" id="podcast-rate" min="0.5" max="2.5" step="0.1" value="${rate}" style="flex:1">
                    <span id="podcast-rate-val-2">${rate}x</span>
                </div>
                <div id="podcast-player-controls" style="display:none; margin-top:10px;">
                    <button id="podcast-play-btn" class="tts-main-btn">▶ Play Podcast</button>
                    <div id="podcast-script-preview" class="podcast-script-preview"></div>
                </div>
                <div id="podcast-status" class="podcast-status"></div>
            </div>
            <!-- SETTINGS -->
            <div id="view-settings" style="display:none;">
                <div style="max-height: 300px; overflow-y:auto;">
                    <div class="tts-label">OpenAI API Key</div>
                    <input type="password" id="tts-api-key" class="tts-input" value="${esc(s.apiKey)}">
                    <div class="tts-label">OpenRouter API Key</div>
                    <input type="password" id="tts-or-key" class="tts-input" value="${esc(s.openRouterKey)}">
                    <div class="tts-label">Model</div>
                    <select id="tts-model-select" class="tts-select">${modelOpts}</select>
                    <div class="tts-label">Prompt</div>
                    <textarea id="tts-podcast-prompt" class="tts-textarea">${esc(s.podcastPrompt)}</textarea>
                    <button id="tts-restore-defaults" class="tts-btn" style="margin-bottom:10px;">Restore Default</button>
                    <button id="tts-save-settings" class="tts-main-btn">Save</button>
                </div>
            </div>`;
    }

    /**
     * Let the user drag the widget by its handle. Listeners are attached with
     * addEventListener and removed after the drag, so handlers the page itself
     * set on `document.onmousemove` / `document.onmouseup` stay intact.
     */
    makeDraggable() {
        const el = this.widget;
        const handle = el.querySelector('#tts-drag-handle');
        let startX = 0;
        let startY = 0;
        let initialLeft = 0;
        let initialTop = 0;

        const onMouseMove = (e) => {
            e.preventDefault();
            el.style.left = (initialLeft + e.clientX - startX) + 'px';
            el.style.top = (initialTop + e.clientY - startY) + 'px';
        };
        const onMouseUp = () => {
            el.style.transition = 'opacity 0.2s ease';
            document.removeEventListener('mousemove', onMouseMove);
            document.removeEventListener('mouseup', onMouseUp);
        };

        handle.addEventListener('mousedown', (e) => {
            e.preventDefault();
            startX = e.clientX;
            startY = e.clientY;
            const rect = el.getBoundingClientRect();
            initialLeft = rect.left;
            initialTop = rect.top;
            // Switch from bottom/left anchoring to explicit left/top coordinates.
            el.style.bottom = 'auto';
            el.style.right = 'auto';
            el.style.left = initialLeft + 'px';
            el.style.top = initialTop + 'px';
            el.style.transition = 'none';
            document.addEventListener('mousemove', onMouseMove);
            document.addEventListener('mouseup', onMouseUp);
        });
    }

    /** Attach the handlers of every control in the widget. */
    bindEvents() {
        const $ = (id) => this.widget.querySelector(`#${id}`);

        // Tabs
        ['read', 'podcast', 'settings'].forEach(tab => {
            $(`tab-${tab}`).onclick = () => this.switchTab(tab);
        });

        // Toggle Play
        $(`tts-main-toggle`).onclick = () => {
            this.toggleReader();
        };

        // Reader Controls
        $(`tts-prev`).onclick = () => this.reader.skip(-1);
        $(`tts-next`).onclick = () => this.reader.skip(1);

        // Speed Sync: both sliders and both labels share one stored rate.
        const updateSpeed = (v) => {
            this.config.set('rate', v);
            $(`tts-rate`).value = v;
            $(`podcast-rate`).value = v;
            $(`tts-rate-val`).innerText = v + 'x';
            $(`podcast-rate-val-2`).innerText = v + 'x';
        };
        $(`tts-rate`).oninput = (e) => updateSpeed(e.target.value);
        $(`podcast-rate`).oninput = (e) => updateSpeed(e.target.value);

        // Settings Save
        $(`tts-save-settings`).onclick = () => this.saveSettings();
        $(`tts-restore-defaults`).onclick = () => {
            $(`tts-podcast-prompt`).value = CONSTANTS.DEFAULT_PROMPT;
        };

        // Podcast Generation
        $(`podcast-gen-btn`).onclick = async () => {
            const btn = $(`podcast-gen-btn`);
            const status = $(`podcast-status`);
            if (!this.config.settings.openRouterKey) {
                alert("Set OpenRouter Key first!");
                return;
            }
            // A running podcast would continue with indices of the old script.
            if (this.podcast.isPlaying) {
                this.podcast.pause();
                $(`podcast-play-btn`).innerText = "▶ Play Podcast";
            }
            const previousLabel = btn.innerHTML;
            btn.disabled = true;
            btn.innerHTML = '⏳ Generating...';
            try {
                const fullText = ScraperService.getFullText(this.reader.blocks);
                const script = await this.podcast.generateScript(fullText, (msg) => status.innerText = msg);
                status.innerText = "Ready!";
                btn.innerHTML = '🔄 Regenerate';
                $(`podcast-player-controls`).style.display = 'block';
                $(`podcast-script-preview`).style.display = 'block';
                $(`podcast-script-preview`).innerText = script;
            } catch (e) {
                console.error(e);
                status.innerText = "Error!";
                // Restore the label; otherwise the button keeps saying "Generating...".
                btn.innerHTML = previousLabel;
            } finally {
                btn.disabled = false;
            }
        };

        // Podcast Play
        $(`podcast-play-btn`).onclick = () => {
            const playBtn = $(`podcast-play-btn`);
            const status = $(`podcast-status`);
            if (this.podcast.isPlaying) {
                this.podcast.pause();
                playBtn.innerText = "▶ Play Podcast";
                return;
            }
            if (!this.config.settings.apiKey) {
                alert("OpenAI Key required for voices.");
                return;
            }
            playBtn.innerText = "⏸ Pause";
            this.podcast.play((seg) => {
                status.innerText = `Playing: ${seg.speaker}`;
            }).then(() => {
                // A newer play() may already be running; leave its UI state alone.
                if (this.podcast.isPlaying) return;
                const finished = this.podcast.currentIndex >= this.podcast.queue.length;
                playBtn.innerText = "▶ Play Podcast";
                status.innerText = finished ? "Finished" : "Paused";
            }).catch(e => console.error(e));
        };

        // ---- VOICE SELECTORS (Persist on Change) ----
        // Native Reader Voice
        $(`tts-voice-select`).onchange = (e) => {
            this.reader.selectedNativeVoice = e.target.value;
            this.config.set('readerNativeVoice', e.target.value);
        };
        // AI Reader Voice
        $(`tts-ai-main-voice`).onchange = (e) => {
            this.config.set('openAIVoice', e.target.value);
        };
        // Podcast Voices
        $(`podcast-host-select`).onchange = (e) => {
            this.config.set('podcastHost', e.target.value);
        };
        $(`podcast-guest-select`).onchange = (e) => {
            this.config.set('podcastGuest', e.target.value);
        };
    }

    /**
     * Fill the system-voice dropdown and preselect a voice.
     * Preference order: saved voice, a Microsoft English voice, any English
     * voice, the first voice.
     * @param {Array<{name: string, lang: string}>} voices
     */
    populateNativeVoices(voices) {
        const select = this.widget.querySelector('#tts-voice-select');
        select.innerHTML = '';
        voices.forEach(v => {
            const opt = document.createElement('option');
            // Ellipsis only when the name was actually shortened.
            const label = v.name.length > 20 ? `${v.name.slice(0, 20)}...` : v.name;
            opt.value = v.name;
            opt.textContent = `${label} (${v.lang})`;
            select.appendChild(opt);
        });

        const savedName = this.config.settings.readerNativeVoice;
        const targetVoice =
            voices.find(v => v.name === savedName) ||
            voices.find(v => v.name.includes('Microsoft') && v.lang.startsWith('en')) ||
            voices.find(v => v.lang.startsWith('en')) ||
            voices[0];
        if (targetVoice) {
            select.value = targetVoice.name;
            this.reader.selectedNativeVoice = targetVoice.name;
        }
    }

    /** Start or pause the reader and keep the button icon and status in sync. */
    toggleReader() {
        const icon = this.widget.querySelector('#tts-main-toggle span');
        if (this.reader.isPlaying) {
            this.reader.pause();
            icon.innerText = '▶';
            return;
        }
        icon.innerText = '⏸';
        this.reader.play((idx, total, active = true) => {
            const status = this.widget.querySelector('#tts-status');
            if (active) {
                status.innerText = `Reading ${idx + 1}/${total}`;
                return;
            }
            // The run ended: at the end of the document, or earlier.
            status.innerText = idx >= total ? `Finished` : `Paused`;
            icon.innerText = '▶';
        });
    }

    /** Show one tab and stop any audio that belongs to the others. */
    switchTab(tab) {
        // Stop Reader Audio & Highlight if playing/present
        this.reader.stop();
        this.widget.querySelector('#tts-main-toggle span').innerText = '▶';

        // Stop Podcast Audio if playing
        if (this.podcast.isPlaying) {
            this.podcast.pause();
            this.widget.querySelector('#podcast-play-btn').innerText = "▶ Play Podcast";
            this.widget.querySelector('#podcast-status').innerText = "Paused";
        }

        ['read', 'podcast', 'settings'].forEach(t => {
            this.widget.querySelector(`#tab-${t}`).classList.toggle('active', t === tab);
            this.widget.querySelector(`#view-${t}`).style.display = t === tab ? 'block' : 'none';
        });
    }

    /** Persist the Settings tab, refresh dependent UI and return to the reader. */
    saveSettings() {
        const $ = (id) => this.widget.querySelector(`#${id}`);
        const c = this.config;
        c.set('openAIKey', $(`tts-api-key`).value);
        c.set('openRouterKey', $(`tts-or-key`).value);
        c.set('podcastModel', $(`tts-model-select`).value);
        c.set('podcastPrompt', $(`tts-podcast-prompt`).value);
        // Voices already saved on change, but we can ensure sync here just in case
        c.set('openAIVoice', $(`tts-ai-main-voice`).value);
        c.set('podcastHost', $(`podcast-host-select`).value);
        c.set('podcastGuest', $(`podcast-guest-select`).value);

        // UI Refresh: with an OpenAI key the AI voice picker replaces the native one.
        const hasKey = !!$(`tts-api-key`).value;
        $(`tts-native-controls`).style.display = hasKey ? 'none' : 'block';
        $(`tts-ai-controls`).style.display = hasKey ? 'block' : 'none';
        this.updateEstimates();

        // Visual feedback
        const btn = $(`tts-save-settings`);
        btn.style.background = '#4caf50';
        btn.innerText = 'Saved!';
        setTimeout(() => {
            btn.style.background = '#b31b1b';
            btn.innerText = 'Save';
            this.switchTab('read');
        }, 800);
    }

    /** Recompute and display the reader and podcast cost estimates. */
    updateEstimates() {
        const fullText = ScraperService.getFullText(this.reader.blocks);
        const settings = this.config.settings;
        const costs = CostService.calculate(fullText.length, settings.podcastModel);

        // Reader Estimate (only relevant when the paid OpenAI voice is in use)
        const rBadge = this.widget.querySelector('#reader-cost-display');
        if (settings.apiKey) {
            rBadge.innerText = `Full Read Est: $${costs.reader.toFixed(2)}`;
            rBadge.style.display = 'block';
        } else {
            rBadge.style.display = 'none';
        }

        // Podcast Estimate
        const pBadge = this.widget.querySelector('#cost-badge');
        pBadge.innerText = `Est: $${costs.podcastTotal.toFixed(2)}`;
        pBadge.title = `LLM: $${costs.details.llmCost.toFixed(4)} | TTS: $${costs.details.podcastTtsCost.toFixed(4)}`;
    }
}
// ==========================================
// 6. MAIN BOOTSTRAP
// ==========================================

/**
 * Build the widget once readable blocks are found on the page.
 *
 * When no blocks are found yet (content may still be loading) the scan is
 * retried every second, up to 10 times, instead of polling forever.
 *
 * @param {number|*} [attempt] - Number of retries already made. Any
 *   non-number (such as the Event passed by a `load` listener) counts as 0.
 * @returns {number|undefined} The timer id when a retry was scheduled.
 */
function init(attempt) {
    const maxRetries = 10;
    const retriesSoFar = typeof attempt === 'number' ? attempt : 0;

    if (document.getElementById('arxiv-tts-widget')) return;

    // Find blocks
    const blocks = ScraperService.scan();
    if (blocks.length === 0) {
        if (retriesSoFar >= maxRetries) {
            console.warn('ArXiv TTS: no readable content found; giving up.');
            return;
        }
        // Retry for async loading
        return setTimeout(() => init(retriesSoFar + 1), 1000);
    }
    console.log(`ArXiv TTS: Loaded with ${blocks.length} blocks.`);

    // Instantiate System
    const config = new ConfigManager();
    const audio = new AudioEngine();
    const reader = new ReaderController(config, audio);
    const podcast = new PodcastController(config, audio);
    reader.setBlocks(blocks);

    // Instantiate UI (glues everything)
    new UIManager(config, reader, podcast, audio);
}
// Start right away when the DOM is already parsed; otherwise wait for the
// window's load event.
const isDomReady = ['complete', 'interactive'].includes(document.readyState);
if (!isDomReady) {
    window.addEventListener('load', init);
} else {
    init();
}
| })(); |
Comments are disabled for this gist.