Real-time speech-to-text using OpenAI Whisper (faster-whisper). Features browser audio capture, WebSocket streaming, and customizable display settings. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
356 lines
9.9 KiB
JavaScript
356 lines
9.9 KiB
JavaScript
/**
 * Live Captions - Main Application
 * Handles audio capture and WebSocket communication
 */
|
|
|
|
/**
 * Application singleton: captures microphone audio in short chunks, sends each
 * chunk to the server over the socket, and renders the transcriptions that
 * come back as a rolling, word-by-word caption line.
 *
 * Relies on globals provided elsewhere on the page: `io` (socket factory),
 * `Settings` (settings module) and the browser media APIs.
 */
const App = {
  // WebSocket connection (created by connectSocket)
  socket: null,
  // Tracks the last connect/disconnect event so the status indicator can be
  // restored correctly after a recording ends.
  isConnected: false,

  // Audio recording
  mediaRecorder: null,
  audioStream: null,
  audioChunks: [],
  isRecording: false,
  // True while startRecording is waiting for microphone access; used to
  // reject overlapping start requests.
  isStarting: false,
  // Handle of the setTimeout that ends the current recording cycle
  recordingInterval: null,

  // Continuous caption stream
  wordBuffer: [], // words currently displayed
  pendingWords: [], // words waiting to be animated in
  wordAnimationTimer: null,

  // Auto-save recording state
  sessionStartTime: null,
  sessionTranscript: [],

  // DOM elements (filled by cacheElements)
  elements: {},

  /**
   * Initialize the application: look up DOM nodes, wire UI events, open the
   * socket, then initialize the settings module.
   */
  init() {
    this.cacheElements();
    this.bindEvents();
    this.connectSocket();

    // Initialize settings module
    Settings.init();
  },

  /**
   * Cache DOM element references used by the other methods.
   */
  cacheElements() {
    this.elements = {
      btnStart: document.getElementById('btn-start'),
      btnStop: document.getElementById('btn-stop'),
      btnClear: document.getElementById('btn-clear'),
      autoSaveToggle: document.getElementById('auto-save-toggle'),
      captions: document.getElementById('captions'),
      statusDot: document.getElementById('status-dot'),
      statusText: document.getElementById('status-text'),
    };
  },

  /**
   * Bind UI event listeners and restore the persisted auto-save preference.
   */
  bindEvents() {
    this.elements.btnStart.addEventListener('click', () => this.startRecording());
    this.elements.btnStop.addEventListener('click', () => this.stopRecording());
    this.elements.btnClear.addEventListener('click', () => this.clearCaptions());

    // Load auto-save preference from localStorage (stored as 'true'/'false')
    const savedPref = localStorage.getItem('autoSaveEnabled');
    if (savedPref === 'true') {
      this.elements.autoSaveToggle.checked = true;
    }

    // Save preference when toggled
    this.elements.autoSaveToggle.addEventListener('change', (e) => {
      localStorage.setItem('autoSaveEnabled', String(e.target.checked));
    });
  },

  /**
   * Connect to the WebSocket server and register all server event handlers.
   */
  connectSocket() {
    this.socket = io();

    this.socket.on('connect', () => {
      console.log('Connected to server');
      this.isConnected = true;
      // A reconnect in the middle of a recording must not hide the fact that
      // the microphone is still live.
      if (this.isRecording) {
        this.setStatus('recording', 'Recording...');
      } else {
        this.setStatus('connected', 'Connected');
      }
    });

    this.socket.on('disconnect', () => {
      console.log('Disconnected from server');
      this.isConnected = false;
      this.setStatus('disconnected', 'Disconnected');
    });

    this.socket.on('transcription', (data) => {
      // addWords ignores anything that is not a non-blank string, so a
      // malformed payload cannot throw inside the socket handler.
      this.addWords(data && data.text);
    });

    this.socket.on('settings_updated', (settings) => {
      Settings.applySettings(settings);
    });

    this.socket.on('error', (data) => {
      console.error('Server error:', data.message);
    });

    this.socket.on('recording_saved', (data) => {
      console.log('Recording saved:', data.filename);
    });

    this.socket.on('recording_error', (data) => {
      console.error('Recording error:', data.message);
    });
  },

  /**
   * Update the status indicator.
   * @param {string} state - Suffix for the dot's class name (`dot <state>`).
   * @param {string} text - Human-readable status label.
   */
  setStatus(state, text) {
    this.elements.statusDot.className = `dot ${state}`;
    this.elements.statusText.textContent = text;
  },

  /**
   * Start audio recording: request the microphone, flip the UI into the
   * recording state, reset the auto-save session and start the first cycle.
   *
   * Calls made while a recording is active or still starting are ignored.
   * On any failure the partially started state is rolled back (stream
   * released, buttons restored) and an error status is shown.
   * @returns {Promise<void>}
   */
  async startRecording() {
    // The start button is only disabled after getUserMedia resolves, so guard
    // against a second request arriving in the meantime (it would otherwise
    // acquire and leak another stream).
    if (this.isRecording || this.isStarting) return;
    this.isStarting = true;

    try {
      this.audioStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          sampleRate: 16000,
        },
      });

      this.isRecording = true;
      this.elements.btnStart.disabled = true;
      this.elements.btnStop.disabled = false;
      this.setStatus('recording', 'Recording...');

      // Reset session transcript for auto-save
      this.sessionStartTime = new Date();
      this.sessionTranscript = [];

      // Start the recording cycle
      this.startRecordingCycle();
    } catch (error) {
      console.error('Error starting recording:', error);

      // If the stream exists the microphone was granted and the failure came
      // from setting up the recorder instead.
      const hadStream = Boolean(this.audioStream);

      // Roll back so the UI is usable again and the microphone is released.
      this.isRecording = false;
      if (this.recordingInterval) {
        clearTimeout(this.recordingInterval);
        this.recordingInterval = null;
      }
      if (this.audioStream) {
        this.audioStream.getTracks().forEach((track) => track.stop());
        this.audioStream = null;
      }
      this.elements.btnStart.disabled = false;
      this.elements.btnStop.disabled = true;

      this.setStatus(
        'error',
        hadStream ? 'Unable to start recording' : 'Microphone access denied'
      );
    } finally {
      this.isStarting = false;
    }
  },

  /**
   * Start a recording cycle - record for a fixed duration, then send the
   * finished blob and start the next cycle while recording is still active.
   *
   * A fresh MediaRecorder is used per cycle so that every blob sent is a
   * complete, independently decodable recording.
   */
  startRecordingCycle() {
    if (!this.isRecording || !this.audioStream) return;

    // Determine best supported MIME type
    let mimeType = 'audio/webm';
    if (MediaRecorder.isTypeSupported('audio/webm;codecs=opus')) {
      mimeType = 'audio/webm;codecs=opus';
    }

    // Chunks and recorder are captured per cycle so that a late `stop` event
    // from a previous recorder can neither mix its data into the current
    // cycle nor start a second, concurrent cycle.
    const chunks = [];
    const recorder = new MediaRecorder(this.audioStream, { mimeType });
    this.audioChunks = chunks;
    this.mediaRecorder = recorder;

    recorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        chunks.push(event.data);
      }
    };

    recorder.onstop = () => {
      // Create a complete blob from all chunks
      if (chunks.length > 0) {
        const blob = new Blob(chunks, { type: 'audio/webm' });
        this.sendAudioBlob(blob);
      }

      // Start next cycle if still recording and this recorder is still the
      // active one.
      if (this.isRecording && this.mediaRecorder === recorder) {
        this.startRecordingCycle();
      }
    };

    // Start recording
    recorder.start();

    // Stop after the configured duration to get a complete blob
    // Using 1.5 seconds for more responsive streaming
    const chunkDuration = 1500;
    this.recordingInterval = setTimeout(() => {
      if (recorder.state === 'recording') {
        recorder.stop();
      }
    }, chunkDuration);
  },

  /**
   * Stop audio recording, release the microphone, restore the UI and, when
   * auto-save is enabled and words were captured, save the session.
   *
   * NOTE(review): auto-save runs immediately, and addWords only appends to
   * the session transcript while `isRecording` is true, so a transcription
   * of the final chunk that arrives after this call is not part of the saved
   * transcript.
   */
  stopRecording() {
    this.isRecording = false;

    // Clear the pending end-of-cycle timer
    if (this.recordingInterval) {
      clearTimeout(this.recordingInterval);
      this.recordingInterval = null;
    }

    // Stop the media recorder; its onstop handler sends the final blob
    if (this.mediaRecorder && this.mediaRecorder.state === 'recording') {
      this.mediaRecorder.stop();
    }

    // Stop all tracks
    if (this.audioStream) {
      this.audioStream.getTracks().forEach((track) => track.stop());
      this.audioStream = null;
    }

    this.elements.btnStart.disabled = false;
    this.elements.btnStop.disabled = true;

    // Reflect the real connection state instead of assuming the socket is up
    if (this.isConnected) {
      this.setStatus('connected', 'Connected');
    } else {
      this.setStatus('disconnected', 'Disconnected');
    }

    // Auto-save if enabled and we have content
    if (this.elements.autoSaveToggle.checked && this.sessionTranscript.length > 0) {
      this.saveRecording();
    }
  },

  /**
   * Send a complete audio blob to the server as base64 via `audio_data`.
   * @param {Blob} blob - Finished recording for one cycle.
   */
  sendAudioBlob(blob) {
    const reader = new FileReader();

    reader.onloadend = () => {
      // loadend also fires after a failed or aborted read, in which case
      // `result` is null and there is nothing to send.
      if (typeof reader.result !== 'string') {
        console.error('Failed to read audio blob:', reader.error);
        return;
      }

      // Get base64 data without the data URL prefix
      const base64 = reader.result.split(',')[1];
      if (!base64) return;

      this.socket.emit('audio_data', {
        audio: base64,
        format: 'webm',
      });
    };

    reader.readAsDataURL(blob);
  },

  /**
   * Add words to the continuous caption stream.
   * @param {string} text - Transcribed text; non-string or blank input is
   *   ignored.
   */
  addWords(text) {
    if (typeof text !== 'string') return;

    const trimmed = text.trim();
    if (!trimmed) return;

    // Split incoming text into words
    const newWords = trimmed.split(/\s+/);

    // Add to pending queue for animated display
    this.pendingWords.push(...newWords);

    // Accumulate to session transcript for auto-save
    if (this.isRecording) {
      this.sessionTranscript.push(...newWords);
    }

    // Start animation if not already running
    if (!this.wordAnimationTimer) {
      this.animateNextWord();
    }
  },

  /**
   * Animate words appearing one by one: move one pending word into the
   * display buffer, redraw, and schedule the next step.
   */
  animateNextWord() {
    if (this.pendingWords.length === 0) {
      // Queue drained; a null timer lets addWords restart the animation
      this.wordAnimationTimer = null;
      return;
    }

    // Get next word from queue
    const word = this.pendingWords.shift();
    this.wordBuffer.push(word);

    // Get max words from settings (falls back to 30 when unset or 0)
    const maxWords = Settings.current.max_words || 30;

    // Trim buffer to max words, dropping the oldest first
    while (this.wordBuffer.length > maxWords) {
      this.wordBuffer.shift();
    }

    // Update display
    this.updateCaptionDisplay();

    // Calculate delay based on pending words
    // Faster if more words pending, slower if caught up
    const baseDelay = 80; // ms per word
    const minDelay = 30;
    const delay = this.pendingWords.length > 10 ? minDelay : baseDelay;

    // Schedule next word
    this.wordAnimationTimer = setTimeout(() => {
      this.animateNextWord();
    }, delay);
  },

  /**
   * Update the caption display with the current word buffer.
   */
  updateCaptionDisplay() {
    const text = this.wordBuffer.join(' ');
    this.elements.captions.textContent = text;
  },

  /**
   * Clear all captions, including words still waiting to be animated.
   */
  clearCaptions() {
    // Clear animation timer
    if (this.wordAnimationTimer) {
      clearTimeout(this.wordAnimationTimer);
      this.wordAnimationTimer = null;
    }

    this.wordBuffer = [];
    this.pendingWords = [];
    this.elements.captions.textContent = '';
  },

  /**
   * Save the current recording session by emitting `save_recording`, then
   * reset the session state. Does nothing when no session was started.
   */
  saveRecording() {
    if (!this.sessionStartTime) return;

    const endTime = new Date();
    const transcript = this.sessionTranscript.join(' ');

    this.socket.emit('save_recording', {
      startTime: this.sessionStartTime.toISOString(),
      endTime: endTime.toISOString(),
      transcript: transcript,
      wordCount: this.sessionTranscript.length,
    });

    // Reset session state
    this.sessionStartTime = null;
    this.sessionTranscript = [];
  },
};
|
|
|
|
// Boot the application once the document has been parsed
document.addEventListener('DOMContentLoaded', function onDomReady() {
  App.init();
});
|