browser-captions/transcriber.py
bunker-admin c7becf330c Initial commit: Live Captions web application
Real-time speech-to-text using OpenAI Whisper (faster-whisper).
Features browser audio capture, WebSocket streaming, and customizable display settings.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 08:53:40 -07:00

103 lines
2.5 KiB
Python

"""
Whisper transcription module using faster-whisper.
"""
import os
import io
import tempfile
import logging
from faster_whisper import WhisperModel
from pydub import AudioSegment
logger = logging.getLogger(__name__)
# Global model instance (loaded once)
_model = None
def get_model():
    """
    Get the process-wide Whisper model, loading it on first call.

    Model size, device, and compute type are read from the environment
    variables WHISPER_MODEL, WHISPER_DEVICE, and WHISPER_COMPUTE_TYPE
    (defaults: 'base', 'cpu', 'int8').

    Returns:
        The cached faster_whisper.WhisperModel instance.
    """
    global _model
    if _model is None:
        model_size = os.environ.get('WHISPER_MODEL', 'base')
        device = os.environ.get('WHISPER_DEVICE', 'cpu')
        compute_type = os.environ.get('WHISPER_COMPUTE_TYPE', 'int8')
        # Lazy %-style args: the message is only formatted if the record is emitted.
        logger.info("Loading Whisper model: %s on %s (%s)", model_size, device, compute_type)
        _model = WhisperModel(
            model_size,
            device=device,
            compute_type=compute_type
        )
        logger.info("Whisper model loaded successfully")
    return _model
def transcribe_audio(audio_bytes, format='webm'):
    """
    Transcribe raw audio bytes to text.

    Args:
        audio_bytes: Raw encoded audio data (e.g. a WebM blob from the browser).
        format: Container/codec format understood by pydub/ffmpeg (default: webm).

    Returns:
        The transcribed text, or "" for empty input or on any
        decoding/transcription error (best-effort: errors are logged,
        never raised to the caller).
    """
    if not audio_bytes:
        return ""
    try:
        # Decode the compressed audio via pydub (ffmpeg under the hood).
        audio = AudioSegment.from_file(
            io.BytesIO(audio_bytes),
            format=format
        )
        # Resample to 16 kHz mono — the input format Whisper expects.
        audio = audio.set_frame_rate(16000).set_channels(1)
        # faster-whisper takes a file path, so export to a temp WAV.
        # delete=False + manual unlink lets the file be re-opened by
        # ffmpeg/Whisper on platforms (Windows) that forbid a second open.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
            audio.export(tmp.name, format='wav')
            tmp_path = tmp.name
        try:
            model = get_model()
            # info (language/duration metadata) is unused here.
            segments, _ = model.transcribe(
                tmp_path,
                beam_size=5,
                vad_filter=True,  # skip silent stretches before decoding
                vad_parameters=dict(
                    min_silence_duration_ms=500
                )
            )
            # Segments are yielded lazily; join them into one string.
            text = ' '.join(segment.text.strip() for segment in segments)
            return text.strip()
        finally:
            # Always remove the temp WAV, even if transcription fails.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
    except Exception as e:
        # logger.exception records the full traceback, not just str(e),
        # which makes decode/transcribe failures diagnosable from logs.
        logger.exception("Transcription error: %s", e)
        return ""
def preload_model():
    """Warm up the Whisper model at startup so the first request is fast.

    Returns:
        True if the model loaded, False if loading raised an exception
        (the error is logged, not propagated).
    """
    try:
        get_model()
    except Exception as e:
        logger.error(f"Failed to preload model: {e}")
        return False
    return True