268 lines
7.3 KiB
Python
268 lines
7.3 KiB
Python
"""
|
|
Live Captions - Flask Application
|
|
|
|
A web-based live captioning application using Whisper for speech recognition.
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
from datetime import datetime
|
|
|
|
from flask import Flask, render_template, jsonify, request
|
|
from flask_socketio import SocketIO, emit
|
|
from dotenv import load_dotenv
|
|
|
|
import database
|
|
import transcriber
|
|
import recordings
|
|
|
|
# Load environment variables
# Called before the os.environ lookups below; presumably this pulls values
# from a local .env file into the process environment (python-dotenv) -
# confirm against the deployment setup.
load_dotenv()

# Configure logging
# Root logger at INFO; each record carries timestamp, logger name, level
# and message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Flask app
app = Flask(__name__)
# NOTE(review): when SECRET_KEY is not set in the environment this falls
# back to a fixed literal that is visible in the source - make sure real
# deployments always provide SECRET_KEY.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'live-captions-secret')

# Initialize SocketIO with gevent
# NOTE(review): cors_allowed_origins="*" places no restriction on the
# origin of Socket.IO connections - confirm this is intended outside of
# local development.
socketio = SocketIO(
    app,
    cors_allowed_origins="*",
    async_mode='gevent'
)
|
|
|
|
|
|
# =============================================================================
|
|
# Routes
|
|
# =============================================================================
|
|
|
|
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root and return it."""
    page = render_template('index.html')
    return page
|
|
|
|
|
|
@app.route('/api/health')
def health():
    """Answer a liveness probe with the fixed JSON body ``{"status": "healthy"}``."""
    payload = {'status': 'healthy'}
    return jsonify(payload)
|
|
|
|
|
|
@app.route('/api/settings', methods=['GET'])
def get_settings():
    """Return the value of ``database.get_settings()`` serialized as JSON."""
    return jsonify(database.get_settings())
|
|
|
|
|
|
@app.route('/api/settings', methods=['PUT'])
def update_settings():
    """Update user settings from a JSON object and notify connected clients.

    The request body must be a non-empty JSON object. It is passed to
    ``database.update_settings`` and the value that call returns is both
    broadcast as a ``settings_updated`` Socket.IO event and returned as
    the JSON response.

    Returns:
        The updated settings as JSON, or a JSON ``{'error': ...}`` body
        with status 400 when the payload is missing, unparsable, empty,
        or not a JSON object.
    """
    # silent=True makes get_json() return None for a missing, mistyped or
    # malformed body instead of aborting the request, so every rejection
    # below is reported in this API's own JSON error shape.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({'error': 'No data provided'}), 400

    # A truthy list, string or number would pass the check above; settings
    # are key/value pairs, so reject anything that is not a JSON object
    # before it reaches the database layer.
    if not isinstance(data, dict):
        return jsonify({'error': 'Settings must be a JSON object'}), 400

    settings = database.update_settings(data)

    # Broadcast settings update to all clients
    socketio.emit('settings_updated', settings)

    return jsonify(settings)
|
|
|
|
|
|
@app.route('/api/settings/reset', methods=['POST'])
def reset_settings():
    """Reset settings through the database layer and announce the result.

    The value returned by ``database.reset_settings()`` is sent to clients
    as a ``settings_updated`` event and also returned as the JSON response.
    """
    restored = database.reset_settings()

    # Let connected clients pick up the reset values.
    socketio.emit('settings_updated', restored)

    return jsonify(restored)
|
|
|
|
|
|
@app.route('/api/recordings', methods=['GET'])
def list_recordings():
    """Return the result of ``recordings.list_recordings()`` as JSON."""
    saved = recordings.list_recordings()
    return jsonify(saved)
|
|
|
|
|
|
@app.route('/api/recordings/<filename>', methods=['GET'])
def get_recording(filename):
    """Return one recording as JSON, or a 404 JSON error.

    Args:
        filename: Recording name taken from the URL path.
    """
    found = recordings.get_recording(filename)

    # A falsy lookup result is reported as "not found".
    if not found:
        return jsonify({'error': 'Recording not found'}), 404

    return jsonify(found)
|
|
|
|
|
|
@app.route('/api/recordings/<filename>', methods=['DELETE'])
def delete_recording(filename):
    """Delete one recording and report the outcome as JSON.

    Args:
        filename: Recording name taken from the URL path.

    Returns:
        ``{'success': True}`` when ``recordings.delete_recording`` returns
        a truthy value, otherwise a JSON error with status 400.
    """
    deleted = recordings.delete_recording(filename)

    if not deleted:
        return jsonify({'error': 'Failed to delete recording'}), 400

    return jsonify({'success': True})
|
|
|
|
|
|
# =============================================================================
|
|
# WebSocket Events
|
|
# =============================================================================
|
|
|
|
@socketio.on('connect')
def handle_connect():
    """Log a new Socket.IO connection and send it the current settings."""
    logger.info(f"Client connected: {request.sid}")

    # Reply with the stored settings via a 'settings_updated' event.
    emit('settings_updated', database.get_settings())
|
|
|
|
|
|
@socketio.on('disconnect')
def handle_disconnect():
    """Log the session id of a client whose Socket.IO connection ended."""
    message = f"Client disconnected: {request.sid}"
    logger.info(message)
|
|
|
|
|
|
def _transcribe_and_emit(data, result_event, log_label, source_label):
    """Transcribe one audio payload and emit the text to the sending client.

    Shared implementation behind the 'audio_data' and 'desktop_audio_data'
    events, which differ only in the event they answer with and in the
    wording of their log and error messages.

    Args:
        data: Dictionary containing 'audio' (base64 string or bytes) and
            optionally 'format' (defaults to 'webm').
        result_event: Name of the event emitted with ``{'text': ...}``
            when the transcriber returns non-empty text.
        log_label: Prefix for the info log line, e.g. 'Transcription'.
        source_label: Wording used in error messages, e.g. 'audio'.
    """
    # The original handlers imported base64 inside the function as well;
    # keeping the import local avoids depending on the file-level imports.
    import base64

    try:
        audio_bytes = data.get('audio')
        audio_format = data.get('format', 'webm')

        # Nothing to transcribe: stay silent rather than report an error.
        if not audio_bytes:
            return

        # Handle base64 encoded audio
        if isinstance(audio_bytes, str):
            audio_bytes = base64.b64decode(audio_bytes)

        # Transcribe audio
        text = transcriber.transcribe_audio(audio_bytes, format=audio_format)

        if text:
            logger.info("%s: %s", log_label, text)
            emit(result_event, {'text': text})

    except Exception as e:
        # Event-handler boundary: log with the traceback (logger.error
        # alone dropped it) and send the client a generic failure message.
        logger.exception("Error processing %s: %s", source_label, e)
        emit('error', {'message': f'Failed to process {source_label}'})


@socketio.on('audio_data')
def handle_audio_data(data):
    """
    Handle incoming audio data from client.

    Emits 'transcription' with ``{'text': ...}`` when text is produced and
    'error' with a generic message when processing fails.

    Args:
        data: Dictionary containing 'audio' (base64 or bytes) and 'format'
    """
    _transcribe_and_emit(data, 'transcription', 'Transcription', 'audio')


@socketio.on('desktop_audio_data')
def handle_desktop_audio_data(data):
    """
    Handle incoming desktop audio data from client.

    Emits 'desktop_transcription' with ``{'text': ...}`` when text is
    produced and 'error' with a generic message when processing fails.

    Args:
        data: Dictionary containing 'audio' (base64 or bytes) and 'format'
    """
    _transcribe_and_emit(
        data,
        'desktop_transcription',
        'Desktop transcription',
        'desktop audio',
    )
|
|
|
|
|
|
def _parse_client_timestamp(value):
    """Parse an ISO-8601 string sent by the client; return None when absent."""
    if not value:
        return None
    # datetime.fromisoformat() only accepts a trailing 'Z' from Python 3.11
    # on; rewriting it as an explicit UTC offset works on older versions too.
    return datetime.fromisoformat(value.replace('Z', '+00:00'))


@socketio.on('save_recording')
def handle_save_recording(data):
    """Handle saving a recording session.

    Reads 'startTime', 'endTime', 'transcript' and 'wordCount' from
    ``data`` and passes them, together with the sender's session id, to
    ``recordings.save_recording``.

    Emits 'recording_saved' with ``{'filename': ...}`` when a filename is
    returned, and 'recording_error' with ``{'message': ...}`` when the
    save returns a falsy value or an exception is raised.

    Args:
        data: Dictionary sent by the client with the keys listed above;
            every key is optional.
    """
    client_id = request.sid

    try:
        # Parse timestamps from client
        start_time = _parse_client_timestamp(data.get('startTime'))
        end_time = _parse_client_timestamp(data.get('endTime'))

        # A missing timestamp falls back to "now". Use the tzinfo of the
        # timestamp that was supplied so the pair is never one aware and
        # one naive datetime, which cannot be subtracted or compared.
        # With neither supplied this stays a naive local time, as before.
        supplied = start_time if start_time is not None else end_time
        fallback_tz = supplied.tzinfo if supplied is not None else None

        if start_time is None:
            start_time = datetime.now(fallback_tz)
        if end_time is None:
            end_time = datetime.now(fallback_tz)

        transcript = data.get('transcript', '')
        word_count = data.get('wordCount', 0)

        # Save the recording
        filename = recordings.save_recording(
            start_time=start_time,
            end_time=end_time,
            transcript=transcript,
            word_count=word_count,
            client_id=client_id
        )

        if filename:
            logger.info(f"Recording saved: {filename}")
            emit('recording_saved', {'filename': filename})
        else:
            emit('recording_error', {'message': 'Failed to save recording'})

    except Exception as e:
        # Event-handler boundary: keep the traceback in the log and report
        # the failure to the client instead of letting it propagate.
        logger.exception(f"Error saving recording: {e}")
        emit('recording_error', {'message': str(e)})
|
|
|
|
|
|
# =============================================================================
|
|
# Startup
|
|
# =============================================================================
|
|
|
|
def initialize():
    """Prepare the database and try to preload the Whisper model.

    A failed preload is logged as a warning and does not raise.
    """
    logger.info("Initializing Live Captions...")

    # Database first.
    database.init_db()
    logger.info("Database initialized")

    # Then the speech model.
    logger.info("Preloading Whisper model (this may take a moment)...")
    model_ready = transcriber.preload_model()
    if not model_ready:
        logger.warning("Failed to preload Whisper model")
        return

    logger.info("Whisper model ready")
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: set up components, then serve until stopped.
    initialize()

    # Runtime options come from the environment, each with a default.
    debug = os.environ.get('DEBUG', 'false').lower() == 'true'
    port = int(os.environ.get('PORT', 5000))
    host = os.environ.get('HOST', '0.0.0.0')

    logger.info(f"Starting Live Captions on {host}:{port}")
    socketio.run(app, host=host, port=port, debug=debug)
|