""" Live Captions - Flask Application A web-based live captioning application using Whisper for speech recognition. """ import os import logging from datetime import datetime from flask import Flask, render_template, jsonify, request from flask_socketio import SocketIO, emit from dotenv import load_dotenv import database import transcriber import recordings # Load environment variables load_dotenv() # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) # Initialize Flask app app = Flask(__name__) app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'live-captions-secret') # Initialize SocketIO with gevent socketio = SocketIO( app, cors_allowed_origins="*", async_mode='gevent' ) # ============================================================================= # Routes # ============================================================================= @app.route('/') def index(): """Serve the main page.""" return render_template('index.html') @app.route('/api/health') def health(): """Health check endpoint.""" return jsonify({'status': 'healthy'}) @app.route('/api/settings', methods=['GET']) def get_settings(): """Get current user settings.""" settings = database.get_settings() return jsonify(settings) @app.route('/api/settings', methods=['PUT']) def update_settings(): """Update user settings.""" data = request.get_json() if not data: return jsonify({'error': 'No data provided'}), 400 settings = database.update_settings(data) # Broadcast settings update to all clients socketio.emit('settings_updated', settings) return jsonify(settings) @app.route('/api/settings/reset', methods=['POST']) def reset_settings(): """Reset settings to defaults.""" settings = database.reset_settings() # Broadcast settings update to all clients socketio.emit('settings_updated', settings) return jsonify(settings) @app.route('/api/recordings', methods=['GET']) def list_recordings(): """List all saved recordings.""" return jsonify(recordings.list_recordings()) @app.route('/api/recordings/', methods=['GET']) def get_recording(filename): """Get a specific recording's content.""" recording = recordings.get_recording(filename) if recording: return jsonify(recording) return jsonify({'error': 'Recording not found'}), 404 @app.route('/api/recordings/', methods=['DELETE']) def delete_recording(filename): """Delete a specific recording.""" if recordings.delete_recording(filename): return jsonify({'success': True}) return jsonify({'error': 'Failed to delete recording'}), 400 # ============================================================================= # WebSocket Events # ============================================================================= @socketio.on('connect') def handle_connect(): """Handle client connection.""" logger.info(f"Client connected: {request.sid}") # Send current settings to the newly connected client settings = database.get_settings() emit('settings_updated', settings) @socketio.on('disconnect') def handle_disconnect(): """Handle client disconnection.""" logger.info(f"Client disconnected: {request.sid}") @socketio.on('audio_data') def handle_audio_data(data): """ Handle incoming audio data from client. Args: data: Dictionary containing 'audio' (base64 or bytes) and 'format' """ try: audio_bytes = data.get('audio') audio_format = data.get('format', 'webm') if not audio_bytes: return # Handle base64 encoded audio if isinstance(audio_bytes, str): import base64 audio_bytes = base64.b64decode(audio_bytes) # Transcribe audio text = transcriber.transcribe_audio(audio_bytes, format=audio_format) if text: logger.info(f"Transcription: {text}") emit('transcription', {'text': text}) except Exception as e: logger.error(f"Error processing audio: {e}") emit('error', {'message': 'Failed to process audio'}) @socketio.on('save_recording') def handle_save_recording(data): """Handle saving a recording session.""" client_id = request.sid try: # Parse timestamps from client start_time_str = data.get('startTime') end_time_str = data.get('endTime') if start_time_str: start_time = datetime.fromisoformat(start_time_str.replace('Z', '+00:00')) else: start_time = datetime.now() if end_time_str: end_time = datetime.fromisoformat(end_time_str.replace('Z', '+00:00')) else: end_time = datetime.now() transcript = data.get('transcript', '') word_count = data.get('wordCount', 0) # Save the recording filename = recordings.save_recording( start_time=start_time, end_time=end_time, transcript=transcript, word_count=word_count, client_id=client_id ) if filename: logger.info(f"Recording saved: {filename}") emit('recording_saved', {'filename': filename}) else: emit('recording_error', {'message': 'Failed to save recording'}) except Exception as e: logger.error(f"Error saving recording: {e}") emit('recording_error', {'message': str(e)}) # ============================================================================= # Startup # ============================================================================= def initialize(): """Initialize application components.""" logger.info("Initializing Live Captions...") # Initialize database database.init_db() logger.info("Database initialized") # Preload Whisper model logger.info("Preloading Whisper model (this may take a moment)...") if transcriber.preload_model(): logger.info("Whisper model ready") else: logger.warning("Failed to preload Whisper model") if __name__ == '__main__': initialize() host = os.environ.get('HOST', '0.0.0.0') port = int(os.environ.get('PORT', 5000)) debug = os.environ.get('DEBUG', 'false').lower() == 'true' logger.info(f"Starting Live Captions on {host}:{port}") socketio.run(app, host=host, port=port, debug=debug)