268 lines
7.3 KiB
Python

"""
Live Captions - Flask Application
A web-based live captioning application using Whisper for speech recognition.
"""
import os
import logging
from datetime import datetime
from flask import Flask, render_template, jsonify, request
from flask_socketio import SocketIO, emit
from dotenv import load_dotenv
import database
import transcriber
import recordings
# Load environment variables (e.g. from a local .env file) before any
# os.environ lookups below.
load_dotenv()

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Flask app
app = Flask(__name__)

# The built-in fallback key is public knowledge (it lives in the source), so
# make it loud when a deployment is relying on it instead of failing silently.
_secret_key = os.environ.get('SECRET_KEY')
if _secret_key is None:
    logger.warning(
        "SECRET_KEY is not set; falling back to the built-in development key. "
        "Set SECRET_KEY in the environment for any non-local deployment."
    )
    _secret_key = 'live-captions-secret'
app.config['SECRET_KEY'] = _secret_key

# Allowed WebSocket origins: "*" (the default, unchanged) allows every origin;
# CORS_ALLOWED_ORIGINS may instead hold a comma-separated list of origins.
_cors_origins = os.environ.get('CORS_ALLOWED_ORIGINS', '*').strip()
if _cors_origins != '*':
    _cors_origins = [
        origin.strip() for origin in _cors_origins.split(',') if origin.strip()
    ]
    if not _cors_origins:
        # An empty/blank value keeps the historical allow-all behaviour.
        _cors_origins = '*'

# Initialize SocketIO with gevent
socketio = SocketIO(
    app,
    cors_allowed_origins=_cors_origins,
    async_mode='gevent'
)
# =============================================================================
# Routes
# =============================================================================
@app.route('/')
def index():
    """Render the ``index.html`` template and return it as the landing page."""
    page = render_template('index.html')
    return page
@app.route('/api/health')
def health():
    """Liveness probe: always answers with the JSON body ``{"status": "healthy"}``."""
    status_payload = {'status': 'healthy'}
    return jsonify(status_payload)
@app.route('/api/settings', methods=['GET'])
def get_settings():
    """Return the value of ``database.get_settings()`` serialized as JSON."""
    return jsonify(database.get_settings())
@app.route('/api/settings', methods=['PUT'])
def update_settings():
    """
    Update user settings from the JSON request body.

    Returns:
        The settings returned by ``database.update_settings`` as JSON, or a
        JSON ``{'error': ...}`` body with status 400 when the request carries
        no usable JSON object.
    """
    # silent=True yields None for a missing/incorrect content type or
    # malformed JSON instead of raising, so every rejection below is a JSON
    # 400 rather than Flask's default (non-JSON) error page.
    payload = request.get_json(silent=True)
    if not payload:
        return jsonify({'error': 'No data provided'}), 400
    # A truthy non-object body (e.g. a list or a bare string) is not a set of
    # settings; reject it here rather than passing it to the database layer.
    # NOTE(review): assumes database.update_settings expects a mapping - confirm.
    if not isinstance(payload, dict):
        return jsonify({'error': 'Settings must be a JSON object'}), 400

    settings = database.update_settings(payload)
    # Broadcast settings update to all clients
    socketio.emit('settings_updated', settings)
    return jsonify(settings)
@app.route('/api/settings/reset', methods=['POST'])
def reset_settings():
    """Reset the stored settings, broadcast the result, and return it as JSON."""
    defaults = database.reset_settings()
    # Push the fresh values to every connected client, not just the caller.
    socketio.emit('settings_updated', defaults)
    return jsonify(defaults)
@app.route('/api/recordings', methods=['GET'])
def list_recordings():
    """Return the result of ``recordings.list_recordings()`` as JSON."""
    saved = recordings.list_recordings()
    return jsonify(saved)
@app.route('/api/recordings/<filename>', methods=['GET'])
def get_recording(filename):
    """
    Look up one recording by the file name taken from the URL.

    Responds with the recording as JSON, or a JSON error with status 404
    when ``recordings.get_recording`` returns a falsy value.
    """
    found = recordings.get_recording(filename)
    if not found:
        return jsonify({'error': 'Recording not found'}), 404
    return jsonify(found)
@app.route('/api/recordings/<filename>', methods=['DELETE'])
def delete_recording(filename):
    """
    Delete the recording named in the URL.

    Responds with ``{'success': True}`` when ``recordings.delete_recording``
    returns a truthy value, otherwise a JSON error with status 400.
    """
    removed = recordings.delete_recording(filename)
    if not removed:
        return jsonify({'error': 'Failed to delete recording'}), 400
    return jsonify({'success': True})
# =============================================================================
# WebSocket Events
# =============================================================================
@socketio.on('connect')
def handle_connect():
    """Log the new connection and send that client the current settings."""
    logger.info(f"Client connected: {request.sid}")
    # emit() (unlike socketio.emit) replies only to the client that connected.
    emit('settings_updated', database.get_settings())
@socketio.on('disconnect')
def handle_disconnect():
    """Log the session id of the client that just disconnected."""
    logger.info(f"Client disconnected: {request.sid}")
def _transcribe_and_emit(data, *, event, success_label, error_label):
    """
    Decode one audio payload, transcribe it, and emit the text to the sender.

    Shared implementation for the microphone and desktop audio handlers,
    which differ only in the event they answer with and in their log labels.

    Args:
        data: Dictionary containing 'audio' (base64 string or bytes) and,
            optionally, 'format' (defaults to 'webm').
        event: Name of the event emitted with ``{'text': ...}`` on success.
        success_label: Prefix used when logging a transcription.
        error_label: Noun used in the error log line and in the 'error'
            event message (e.g. 'audio', 'desktop audio').
    """
    import base64

    try:
        audio_bytes = data.get('audio')
        audio_format = data.get('format', 'webm')
        if not audio_bytes:
            # Nothing to transcribe; stay silent rather than report an error.
            return

        # Handle base64 encoded audio
        if isinstance(audio_bytes, str):
            audio_bytes = base64.b64decode(audio_bytes)

        # Transcribe audio
        text = transcriber.transcribe_audio(audio_bytes, format=audio_format)
        if text:
            logger.info("%s: %s", success_label, text)
            emit(event, {'text': text})
    except Exception as e:
        # Boundary handler: one bad chunk must not break the socket session.
        # logger.exception keeps the traceback that logger.error would drop.
        logger.exception("Error processing %s: %s", error_label, e)
        emit('error', {'message': f'Failed to process {error_label}'})


@socketio.on('audio_data')
def handle_audio_data(data):
    """
    Handle incoming audio data from client.

    Emits 'transcription' with the recognized text, or 'error' on failure.

    Args:
        data: Dictionary containing 'audio' (base64 or bytes) and 'format'
    """
    _transcribe_and_emit(
        data,
        event='transcription',
        success_label='Transcription',
        error_label='audio',
    )


@socketio.on('desktop_audio_data')
def handle_desktop_audio_data(data):
    """
    Handle incoming desktop audio data from client.

    Emits 'desktop_transcription' with the recognized text, or 'error' on
    failure.

    Args:
        data: Dictionary containing 'audio' (base64 or bytes) and 'format'
    """
    _transcribe_and_emit(
        data,
        event='desktop_transcription',
        success_label='Desktop transcription',
        error_label='desktop audio',
    )
def _parse_client_timestamp(value):
    """
    Return a client-supplied ISO-8601 timestamp as a timezone-aware datetime.

    A falsy *value* means "now". Naive values (no UTC offset) are interpreted
    as server-local time, the same frame a bare ``datetime.now()`` uses, so
    every datetime this returns can be compared or subtracted with the others.

    Raises:
        ValueError: if *value* is not a valid ISO-8601 string.
    """
    if not value:
        return datetime.now().astimezone()
    # A trailing 'Z' (as in JavaScript's toISOString) is not accepted by
    # datetime.fromisoformat on older Python versions; spell it as an offset.
    parsed = datetime.fromisoformat(value.replace('Z', '+00:00'))
    if parsed.tzinfo is None:
        parsed = parsed.astimezone()
    return parsed


@socketio.on('save_recording')
def handle_save_recording(data):
    """
    Handle saving a recording session.

    Emits 'recording_saved' with the new filename on success, or
    'recording_error' with a generic message on failure.

    Args:
        data: Dictionary with optional 'startTime' / 'endTime' (ISO-8601
            strings), 'transcript' (defaults to '') and 'wordCount'
            (defaults to 0).
    """
    client_id = request.sid
    try:
        # Parse timestamps from client; both come back timezone-aware so a
        # payload that omits only one of them no longer yields a mixed
        # naive/aware pair.
        start_time = _parse_client_timestamp(data.get('startTime'))
        end_time = _parse_client_timestamp(data.get('endTime'))

        # Save the recording
        filename = recordings.save_recording(
            start_time=start_time,
            end_time=end_time,
            transcript=data.get('transcript', ''),
            word_count=data.get('wordCount', 0),
            client_id=client_id
        )
    except Exception as e:
        # Keep the details (with traceback) in the server log only; the
        # client gets the same generic message as the other failure path.
        logger.exception("Error saving recording: %s", e)
        emit('recording_error', {'message': 'Failed to save recording'})
        return

    if filename:
        logger.info(f"Recording saved: {filename}")
        emit('recording_saved', {'filename': filename})
    else:
        emit('recording_error', {'message': 'Failed to save recording'})
# =============================================================================
# Startup
# =============================================================================
def initialize():
    """
    Prepare the application before serving: set up the database, then try
    to preload the Whisper model. A failed preload is logged as a warning
    and does not abort startup.
    """
    logger.info("Initializing Live Captions...")

    # Database first
    database.init_db()
    logger.info("Database initialized")

    # Then the speech model
    logger.info("Preloading Whisper model (this may take a moment)...")
    model_ready = transcriber.preload_model()
    if not model_ready:
        logger.warning("Failed to preload Whisper model")
        return
    logger.info("Whisper model ready")
if __name__ == '__main__':
    # Script entry point: initialize components, then serve via SocketIO.
    initialize()

    # Runtime options come from the environment, with local defaults.
    debug = os.environ.get('DEBUG', 'false').lower() == 'true'
    port = int(os.environ.get('PORT', 5000))
    host = os.environ.get('HOST', '0.0.0.0')

    logger.info(f"Starting Live Captions on {host}:{port}")
    socketio.run(app, host=host, port=port, debug=debug)