#!/bin/bash
#
# Entrypoint for the Whisper transcription service.
#
# Prints the effective model settings, then replaces this shell with a
# gunicorn server that serves the `app` object from the `transcribe` module.
#
# Environment (read here for the startup banner only; this script does not
# export or modify them):
#   WHISPER_MODEL    model name     (banner shows "base" when unset/empty)
#   WHISPER_DEVICE   device         (banner shows "cuda" when unset/empty)
#   WHISPER_COMPUTE  compute type   (banner shows "float16" when unset/empty)
# NOTE(review): the fallbacks printed below are assumed to mirror the
# application's own defaults -- confirm against transcribe.py.

# Abort on the first failing command, on any unset variable, and on a failure
# in any pipeline stage. Every expansion below carries a :- default, so -u
# cannot trip on the optional WHISPER_* variables.
set -euo pipefail

echo "Starting Whisper transcription service..."
echo "Model: ${WHISPER_MODEL:-base}"
echo "Device: ${WHISPER_DEVICE:-cuda}"
echo "Compute Type: ${WHISPER_COMPUTE:-float16}"

# Start with gunicorn for production.
gunicorn_args=(
  --bind 0.0.0.0:5000   # listen on all interfaces, port 5000
  --workers 1           # 1 worker to avoid loading the model multiple times
  --threads 2           # 2 threads for concurrent request handling
  --timeout 600         # 600s timeout for long transcriptions
  --log-level info
  --access-logfile -    # "-" sends the access log to stdout
)

# exec replaces this shell with gunicorn instead of leaving a wrapper process
# in between, so signals sent to this PID are delivered to gunicorn itself.
exec gunicorn "${gunicorn_args[@]}" transcribe:app