#!/bin/bash
#
# Entrypoint for the Whisper transcription service.
#
# Prints a startup banner with the effective model settings, then replaces
# this shell with gunicorn serving the `transcribe:app` application.
#
# Environment (all optional; this script only displays them):
#   WHISPER_MODEL    shown as "base" when unset or empty
#   WHISPER_DEVICE   shown as "cuda" when unset or empty
#   WHISPER_COMPUTE  shown as "float16" when unset or empty
# NOTE(review): the fallbacks above are display-only; presumably the
# application applies matching defaults itself -- confirm against transcribe.
set -e

# Startup banner: one line per argument.
printf '%s\n' \
    "Starting Whisper transcription service..." \
    "Model: ${WHISPER_MODEL:-base}" \
    "Device: ${WHISPER_DEVICE:-cuda}" \
    "Compute Type: ${WHISPER_COMPUTE:-float16}"

# Production server settings, kept in an array so each flag can carry its
# rationale next to it.
server_opts=(
    --bind 0.0.0.0:5000     # listen on all interfaces, port 5000
    --workers 1             # single worker so the model is not loaded multiple times
    --threads 2             # two threads for concurrent request handling
    --timeout 600           # 600s allowance for long transcriptions
    --log-level info
    --access-logfile -      # "-" sends the access log to stdout
)

# exec replaces the shell, so gunicorn receives signals directly.
exec gunicorn "${server_opts[@]}" transcribe:app