21 lines
535 B
Bash

#!/bin/bash
#
# Entrypoint for the Whisper transcription service.
#
# Prints a startup banner, then replaces this shell with gunicorn serving
# `transcribe:app` on 0.0.0.0:5000.
#
# Environment (read by this script for the banner only):
#   WHISPER_MODEL    printed as "base" when unset or empty
#   WHISPER_DEVICE   printed as "cuda" when unset or empty
#   WHISPER_COMPUTE  printed as "float16" when unset or empty
# NOTE(review): these fallbacks are only what gets printed here; the script
# does not export them, so confirm they match the application's own defaults.

# Strict mode: abort on a failing command, an unset variable, or a failing
# pipeline stage. Every expansion below supplies a default, so -u is safe.
set -euo pipefail

printf '%s\n' \
  "Starting Whisper transcription service..." \
  "Model: ${WHISPER_MODEL:-base}" \
  "Device: ${WHISPER_DEVICE:-cuda}" \
  "Compute Type: ${WHISPER_COMPUTE:-float16}"

# Start with gunicorn for production.
# Arguments are kept in an array so each option stays a separate word and can
# carry its own rationale.
gunicorn_args=(
  --bind 0.0.0.0:5000
  --workers 1         # 1 worker to avoid loading the model multiple times
  --threads 2         # 2 threads for concurrent request handling
  --timeout 600       # 600s timeout for long transcriptions
  --log-level info
  --access-logfile -  # "-" sends the access log to stdout
  transcribe:app
)

# exec replaces this shell with gunicorn instead of leaving a wrapper process.
exec gunicorn "${gunicorn_args[@]}"