
"""
YOLO11 Person Detection Service
FastAPI service for detecting persons in images using YOLO11.
Optimized for adult content performer counting.
"""
import base64
import io
import os
import time
from typing import Optional
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
from pydantic import BaseModel
from ultralytics import YOLO
# Configuration from environment
MODEL_NAME = os.getenv("MODEL", "yolo11s.pt")  # Ultralytics weights file / model alias
CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", "0.5"))  # default detection confidence cutoff
DEVICE = os.getenv("DEVICE", "0") # GPU device, "cpu" for CPU-only

app = FastAPI(
    title="YOLO11 Person Detection",
    description="Detect persons in images for performer counting",
    version="1.0.0"
)

# CORS for local development
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# maximally permissive — fine for local dev, tighten before exposing publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global model instance; populated by load_model() at startup, None until then.
model: Optional[YOLO] = None
class DetectionRequest(BaseModel):
    """Request body for detection endpoint"""
    image: str  # Base64 encoded image (bare base64 or a data-URL with a "...," prefix)
    # NOTE(review): /detect currently hard-codes COCO class 0 (person) and
    # never reads this field — confirm whether it should be honored.
    classes: list[str] = ["person"]  # Classes to detect (default: person only)
    conf: float = CONF_THRESHOLD  # Confidence threshold
class BoundingBox(BaseModel):
    """Bounding box for a detection, in pixel coordinates of the input image."""
    x1: float  # left edge
    y1: float  # top edge
    x2: float  # right edge
    y2: float  # bottom edge
    width: float   # x2 - x1, precomputed for convenience
    height: float  # y2 - y1, precomputed for convenience
class Detection(BaseModel):
    """Single detection result"""
    class_name: str   # always "person" in this service (COCO class 0)
    confidence: float # model confidence in [0, 1]
    bbox: BoundingBox
class DetectionResponse(BaseModel):
    """Response from detection endpoint"""
    detections: list[Detection]
    count: int            # len(detections)
    avg_confidence: float # mean confidence over detections, 0.0 when empty
    inference_ms: float   # model inference wall time in milliseconds
    model: str            # MODEL_NAME used for this inference
    image_size: tuple[int, int]  # (width, height) of the decoded input image
class HealthResponse(BaseModel):
    """Health check response"""
    status: str        # "healthy" when the model is loaded, else "unhealthy"
    model_loaded: bool
    model_name: str
    device: str
def load_model() -> None:
    """Load the YOLO model into the module-level ``model`` global and warm it up.

    Reads MODEL_NAME / DEVICE / CONF_THRESHOLD from module configuration and
    runs one dummy inference so the first real request does not pay the
    lazy-initialization cost (device setup, graph warm-up).

    Raises:
        Exception: re-raises whatever the YOLO constructor or warm-up predict
            raised, after logging, so startup fails loudly rather than
            serving a half-initialized model.
    """
    global model
    print(f"[YOLO] Loading model: {MODEL_NAME}")
    print(f"[YOLO] Device: {DEVICE}")
    print(f"[YOLO] Default confidence threshold: {CONF_THRESHOLD}")
    try:
        model = YOLO(MODEL_NAME)
        # Warm up with a dummy 640x640 black frame so first-request latency is low.
        dummy = np.zeros((640, 640, 3), dtype=np.uint8)
        model.predict(dummy, verbose=False, device=DEVICE)
        # Fix: was an f-string with no placeholders (ruff F541).
        print("[YOLO] Model loaded successfully")
    except Exception as e:
        print(f"[YOLO] Failed to load model: {e}")
        raise
def decode_image(base64_string: str) -> Image.Image:
    """Turn a base64-encoded image payload into an RGB PIL image.

    Accepts either a bare base64 string or a data URL
    ("data:image/png;base64,...."); anything before the first comma is
    treated as a data-URL header and stripped.
    """
    # Strip an optional data-URL prefix, keeping only the payload.
    payload = base64_string.split(",", 1)[1] if "," in base64_string else base64_string
    raw = base64.b64decode(payload)
    picture = Image.open(io.BytesIO(raw))
    # Normalize palette / RGBA / grayscale inputs to plain RGB.
    return picture if picture.mode == "RGB" else picture.convert("RGB")
# NOTE(review): @app.on_event is the pre-lifespan FastAPI API (deprecated in
# newer FastAPI releases in favor of a lifespan context manager) — confirm the
# pinned FastAPI version before migrating.
@app.on_event("startup")
async def startup_event():
    """Load model on startup"""
    load_model()
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report service liveness and whether the YOLO model finished loading."""
    loaded = model is not None
    return HealthResponse(
        status="healthy" if loaded else "unhealthy",
        model_loaded=loaded,
        model_name=MODEL_NAME,
        device=DEVICE,
    )
@app.post("/detect", response_model=DetectionResponse)
async def detect(request: DetectionRequest):
    """
    Detect persons in an image.

    Returns bounding boxes, confidence scores, and count.

    Raises:
        HTTPException: 503 when the model is not loaded, 400 when the image
            payload cannot be decoded, 500 on unexpected inference failure.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    # Decode outside the inference handler so a malformed payload surfaces as
    # a client error (400), not a generic server error (500).
    try:
        image = decode_image(request.image)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid image data: {e}") from e
    image_np = np.array(image)
    width, height = image.size
    try:
        # perf_counter is monotonic — immune to wall-clock adjustments,
        # unlike time.time(), so it is the right tool for durations.
        start_time = time.perf_counter()
        # NOTE(review): request.classes is currently ignored — detection is
        # hard-coded to COCO class 0 (person). Confirm before honoring it.
        results = model.predict(
            image_np,
            conf=request.conf,
            classes=[0],  # COCO class 0 = person
            verbose=False,
            device=DEVICE
        )
        inference_ms = (time.perf_counter() - start_time) * 1000
        # Parse results into response models.
        detections = []
        confidences = []
        for result in results:
            if result.boxes is None:
                continue
            for box in result.boxes:
                conf = float(box.conf[0])
                cls = int(box.cls[0])
                # Defensive double-check: only include person class (0 in COCO).
                if cls != 0:
                    continue
                # Bounding box corners in pixel coordinates.
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                detections.append(Detection(
                    class_name="person",
                    confidence=conf,
                    bbox=BoundingBox(
                        x1=x1,
                        y1=y1,
                        x2=x2,
                        y2=y2,
                        width=x2 - x1,
                        height=y2 - y1
                    )
                ))
                confidences.append(conf)
        # Mean confidence; 0.0 when nothing was detected.
        avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
        print(f"[YOLO] Detected {len(detections)} person(s) in {inference_ms:.1f}ms (avg conf: {avg_confidence:.2f})")
        return DetectionResponse(
            detections=detections,
            count=len(detections),
            avg_confidence=avg_confidence,
            inference_ms=inference_ms,
            model=MODEL_NAME,
            image_size=(width, height)
        )
    except Exception as e:
        print(f"[YOLO] Detection error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/count")
async def count_persons(request: DetectionRequest):
    """
    Simplified endpoint that just returns person count.

    Delegates to the /detect handler and keeps only the summary fields —
    handy for quick performer counting without full detection details.
    """
    full = await detect(request)
    return {
        "count": full.count,
        "confidence": full.avg_confidence,
        "inference_ms": full.inference_ms,
    }
if __name__ == "__main__":
    # Local entry point: run a single uvicorn worker, listening on all
    # interfaces so containerized deployments can reach it.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=5002)