
"""
YOLO11 Person Detection Service
FastAPI service for detecting persons in images using YOLO11.
Optimized for adult content performer counting.
"""
import base64
import io
import os
import time
from typing import Optional
import numpy as np
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
from pydantic import BaseModel
from ultralytics import YOLO
# Configuration from environment
MODEL_NAME = os.getenv("MODEL", "yolo11s.pt")  # Ultralytics weights file / model alias
CONF_THRESHOLD = float(os.getenv("CONF_THRESHOLD", "0.5"))  # default detection confidence cutoff
DEVICE = os.getenv("DEVICE", "0") # GPU device, "cpu" for CPU-only

app = FastAPI(
    title="YOLO11 Person Detection",
    description="Detect persons in images for performer counting",
    version="1.0.0"
)

# CORS for local development
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# maximally permissive — fine for local dev, tighten before exposing publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global model instance; populated by load_model() at startup, None until then.
model: Optional[YOLO] = None
class DetectionRequest(BaseModel):
    """Request body for detection endpoint"""
    image: str  # Base64 encoded image (bare base64 or a data-URL with a "...," prefix)
    # NOTE(review): /detect currently hard-codes COCO class 0 (person) and
    # never reads this field — confirm whether it should be honored.
    classes: list[str] = ["person"]  # Classes to detect (default: person only)
    conf: float = CONF_THRESHOLD  # Confidence threshold
class BoundingBox(BaseModel):
    """Bounding box for a detection, in pixel coordinates of the input image."""
    x1: float  # left edge
    y1: float  # top edge
    x2: float  # right edge
    y2: float  # bottom edge
    width: float   # x2 - x1, precomputed for convenience
    height: float  # y2 - y1, precomputed for convenience
class Detection(BaseModel):
    """Single detection result"""
    class_name: str   # always "person" in this service (COCO class 0)
    confidence: float # model confidence in [0, 1]
    bbox: BoundingBox
class DetectionResponse(BaseModel):
    """Response from detection endpoint"""
    detections: list[Detection]
    count: int            # len(detections)
    avg_confidence: float # mean confidence over detections, 0.0 when empty
    inference_ms: float   # model inference wall time in milliseconds
    model: str            # MODEL_NAME used for this inference
    image_size: tuple[int, int]  # (width, height) of the decoded input image
class HealthResponse(BaseModel):
    """Health check response"""
    status: str        # "healthy" when the model is loaded, else "unhealthy"
    model_loaded: bool
    model_name: str
    device: str
def load_model() -> None:
    """Load the YOLO model into the module-level ``model`` global and warm it up.

    Reads MODEL_NAME / DEVICE / CONF_THRESHOLD from module configuration and
    runs one dummy inference so the first real request does not pay the
    lazy-initialization cost (device setup, graph warm-up).

    Raises:
        Exception: re-raises whatever the YOLO constructor or warm-up predict
            raised, after logging, so startup fails loudly rather than
            serving a half-initialized model.
    """
    global model
    print(f"[YOLO] Loading model: {MODEL_NAME}")
    print(f"[YOLO] Device: {DEVICE}")
    print(f"[YOLO] Default confidence threshold: {CONF_THRESHOLD}")
    try:
        model = YOLO(MODEL_NAME)
        # Warm up with a dummy 640x640 black frame so first-request latency is low.
        dummy = np.zeros((640, 640, 3), dtype=np.uint8)
        model.predict(dummy, verbose=False, device=DEVICE)
        # Fix: was an f-string with no placeholders (ruff F541).
        print("[YOLO] Model loaded successfully")
    except Exception as e:
        print(f"[YOLO] Failed to load model: {e}")
        raise
def decode_image(base64_string: str) -> Image.Image:
    """Turn a base64-encoded image payload into an RGB PIL image.

    Accepts either a bare base64 string or a data URL
    ("data:image/png;base64,...."); anything before the first comma is
    treated as a data-URL header and stripped.
    """
    # Strip an optional data-URL prefix, keeping only the payload.
    payload = base64_string.split(",", 1)[1] if "," in base64_string else base64_string
    raw = base64.b64decode(payload)
    picture = Image.open(io.BytesIO(raw))
    # Normalize palette / RGBA / grayscale inputs to plain RGB.
    return picture if picture.mode == "RGB" else picture.convert("RGB")
# NOTE(review): @app.on_event is the pre-lifespan FastAPI API (deprecated in
# newer FastAPI releases in favor of a lifespan context manager) — confirm the
# pinned FastAPI version before migrating.
@app.on_event("startup")
async def startup_event():
    """Load model on startup"""
    load_model()
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Report service liveness and whether the YOLO model finished loading."""
    loaded = model is not None
    return HealthResponse(
        status="healthy" if loaded else "unhealthy",
        model_loaded=loaded,
        model_name=MODEL_NAME,
        device=DEVICE,
    )
@app.post("/detect", response_model=DetectionResponse)
async def detect(request: DetectionRequest):
    """
    Detect persons in an image.

    Returns bounding boxes, confidence scores, and count.

    Raises:
        HTTPException: 503 when the model is not loaded, 400 when the image
            payload cannot be decoded, 500 on unexpected inference failure.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    # Decode outside the inference handler so a malformed payload surfaces as
    # a client error (400), not a generic server error (500).
    try:
        image = decode_image(request.image)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid image data: {e}") from e
    image_np = np.array(image)
    width, height = image.size
    try:
        # perf_counter is monotonic — immune to wall-clock adjustments,
        # unlike time.time(), so it is the right tool for durations.
        start_time = time.perf_counter()
        # NOTE(review): request.classes is currently ignored — detection is
        # hard-coded to COCO class 0 (person). Confirm before honoring it.
        results = model.predict(
            image_np,
            conf=request.conf,
            classes=[0],  # COCO class 0 = person
            verbose=False,
            device=DEVICE
        )
        inference_ms = (time.perf_counter() - start_time) * 1000
        # Parse results into response models.
        detections = []
        confidences = []
        for result in results:
            if result.boxes is None:
                continue
            for box in result.boxes:
                conf = float(box.conf[0])
                cls = int(box.cls[0])
                # Defensive double-check: only include person class (0 in COCO).
                if cls != 0:
                    continue
                # Bounding box corners in pixel coordinates.
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                detections.append(Detection(
                    class_name="person",
                    confidence=conf,
                    bbox=BoundingBox(
                        x1=x1,
                        y1=y1,
                        x2=x2,
                        y2=y2,
                        width=x2 - x1,
                        height=y2 - y1
                    )
                ))
                confidences.append(conf)
        # Mean confidence; 0.0 when nothing was detected.
        avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
        print(f"[YOLO] Detected {len(detections)} person(s) in {inference_ms:.1f}ms (avg conf: {avg_confidence:.2f})")
        return DetectionResponse(
            detections=detections,
            count=len(detections),
            avg_confidence=avg_confidence,
            inference_ms=inference_ms,
            model=MODEL_NAME,
            image_size=(width, height)
        )
    except Exception as e:
        print(f"[YOLO] Detection error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post("/count")
async def count_persons(request: DetectionRequest):
    """
    Simplified endpoint that just returns person count.

    Delegates to the /detect handler and keeps only the summary fields —
    handy for quick performer counting without full detection details.
    """
    full = await detect(request)
    return {
        "count": full.count,
        "confidence": full.avg_confidence,
        "inference_ms": full.inference_ms,
    }
if __name__ == "__main__":
    # Local entry point: run a single uvicorn worker, listening on all
    # interfaces so containerized deployments can reach it.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=5002)