---
# Docker Compose definition for the prometheus-api service: one container
# built from the local directory, published on port 7860, with one NVIDIA GPU
# reserved and a named volume mounted for cached files.
version: '3.8'

services:
  prometheus-api:
    # Build the image from the Dockerfile in this directory.
    build: .
    container_name: prometheus-api

    # host:container — quoted so the mapping is always read as a string.
    ports:
      - '7860:7860'

    # Variables passed into the container. Values are quoted so they reach the
    # process as strings regardless of YAML implicit typing.
    environment:
      PORT: '7860'
      HOST: '0.0.0.0'
      OMP_NUM_THREADS: '4'
      MAX_INPUT_LENGTH: '1500'
      # NOTE(review): wildcard value; presumably an allow-list of request
      # origins read by the application — confirm, and narrow it for
      # production deployments if so.
      ALLOWED_ORIGINS: '*'

    # Reserve a single NVIDIA GPU device for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

    # Named volume mounted at the given cache path so its contents survive
    # container re-creation.
    volumes:
      - model_cache:/home/appuser/.cache/huggingface

    restart: unless-stopped

    # Probe the /health endpoint from inside the container every 30s; failures
    # during the first 60s after start do not count against the retry budget.
    # NOTE(review): requires `curl` to be present in the image — verify
    # against the Dockerfile.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:7860/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

# Named volumes declared by this project; model_cache uses default settings.
volumes:
  model_cache: {}