# GPU inference image: CUDA 12.1 + cuDNN 8 runtime on Ubuntu 22.04, running
# app.py (listening on port 7860) as an unprivileged user.
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04

# Build-time only: keeps apt non-interactive during the build without leaking
# the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Unprivileged runtime account. It is created before WORKDIR so that useradd
# owns /home/user instead of finding a root-created directory already there.
# NOTE(review): UID 1000 is assumed to match the deployment platform's runtime
# user -- confirm against the target environment.
RUN useradd --create-home --uid 1000 --shell /bin/bash user

WORKDIR /home/user/app

# Runtime configuration read by the application and its libraries.
# Dockerfiles have no inline comments: text after a trailing backslash breaks
# the line continuation, so per-variable notes live up here instead.
#   QUANTIZE=none  -> default: no quantization
ENV OMP_NUM_THREADS=1 \
    TOKENIZERS_PARALLELISM=false \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/home/user/.cache/huggingface \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface/transformers \
    HF_DATASETS_CACHE=/home/user/.cache/huggingface/datasets \
    OFFLOAD_DIR=/home/user/app/offload \
    MERGED_MODEL_DIR=/home/user/app/merged-model \
    QUANTIZE=none \
    USE_ADAPTER_INFERENCE=1 \
    FORCE_REMERGE=0 \
    ALLOW_RUNTIME_MERGE=0 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility

# System deps. The apt lists are removed in the same layer so they never reach
# the image. `git lfs install --system` writes the LFS filters to /etc/gitconfig
# so they apply to the non-root runtime user as well, not only to root.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        git-lfs \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install --system

# Torch stack aligned with transformers 4.44; wheels come from the cu121 index
# to match the CUDA 12.1 base image.
RUN python3 -m pip install --no-cache-dir --upgrade pip
RUN python3 -m pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cu121 \
        torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1

# Only CPU FAISS (not GPU).
# The manifest is copied on its own so the dependency layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Application source, owned by the runtime user.
COPY --chown=user:user . .

# Create the writable working directories and hand them (plus the app root,
# which WORKDIR created as root) to the runtime user.
RUN mkdir -p "$OFFLOAD_DIR" "$MERGED_MODEL_DIR" "$HF_HOME" "$TRANSFORMERS_CACHE" \
    && chown user:user /home/user/app \
    && chown -R user:user /home/user/.cache "$OFFLOAD_DIR" "$MERGED_MODEL_DIR"

# Drop root for everything that runs in the container.
USER user

# Documentation only; the port still has to be published at `docker run` time.
EXPOSE 7860

# Exec form so python3 is PID 1 and receives SIGTERM from `docker stop`.
CMD ["python3", "app.py"]