# Lyra-2 on HuggingFace Spaces — Docker build via uv.
#
# Mirrors tools/modal/lyra2_modal_notebook.py's pinned stack (torch 2.7.1+cu128,
# flash-attn 2.7.4.post1 prebuilt wheel, TE/VIPE/DA3/gsplat). Unlike a source build
# with --no-build-isolation, this image installs the CUDA extensions from prebuilt
# wheels in ./wheels/. It uses uv to provision Python 3.12 and resolve pip deps —
# sidesteps the deadsnakes-PPA + python3.12-distutils (PEP 632) problem and cuts
# install time.
#
# Build environment constraints:
# - No GPU available during build — TORCH_CUDA_ARCH_LIST pinned to A100 (8.0)
# - Runs as UID 1000 per HF Docker Space convention
# - /data persistent volume is runtime-only; checkpoints download on first boot

# Base image: NVIDIA CUDA 12.8.0 "devel" flavor on Ubuntu 22.04. Pinned by tag only,
# not by digest, so the underlying layers can still change between builds.
# NOTE(review): the CUDA extensions are installed from prebuilt wheels further down;
# confirm whether the -devel flavor is still required or a -runtime base would do.
FROM nvidia/cuda:12.8.0-devel-ubuntu22.04

# Build-time only: keeps apt non-interactive for the RUN steps of this stage
# without leaking the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Environment shared by the build steps and the running container.
# - LD_LIBRARY_PATH: prepend the toolkit's lib64 and keep whatever the base image
#   already exported (the `:+` form avoids a dangling ':' if it was unset).
# - TORCH_CUDA_ARCH_LIST=8.0: no GPU exists at build time, so the target arch is
#   pinned (A100) instead of auto-detected.
# - MAX_JOBS / UV_CONCURRENT_*: deliberately low parallelism for the build sandbox.
# - UV_LINK_MODE=copy: uv copies files into the venv instead of hard-linking.
# - UV_NO_CACHE=1: with copy mode and no cache mount, uv's cache would otherwise be
#   committed into every `uv pip install` layer as a second copy of each package.
# - VIRTUAL_ENV: lets `uv pip install` target /opt/venv without extra flags.
ENV PYTHONUNBUFFERED=1 \
    CUDA_HOME=/usr/local/cuda \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} \
    TORCH_CUDA_ARCH_LIST=8.0 \
    MAX_JOBS=1 \
    USE_SYSTEM_EIGEN=1 \
    UV_LINK_MODE=copy \
    UV_NO_CACHE=1 \
    UV_PYTHON_INSTALL_DIR=/opt/python \
    UV_CONCURRENT_DOWNLOADS=2 \
    UV_CONCURRENT_BUILDS=1 \
    UV_CONCURRENT_INSTALLS=2 \
    VIRTUAL_ENV=/opt/venv
# Venv first so `python` / console scripts resolve to the project environment.
ENV PATH=/opt/venv/bin:/usr/local/cuda/bin:$PATH

# bash with pipefail so a failure anywhere in a RUN pipeline fails the build step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# System packages only — Python and pip are provisioned by uv, not apt.
# Notable entries: libeigen3-dev (needed by VIPE), ffmpeg (video muxing),
# git (Lyra-2 clone incl. submodules), ninja-build (CUDA builds).
# One package per line, sorted, so future diffs stay small.
# The apt lists are removed in the same layer that downloads them.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        libeigen3-dev \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender1 \
        ninja-build \
        wget \
 && rm -rf /var/lib/apt/lists/*

# Install uv by copying the `uv` and `uvx` binaries out of the published uv image
# into /usr/local/bin (standalone binaries, no system Python required).
# The image tag is pinned to 0.11.7 for reproducibility.
COPY --from=ghcr.io/astral-sh/uv:0.11.7 /uv /uvx /usr/local/bin/

# HF convention: run as UID 1000. Create the account first and hand it two empty,
# user-owned directories, so the interpreter and the venv are written with the
# right owner from the start. A recursive chown in a later layer would store a
# second copy of every file it touches and inflate the image.
RUN useradd -m -u 1000 user && \
    install -d -o user -g user /opt/python /opt/venv
USER user
ENV HOME=/home/user \
    PYTHONPATH=/home/user/app/Lyra-2
WORKDIR /home/user/app

# Provision Python 3.12 as a standalone distribution (into UV_PYTHON_INSTALL_DIR)
# and create the project venv, both as the unprivileged user. The user owns the
# venv so later package installs can write into it. /opt/venv already exists but
# is empty, which `uv venv` accepts.
RUN uv python install 3.12 && \
    uv venv --python 3.12 /opt/venv

# PyTorch goes in before anything else: the CUDA extensions installed later are
# tied to this exact torch build. Both packages come from the cu128 wheel index.
# VIRTUAL_ENV is set, so uv targets /opt/venv without needing --system.
RUN uv pip install \
        --index-url https://download.pytorch.org/whl/cu128 \
        torch==2.7.1 \
        torchvision==0.22.1

# CPATH construction for TE's CUDA build, which needs headers from the
# pip-installed nvidia/*/include dirs. setup_cpath.py locates site-packages
# dynamically (independent of where Python lives) and creates the legacy
# `nvidia/cudart -> cuda_runtime` symlink TE's setup.py looks for.
# Its stdout is captured in ~/.cpath; an empty or failed result aborts the build.
# On success the value is echoed into the build log and ~/.buildrc is written
# with a one-line `export CPATH=...` that re-reads ~/.cpath when sourced.
COPY --chown=user build_support/setup_cpath.py ./build_support/setup_cpath.py
RUN if python ./build_support/setup_cpath.py > /home/user/.cpath \
        && test -s /home/user/.cpath; then \
        echo "CPATH = $(cat /home/user/.cpath)" \
        && echo 'export CPATH="$(cat /home/user/.cpath)"' > /home/user/.buildrc; \
    else \
        echo "ERROR: .cpath is empty — CUDA headers won't be found"; \
        exit 1; \
    fi

# Fetch the upstream repository together with its submodules (VIPE + DA3),
# keep only the Lyra-2 subdirectory and discard the rest of the checkout.
# No ref is given, so this takes whatever the default branch points to at build time.
RUN git clone --recurse-submodules https://github.com/nv-tlabs/lyra.git repo \
 && mv repo/Lyra-2 ./Lyra-2 \
 && rm -rf repo

# Upstream pins `tensorstore==0.1.45`, which ships no Python 3.12 wheels. Building
# it from source takes ~60 min and ~8+ GB RAM, which OOMs or times out HF's build
# sandbox. 0.1.50 is the earliest release with cp312 manylinux wheels and is
# API-compatible for the jax/orbax uses in Lyra-2.
# The substitution is a no-op if upstream's pin line no longer matches.
RUN sed -i -e 's|^tensorstore==0\.1\.45|tensorstore==0.1.50|' Lyra-2/requirements.txt

# Dependencies from the (patched) upstream requirements.txt first,
# then MoGe straight from its Git repository.
RUN uv pip install -r Lyra-2/requirements.txt \
 && uv pip install 'git+https://github.com/microsoft/MoGe.git'

# Build-backend tooling: needed for wheel install of editables, and kept as a
# safety net in case anything falls back to a source build.
RUN uv pip install \
        setuptools \
        wheel \
        ninja

# --- CUDA extensions: install from pre-built wheels via Git LFS ------------
# HF Space builders can't compile TE/VIPE/DA3/gsplat (OOMs silently after 10+ min).
# Wheels in ./wheels/ are compiled once on Modal (1.9 TB RAM) and shipped via Git LFS.
# They're ABI-pinned to Python 3.12 + torch 2.7.1+cu128 which is what this image has.
# The glob is expanded by bash; if wheels/ contains no .whl file the unexpanded
# pattern is handed to uv, which is expected to fail the build.
# NOTE(review): if the build context holds un-fetched LFS pointer files instead of
# real wheels, this step presumably fails as well — confirm LFS is pulled first.
COPY --chown=user wheels/ ./wheels/
RUN uv pip install ./wheels/*.whl

# flash-attn: upstream prebuilt wheel for torch 2.7 / cu12 / py312, installed
# directly from the GitHub release URL (no compilation in this image).
# The wheel's filename tags must match this image: cp312, linux_x86_64, torch2.7.
# NOTE(review): confirm the v2.7.4.post1 release really publishes a torch2.7 asset
# and that its cxx11abi tag matches the installed torch build; the import check
# later in this file is what actually proves compatibility.
RUN uv pip install \
        "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.7cxx11abiFALSE-cp312-cp312-linux_x86_64.whl"

# Runtime dependencies DA3 needs but does not declare (e.g. addict is missing
# from its pyproject). They are not pulled in automatically when installing from
# local wheels with pre-compiled .so files, so list them explicitly.
# Sorted, one per line.
RUN uv pip install \
        addict \
        e3nn \
        evo \
        fastapi \
        gradio \
        kornia \
        moviepy \
        open3d \
        pillow-heif \
        plyfile \
        pycolmap \
        python-multipart \
        trimesh \
        typer \
        uvicorn

# Final re-pin, run after all other installs so nothing can bump these again:
# - huggingface_hub stays below 1.0 (chained installs can move it past 1.0,
#   which breaks transformers);
# - gdown stays below 6 (6.0 removed the `fuzzy` kwarg VIPE uses).
RUN uv pip install \
        'huggingface_hub>=0.36.0,<1.0' \
        'gdown<6'

# Build-time smoke test restricted to imports that work without a GPU.
# `import transformer_engine` eagerly triggers Triton autotune (through the
# transformer_engine.pytorch submodule auto-loaded by __init__.py), which needs a
# CUDA driver — so TE/DA3/VIPE/gsplat are not imported here and are verified at
# runtime instead (where an A100 is available).
RUN python -c "import torch; import torchvision; import flash_attn; \
    print(f'torch {torch.__version__} | tv {torchvision.__version__} | flash_attn {flash_attn.__version__}')"
# Metadata-only check (no import) that the heavy wheels are INSTALLED, so a silent
# wheel-install problem fails the build here instead of surfacing at runtime.
# A missing distribution makes importlib.metadata raise, which fails this step.
RUN python -c "from importlib.metadata import version; \
    print({dist: version(dist) for dist in ['transformer_engine', 'transformer_engine_torch', 'vipe', 'depth_anything_3', 'gsplat']})"

# --- App code --------------------------------------------------------------
# Copied last so that edits to the application do not invalidate the dependency
# layers above. Everything is owned by the runtime user.
COPY --chown=user \
     app.py \
     resident_inference.py \
     warm_model_test.py \
     download_checkpoints.py \
     entrypoint.sh \
     ./
COPY --chown=user previews/ ./previews/
# Make sure the entrypoint is executable regardless of its mode in the build context.
RUN chmod +x entrypoint.sh

# Gradio listens on all interfaces, port 7860; EXPOSE documents the same port.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860
EXPOSE 7860

# Exec form: entrypoint.sh is started directly, without a wrapping shell.
CMD ["./entrypoint.sh"]