Spaces:

augustolucasg
/

lyra2-endpoint

Paused

App Files Files Community

lyra2-endpoint / Dockerfile

augustolucasg

Fix Docker entrypoint line endings and API upload dependency

2c73c1a verified 22 days ago

raw

history blame contribute delete

6.57 kB

	# Lyra-2 on HuggingFace Spaces — Docker build via uv.
	#
	# Mirrors tools/modal/lyra2_modal_notebook.py's install ordering (torch 2.7.1+cu128,
	# flash-attn 2.7.4.post1 prebuilt wheel, TE/VIPE/DA3/gsplat with --no-build-isolation)
	# but uses uv to provision Python 3.12 and resolve pip deps — sidesteps the
	# deadsnakes-PPA + python3.12-distutils (PEP 632) problem and cuts install time.
	#
	# Build environment constraints:
	# - No GPU available during build — TORCH_CUDA_ARCH_LIST pinned to A100 (8.0)
	# - Runs as UID 1000 per HF Docker Space convention
	# - /data persistent volume is runtime-only; checkpoints download on first boot

	FROM nvidia/cuda:12.8.0-devel-ubuntu22.04

	# Build-time only: keeps apt non-interactive while the image is built without
	# leaking DEBIAN_FRONTEND into the environment of the running container.
	ARG DEBIAN_FRONTEND=noninteractive

	# Toolchain and uv settings shared by the build steps that follow.
	# - TORCH_CUDA_ARCH_LIST=8.0: no GPU is present at build time, so the target
	#   architecture is pinned (A100).
	# - MAX_JOBS / UV_CONCURRENT_*: parallelism is throttled, presumably to keep
	#   memory use bounded in the build sandbox.
	# - UV_LINK_MODE=copy: uv copies package files into the venv instead of linking.
	# - UV_NO_CACHE=1: uv uses a throwaway cache per invocation, so downloaded
	#   wheels are not kept a second time under ~/.cache/uv inside image layers.
	# - VIRTUAL_ENV: `uv pip install` targets this venv automatically.
	ENV PYTHONUNBUFFERED=1 \
	    CUDA_HOME=/usr/local/cuda \
	    LD_LIBRARY_PATH=/usr/local/cuda/lib64 \
	    TORCH_CUDA_ARCH_LIST=8.0 \
	    MAX_JOBS=1 \
	    USE_SYSTEM_EIGEN=1 \
	    UV_LINK_MODE=copy \
	    UV_NO_CACHE=1 \
	    UV_PYTHON_INSTALL_DIR=/opt/python \
	    UV_CONCURRENT_DOWNLOADS=2 \
	    UV_CONCURRENT_BUILDS=1 \
	    UV_CONCURRENT_INSTALLS=2 \
	    VIRTUAL_ENV=/opt/venv
	# Venv and CUDA binaries take precedence over the base image's PATH entries.
	ENV PATH=/opt/venv/bin:/usr/local/cuda/bin:$PATH

	# Run every RUN step under bash with pipefail so a failure anywhere in a
	# pipeline fails the build step.
	SHELL ["/bin/bash", "-o", "pipefail", "-c"]

	# System packages only — Python and pip come from uv, not apt.
	#   libeigen3-dev  needed by VIPE
	#   ffmpeg         video muxing
	#   git            Lyra-2 clone (with submodules)
	#   ninja-build    CUDA builds
	# The apt lists are removed in the same layer so they never reach the image.
	RUN apt-get update && \
	    apt-get install -y --no-install-recommends \
	        build-essential \
	        ca-certificates \
	        curl \
	        ffmpeg \
	        git \
	        libeigen3-dev \
	        libgl1 \
	        libglib2.0-0 \
	        libsm6 \
	        libxext6 \
	        libxrender1 \
	        ninja-build \
	        wget \
	    && rm -rf /var/lib/apt/lists/*

	# uv ships as a standalone binary, so no system Python is required to get it.
	# The image tag is pinned so rebuilds pick up the same uv release.
	COPY --from=ghcr.io/astral-sh/uv:0.11.7 /uv /uvx /usr/local/bin/

	# Still root here: fetch a standalone CPython 3.12 and create the project
	# venv on top of it. Ownership is handed to UID 1000 in the next step.
	RUN uv python install 3.12 \
	    && uv venv --python 3.12 /opt/venv

	# HF Docker Spaces run as UID 1000. That user owns the venv and uv's Python
	# directory so CUDA-extension editable installs can write their metadata.
	RUN useradd --create-home --uid 1000 user \
	    && chown --recursive user:user /opt/venv /opt/python
	USER user
	ENV HOME=/home/user \
	    PYTHONPATH=/home/user/app/Lyra-2
	WORKDIR /home/user/app

	# torch goes in before anything else: every CUDA extension installed later
	# links against this exact build. VIRTUAL_ENV is set, so uv installs into the
	# venv without needing --system.
	RUN uv pip install --index-url https://download.pytorch.org/whl/cu128 \
	    torch==2.7.1 \
	    torchvision==0.22.1

	# CPATH construction: TE's CUDA build needs headers from the pip-installed
	# nvidia/*/include dirs. setup_cpath.py discovers site-packages dynamically
	# (so it works regardless of Python location) and creates the legacy
	# `nvidia/cudart -> cuda_runtime` symlink TE's setup.py looks for.
	COPY --chown=user build_support/setup_cpath.py ./build_support/setup_cpath.py
	# Steps, all chained so any failure aborts the build:
	#   1. capture the script's stdout in ~/.cpath
	#   2. fail loudly (on stderr) if that file is empty
	#   3. echo the resolved value into the build log
	#   4. write ~/.buildrc, which exports CPATH when sourced
	# NOTE(review): nothing visible in this file sources ~/.buildrc — confirm
	# whether entrypoint.sh or a manual build path still relies on it.
	RUN python ./build_support/setup_cpath.py > /home/user/.cpath && \
	    if [ ! -s /home/user/.cpath ]; then \
	        echo "ERROR: .cpath is empty — CUDA headers won't be found" >&2; \
	        exit 1; \
	    fi && \
	    echo "CPATH = $(cat /home/user/.cpath)" && \
	    echo 'export CPATH="$(cat /home/user/.cpath)"' > /home/user/.buildrc

	# Clone Lyra-2 with submodules (VIPE + DA3). Only the Lyra-2/ subdirectory is
	# kept; the rest of the checkout (including .git) is deleted in the same
	# layer. --depth 1 skips the superproject history, which would be thrown away
	# anyway; submodules are still checked out at the commits the repo records.
	RUN git clone --depth 1 --recurse-submodules https://github.com/nv-tlabs/lyra.git repo && \
	    mv repo/Lyra-2 Lyra-2 && \
	    rm -rf repo

	# tensorstore repin: upstream asks for 0.1.45, which has no Python 3.12
	# wheels. Building it from source takes ~60 min and ~8+ GB RAM, which OOMs or
	# times out HF's build sandbox. 0.1.50 is the earliest release with cp312
	# manylinux wheels and is API-compatible for the jax/orbax uses in Lyra-2.
	RUN sed -i -e 's/^tensorstore==0\.1\.45/tensorstore==0.1.50/' \
	    Lyra-2/requirements.txt

	# Upstream's pure-Python requirements first, then MoGe straight from GitHub.
	RUN uv pip install -r Lyra-2/requirements.txt \
	    && uv pip install "git+https://github.com/microsoft/MoGe.git"

	# Build backends: needed for wheel installs of editables, and kept as a
	# safety net.
	RUN uv pip install \
	    setuptools \
	    wheel \
	    ninja

	# --- CUDA extensions: prebuilt wheels shipped through Git LFS --------------
	# TE/VIPE/DA3/gsplat cannot be compiled on HF Space builders (the build OOMs
	# silently after 10+ min). The wheels under ./wheels/ are compiled once on
	# Modal (1.9 TB RAM) and are ABI-pinned to Python 3.12 + torch 2.7.1+cu128,
	# which is exactly what this image provides.
	COPY --chown=user wheels/ ./wheels/
	RUN uv pip install ./wheels/*.whl

	# flash-attn comes from the upstream release page instead: a prebuilt wheel
	# for torch 2.7 / cu12 / py312.
	RUN uv pip install "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.7cxx11abiFALSE-cp312-cp312-linux_x86_64.whl"

	# Runtime deps that DA3 needs but does not declare (addict is missing from
	# its pyproject); they are not resolved when installing from local wheels
	# with pre-compiled .so files. The list also carries web-serving packages
	# (fastapi, uvicorn, gradio, python-multipart), presumably for the app itself.
	RUN uv pip install \
	    addict \
	    e3nn \
	    evo \
	    fastapi \
	    gradio \
	    kornia \
	    moviepy \
	    open3d \
	    pillow-heif \
	    plyfile \
	    pycolmap \
	    python-multipart \
	    trimesh \
	    typer \
	    uvicorn

	# Repins applied last so nothing after them can bump the versions again:
	# - huggingface_hub < 1.0: chained installs can move past 1.0, which breaks
	#   transformers.
	# - gdown < 6: 6.0 removed the `fuzzy` kwarg that VIPE uses.
	RUN uv pip install \
	    "huggingface_hub>=0.36.0,<1.0" \
	    "gdown<6"

	# Sanity check — GPU-free imports only. `import transformer_engine` triggers
	# Triton autotune eagerly (via the transformer_engine.pytorch submodule that
	# __init__.py auto-loads), which requires a CUDA driver. TE/DA3/VIPE/gsplat
	# imports are therefore skipped at build time and verified at runtime, where
	# an A100 is available.
	# The separators are plain `|` characters: a backslash in front of them would
	# be an invalid Python escape and would show up verbatim in the output.
	RUN python -c "import torch, torchvision, flash_attn; \
	    print('torch', torch.__version__, '| tv', torchvision.__version__, \
	    '| flash_attn', flash_attn.__version__)"
	# Verify the heavy wheels are INSTALLED (metadata lookup, no import) so a
	# silent wheel-install bug surfaces here rather than at runtime:
	# importlib.metadata.version() raises for any distribution that is missing,
	# which fails this build step.
	RUN python -c "import importlib.metadata as m; \
	    names = ['transformer_engine', 'transformer_engine_torch', 'vipe', 'depth_anything_3', 'gsplat']; \
	    print({n: m.version(n) for n in names})"

	# --- App code --------------------------------------------------------------
	# Copied last so edits to the application do not invalidate the dependency
	# layers above. Everything lands in the working directory, owned by `user`.
	COPY --chown=user \
	    app.py \
	    resident_inference.py \
	    warm_model_test.py \
	    download_checkpoints.py \
	    entrypoint.sh \
	    ./
	COPY --chown=user previews/ ./previews/
	# Make sure the entrypoint is executable regardless of the mode it was
	# committed with.
	RUN chmod +x entrypoint.sh

	# Gradio binds to all interfaces on port 7860; EXPOSE documents that port.
	EXPOSE 7860
	ENV GRADIO_SERVER_NAME=0.0.0.0 \
	    GRADIO_SERVER_PORT=7860

	# Exec form: the entrypoint script is started directly, without a wrapping shell.
	CMD ["./entrypoint.sh"]