Spaces:

spac333
/

MONK3YSPAC333

Running

File size: 12,303 Bytes


"""
DynamiCrafter Image Animation
Anima immagini con interpolazione intelligente
"""

import gradio as gr
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video
from PIL import Image
import numpy as np
import tempfile
import os

print("🔧 Initializing DynamiCrafter pipeline...")

# Configuration
MODEL_ID = "Doubiiu/DynamiCrafter_512_Interp"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pipeline once at import time; `pipe` is the module-level handle
# used by the generation code.
print(f"📦 Loading model from {MODEL_ID}...")

try:
    pipe = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        # Half precision only on GPU; CPU inference stays in float32.
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        custom_pipeline="dynamicrafter_interpolation"
    )

    if DEVICE == "cuda":
        # Model CPU offload manages device placement itself, so the pipeline
        # is NOT moved to the GPU by hand first: a full `.to("cuda")` would
        # require the whole model to fit in VRAM, which is exactly what
        # offloading is meant to avoid.
        pipe.enable_model_cpu_offload()
        pipe.enable_vae_slicing()
        print("✅ GPU optimizations enabled")
    else:
        pipe.to(DEVICE)

    print(f"✅ Pipeline loaded successfully on {DEVICE}")

except Exception as e:
    # Any failure above (download, unknown custom pipeline, missing
    # optimisation method, ...) falls through to the fallback model.
    print(f"❌ Error loading pipeline: {e}")
    print("⚠️  Trying alternative loading method...")

    # Fallback: standard Stable Video Diffusion image-to-video pipeline.
    # Errors raised here are not caught and abort the start-up.
    from diffusers import StableVideoDiffusionPipeline
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid",
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
    )
    pipe.to(DEVICE)
    print("✅ Loaded fallback model (SVD)")


def preprocess_image(image):
    """
    Prepare an input image for the animation pipeline.

    Args:
        image: A PIL image or a NumPy array (converted with
            ``Image.fromarray``).

    Returns:
        Tuple ``(resized_image, original_size)`` where ``resized_image`` is an
        RGB PIL image resized to 512x512 with Lanczos resampling and
        ``original_size`` is the ``.size`` of the image before resizing.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError("No image provided")

    # NumPy input is wrapped into a PIL image; anything else is used as-is.
    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    # Normalise the colour mode to RGB.
    rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")

    # Remember the size before the fixed 512x512 resize (aspect ratio is not kept).
    source_size = rgb_image.size
    return rgb_image.resize((512, 512), Image.LANCZOS), source_size


def create_seamless_loop(frames, blend_frames=3):
    """
    Build a ping-pong loop (forward pass followed by a reversed pass).

    The reversed pass omits the last and the first frame so that neither
    endpoint is shown twice in a row when the clip repeats.

    When blending is possible, the last ``blend_frames`` forward frames are
    replaced by cross-fades between the tail of the forward pass and the head
    of the reversed pass, and the first ``blend_frames`` reversed frames are
    dropped.

    Args:
        frames: Sequence of frames. On the blending path each frame is
            converted with ``np.array`` and the result is cast to ``uint8``.
        blend_frames: Number of frames used for the cross-fade. Values <= 0
            disable blending.

    Returns:
        A new list of frames. A plain ping-pong (no blending) is returned when
        ``blend_frames`` is not positive or there are too few frames to blend.
    """
    forward = list(frames)
    reverse = forward[-2:0:-1]

    # Fall back to the simple ping-pong when blending is disabled or cannot be
    # done safely:
    #   * blend_frames <= 0 would make `forward[:-blend_frames]` empty
    #     (`[:-0]` is `[:0]`) and silently drop the whole forward pass;
    #   * too few frames would index past the end of `reverse`
    #     (e.g. 2 frames with blend_frames=1 gives an empty reverse pass).
    if (
        blend_frames <= 0
        or len(forward) < blend_frames * 2
        or len(reverse) < blend_frames
    ):
        return forward + reverse

    # Cross-fade between the tail of the forward pass and the head of the
    # reversed pass. alpha is 0 for i == 0, so the first blended frame equals
    # the last forward frame.
    # NOTE(review): the blended frames replace forward[-blend_frames:], so
    # playback goes from forward[-blend_frames - 1] straight to a copy of
    # forward[-1] -- confirm this ordering is intended.
    blended = []
    for i in range(blend_frames):
        alpha = i / blend_frames
        tail_frame = np.array(forward[-1 - i])
        mirrored_frame = np.array(reverse[i])
        mixed = (tail_frame * (1 - alpha) + mirrored_frame * alpha).astype(np.uint8)
        blended.append(Image.fromarray(mixed))

    return forward[:-blend_frames] + blended + reverse[blend_frames:]


def animate_image(
    image,
    num_frames=16,
    num_inference_steps=25,
    motion_strength=127,
    fps=8,
    use_loop=True,
    seed=-1,
    progress=gr.Progress()
):
    """
    Animate a still image with the loaded pipeline and export it as an MP4.

    Args:
        image: Input image (PIL image or NumPy array).
        num_frames: Number of frames to generate (8-32).
        num_inference_steps: Quality steps (10-50).
        motion_strength: Motion intensity (1-255), forwarded to the pipeline
            as ``motion_bucket_id``.
        fps: Frames per second, used both for the pipeline call and for the
            exported video.
        use_loop: Whether to turn the clip into a ping-pong loop.
        seed: Random seed; ``-1`` (or ``None``) picks a random one.
        progress: Gradio progress tracker.

    Returns:
        Tuple ``(video_path, message)``. On success ``video_path`` is the path
        of a temporary ``.mp4`` file and ``message`` is a Markdown summary.
        On any failure, or when ``image`` is ``None``, ``video_path`` is
        ``None`` and ``message`` describes the problem; exceptions are caught
        and reported rather than raised.
    """

    if image is None:
        return None, "❌ Carica un'immagine prima!"

    try:
        progress(0, desc="🖼️ Processing image...")

        # Preprocess the image
        processed_image, original_size = preprocess_image(image)

        print(f"📸 Image processed: {original_size} -> 512x512")

        progress(0.2, desc="🎬 Generating animation...")

        # Resolve the seed: -1 or an empty field means "pick one at random".
        if seed is None or seed == -1:
            seed = torch.randint(0, 1000000, (1,)).item()
        else:
            seed = int(seed)

        # Always seed the RNGs with the value that is reported back, so that a
        # randomly drawn seed can actually be reused to reproduce the result.
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)

        print(f"🎲 Using seed: {seed}")
        print(f"🎞️ Generating {num_frames} frames...")

        # Generate the frames
        with torch.no_grad():
            output = pipe(
                processed_image,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
                decode_chunk_size=4,
                motion_bucket_id=motion_strength,
                fps=fps,
                height=512,
                width=512
            )

        # First (only) video of the batch.
        frames = output.frames[0]

        progress(0.7, desc="🔄 Creating loop...")

        # Build the loop if requested
        if use_loop:
            frames = create_seamless_loop(frames, blend_frames=3)

        progress(0.9, desc="💾 Saving video...")

        # Reserve a temp file name and close the handle right away; the file
        # is kept (delete=False) so it can be returned as the video output.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name

        export_to_video(frames, output_path, fps=fps)

        progress(1.0, desc="✅ Complete!")

        # Summary shown next to the video
        total_frames = len(frames)
        duration = total_frames / fps

        info = f"""
        ✅ **Animazione creata con successo!**
        
        📊 **Dettagli:**
        - Frame generati: {total_frames}
        - FPS: {fps}
        - Durata: ~{duration:.1f} secondi
        - Loop: {'Sì ✅' if use_loop else 'No ❌'}
        - Motion strength: {motion_strength}
        - Seed: {seed}
        - Risoluzione: 512x512
        - Device: {DEVICE.upper()}
        - Inference steps: {num_inference_steps}
        
        💡 **Tip:** Salva il seed per ricreare animazioni simili!
        """

        return output_path, info

    except Exception as e:
        # UI boundary: report the failure to the user instead of raising, and
        # log the full traceback to the console.
        error_msg = f"""
        ❌ **Errore durante la generazione:**
        
        {str(e)}
        
        💡 **Possibili soluzioni:**
        - Riduci il numero di frame
        - Riduci gli inference steps
        - Prova con un'altra immagine
        - Verifica che l'immagine sia valida
        """
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        return None, error_msg


# Predefined examples (placeholder - add real images).
# Each row appears to follow the order of the animate_image inputs:
# [image path, num_frames, num_inference_steps, motion_strength, fps, use_loop, seed].
# NOTE(review): EXAMPLES is not referenced anywhere in the visible code (no
# gr.Examples component consumes it) -- confirm whether it should be wired in.
EXAMPLES = [
    ["examples/landscape.jpg", 16, 25, 127, 8, True, 42],
    ["examples/portrait.jpg", 16, 20, 100, 8, True, 123],
    ["examples/abstract.jpg", 24, 25, 150, 8, True, 456],
]


# Gradio interface: a two-column layout with the image upload and generation
# settings on the left, and the resulting video plus an info panel on the
# right. Components are laid out in the order they are created below.
with gr.Blocks(
    title="🎬 DynamiCrafter Image Animator",
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="purple"
    ),
    css="""
    .gradio-container {max-width: 1200px !important}
    .output-class {height: 500px !important}
    """
) as demo:
    
    # Page header
    gr.Markdown("""
    # 🎬 DynamiCrafter Image Animator
    ### Transform Static Images into Smooth Animations
    
    Powered by **DynamiCrafter** - State-of-the-art image interpolation for fluid animations
    
    💡 **Best results with:**
    - Clear, well-lit images
    - Subjects with potential for natural movement
    - Landscapes, portraits, or objects
    """)
    
    with gr.Row():
        # Left column - inputs
        with gr.Column(scale=1):
            # type="pil" hands the upload to the handler as a PIL image.
            image_input = gr.Image(
                label="📸 Upload Image",
                type="pil",
                sources=["upload", "webcam", "clipboard"],
                height=400
            )
            
            gr.Markdown("### ⚙️ Animation Settings")
            
            num_frames = gr.Slider(
                minimum=8,
                maximum=32,
                value=16,
                step=1,
                label="🎞️ Number of Frames",
                info="More frames = longer animation (but slower)"
            )
            
            motion_strength = gr.Slider(
                minimum=1,
                maximum=255,
                value=127,
                step=1,
                label="💨 Motion Strength",
                info="Higher = more movement (127 is balanced)"
            )
            
            # Less frequently used settings, collapsed by default.
            with gr.Accordion("🎨 Advanced Options", open=False):
                num_inference_steps = gr.Slider(
                    minimum=10,
                    maximum=50,
                    value=25,
                    step=5,
                    label="🎨 Quality (Inference Steps)",
                    info="Higher = better quality but slower"
                )
                
                fps = gr.Slider(
                    minimum=4,
                    maximum=30,
                    value=8,
                    step=1,
                    label="🎥 FPS (Frames per Second)",
                    info="Playback speed"
                )
                
                use_loop = gr.Checkbox(
                    value=True,
                    label="🔄 Create Seamless Loop",
                    info="Enable for repeating animations"
                )
                
                # -1 is the sentinel animate_image treats as "random seed".
                seed = gr.Number(
                    value=-1,
                    label="🎲 Seed (-1 for random)",
                    info="Use same seed for consistent results",
                    precision=0
                )
            
            generate_btn = gr.Button(
                "🎬 Animate Image",
                variant="primary",
                size="lg"
            )
            
            gr.Markdown("""
            ### 📊 Performance Guide
            
            **CPU (Free tier):**
            - Frames: 8-12
            - Steps: 15-20
            - Time: ~2-3 min
            
            **GPU T4 ($0.60/h):**
            - Frames: 16-24
            - Steps: 25-30
            - Time: ~30-60 sec
            """)
        
        # Right column - output
        with gr.Column(scale=1):
            video_output = gr.Video(
                label="🎬 Animated Result",
                autoplay=True,
                loop=True,
                height=400
            )
            
            # Receives the Markdown summary or error message returned by
            # animate_image.
            info_output = gr.Markdown(
                value="👆 Upload an image and click 'Animate' to start!",
                label="ℹ️ Generation Info"
            )
            
            gr.Markdown("""
            ### 💡 Tips for Best Results
            
            - **Landscapes**: Natural scenes with clouds, water work great
            - **Portraits**: Clear face shots animate smoothly
            - **Objects**: Items with potential movement (flags, hair, etc.)
            - **Lighting**: Well-lit images produce better results
            - **Resolution**: 512x512 is optimal (auto-resized)
            
            ### 🎨 Motion Strength Guide
            
            - **50-100**: Subtle movement (breathing, gentle sway)
            - **100-150**: Medium movement (clouds, water)
            - **150-200**: Strong movement (wind, dynamic action)
            - **200+**: Extreme movement (experimental)
            """)
    
    # Event handler. The order of `inputs` must match the positional
    # parameters of animate_image (image, num_frames, num_inference_steps,
    # motion_strength, fps, use_loop, seed); its two return values map onto
    # `outputs` in order.
    generate_btn.click(
        fn=animate_image,
        inputs=[
            image_input,
            num_frames,
            num_inference_steps,
            motion_strength,
            fps,
            use_loop,
            seed
        ],
        outputs=[video_output, info_output],
    )
    
    # Footer. The text is a plain string (not an f-string); the "{device}"
    # placeholder is filled in with str.replace below.
    gr.Markdown("""
    ---
    ### 🔧 Technical Details
    
    - **Model**: DynamiCrafter 512 Interpolation
    - **Method**: Diffusion-based frame interpolation
    - **Resolution**: 512x512 (optimized)
    - **Device**: {device}
    
    ### 📚 Resources
    
    - [DynamiCrafter Paper](https://arxiv.org/abs/2310.12190)
    - [Model on HuggingFace](https://huggingface.co/Doubiiu/DynamiCrafter_512_Interp)
    - [GitHub Repository](https://github.com/Doubiiu/DynamiCrafter)
    
    ---
    
    **Made with ❤️ using HuggingFace Diffusers**
    """.replace("{device}", DEVICE.upper()))

# Script entry point: enable the request queue, then start the web server.
if __name__ == "__main__":
    # Server settings gathered in one place and expanded into launch().
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.queue(max_size=10)
    demo.launch(**launch_options)