badrerootunix committed
Commit c45ec28 · 1 Parent(s): b18bc5f

Restore Wan 2.1 with proper aspect ratio and optimized settings

Files changed (2):
  1. app.py +96 -79
  2. requirements.txt +7 -4
app.py CHANGED
@@ -1,7 +1,7 @@
 import os
 import spaces
 import torch
-from diffusers import StableVideoDiffusionPipeline
+from diffusers import WanImageToVideoPipeline
 from diffusers.utils import export_to_video
 import gradio as gr
 import tempfile
@@ -13,28 +13,36 @@ import random
 # MODEL CONFIGURATION
 # =========================================================
 
-MODEL_ID = "stabilityai/stable-video-diffusion-img2vid-xt"
+MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
+HF_TOKEN = os.environ.get("HF_TOKEN")
+MAX_DIM = 832
+MIN_DIM = 480
+SQUARE_DIM = 640
+MULTIPLE_OF = 16
 MAX_SEED = np.iinfo(np.int32).max
-FIXED_FPS = 7
-NUM_FRAMES = 25
+FIXED_FPS = 16
+MIN_FRAMES_MODEL = 8
+MAX_FRAMES_MODEL = 49
+MIN_DURATION = 0.5
+MAX_DURATION = 2.0
 
 # =========================================================
 # LOAD PIPELINE
 # =========================================================
 
 print("Loading pipeline...")
-pipe = StableVideoDiffusionPipeline.from_pretrained(
+pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
-    torch_dtype=torch.float16,
-    variant="fp16"
+    torch_dtype=torch.bfloat16,
+    token=HF_TOKEN
 )
-pipe.to("cpu")
 
 # =========================================================
 # DEFAULT PROMPTS
 # =========================================================
 
-default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed"
+default_prompt_i2v = "Generate a video with smooth and natural movement. Objects should have visible motion while maintaining fluid transitions."
+default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed, ugly, bad anatomy, static, frozen"
 
 # =========================================================
 # IMAGE RESIZING LOGIC
@@ -42,35 +50,46 @@ default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed"
 
 def resize_image(image: Image.Image) -> Image.Image:
     width, height = image.size
-    aspect_ratio = width / height
-
-    # SVD works best with 1024x576 or 576x1024
-    if aspect_ratio > 1:  # Landscape
-        new_width = 1024
-        new_height = 576
-    elif aspect_ratio < 1:  # Portrait
-        new_width = 576
-        new_height = 1024
+
+    # Determine orientation and set target dimensions
+    if width > height:  # Landscape
+        target_w = MAX_DIM
+        target_h = MIN_DIM
+    elif height > width:  # Portrait
+        target_w = MIN_DIM
+        target_h = MAX_DIM
     else:  # Square
-        new_width = 768
-        new_height = 768
-
-    return image.resize((new_width, new_height), Image.LANCZOS)
+        target_w = SQUARE_DIM
+        target_h = SQUARE_DIM
+
+    # Make divisible by 16
+    target_w = (target_w // MULTIPLE_OF) * MULTIPLE_OF
+    target_h = (target_h // MULTIPLE_OF) * MULTIPLE_OF
+
+    return image.resize((target_w, target_h), Image.LANCZOS)
+
+# =========================================================
+# UTILITY FUNCTIONS
+# =========================================================
+
+def get_num_frames(duration_seconds: float):
+    frames = int(round(duration_seconds * FIXED_FPS))
+    return max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, frames))
 
 # =========================================================
 # MAIN GENERATION FUNCTION
 # =========================================================
 
-@spaces.GPU(duration=120)
+@spaces.GPU(duration=300)
 def generate_video(
     input_image,
-    motion_bucket_id,
-    noise_aug_strength,
-    fps,
-    num_frames,
-    decode_chunk_size,
-    seed,
-    randomize_seed,
+    prompt,
+    negative_prompt=default_negative_prompt,
+    duration_seconds=1.5,
+    steps=4,
+    guidance_scale=1.0,
+    seed=42,
+    randomize_seed=False,
     progress=gr.Progress(track_tqdm=True),
 ):
     if input_image is None:
@@ -78,24 +97,26 @@ def generate_video(
 
     pipe.to("cuda")
 
+    num_frames = get_num_frames(duration_seconds)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     resized_image = resize_image(input_image)
 
-    generator = torch.Generator(device="cuda").manual_seed(current_seed)
-
-    frames = pipe(
+    output_frames_list = pipe(
         image=resized_image,
-        num_frames=int(num_frames),
-        motion_bucket_id=int(motion_bucket_id),
-        noise_aug_strength=float(noise_aug_strength),
-        decode_chunk_size=int(decode_chunk_size),
-        generator=generator,
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        height=resized_image.height,
+        width=resized_image.width,
+        num_frames=num_frames,
+        guidance_scale=float(guidance_scale),
+        num_inference_steps=int(steps),
+        generator=torch.Generator(device="cuda").manual_seed(current_seed),
     ).frames[0]
 
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
 
-    export_to_video(frames, video_path, fps=int(fps))
+    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
     return video_path, current_seed
 
 # =========================================================
@@ -106,15 +127,15 @@ with gr.Blocks() as demo:
     gr.HTML("""
     <style>
     .gradio-container {
-        background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
+        background: linear-gradient(135deg, #fef9f3 0%, #f0e6fa 50%, #e6f0fa 100%) !important;
    }
    footer {display: none !important;}
    </style>
    <div style="text-align: center; margin-bottom: 20px;">
-        <h1 style="color: #e94560; font-size: 2.2rem; font-weight: 700; margin-bottom: 0.3rem;">
+        <h1 style="color: #6b5b7a; font-size: 2.2rem; font-weight: 700; margin-bottom: 0.3rem;">
        NSFW Uncensored "Image to Video"
        </h1>
-        <p style="color: #a0a0a0; font-size: 1rem;">Powered by Stable Video Diffusion</p>
+        <p style="color: #8b7b9b; font-size: 1rem;">Powered by Wan 2.1 Model</p>
    </div>
    """)
 
@@ -126,46 +147,42 @@ with gr.Blocks() as demo:
         height=350
     )
 
-    with gr.Accordion("Video Settings", open=True):
-        motion_bucket_id = gr.Slider(
-            minimum=1,
-            maximum=255,
-            step=1,
-            value=127,
-            label="Motion Intensity (higher = more motion)"
-        )
-
-        fps_slider = gr.Slider(
-            minimum=5,
-            maximum=30,
-            step=1,
-            value=7,
-            label="FPS"
-        )
+    prompt_input = gr.Textbox(
+        label="Prompt",
+        value=default_prompt_i2v,
+        placeholder="Describe the motion you want...",
+        lines=3
+    )
 
-        num_frames_slider = gr.Slider(
-            minimum=14,
-            maximum=25,
-            step=1,
-            value=25,
-            label="Number of Frames"
-        )
+    duration_seconds_input = gr.Slider(
+        minimum=MIN_DURATION,
+        maximum=MAX_DURATION,
+        step=0.5,
+        value=1.0,
+        label="Duration (seconds)"
+    )
 
     with gr.Accordion("Advanced Options", open=False):
-        noise_aug_strength = gr.Slider(
-            minimum=0.0,
-            maximum=1.0,
-            step=0.01,
-            value=0.02,
-            label="Noise Augmentation"
+        negative_prompt_input = gr.Textbox(
+            label="Negative Prompt",
+            value=default_negative_prompt,
+            lines=2
        )
 
-        decode_chunk_size = gr.Slider(
+        steps_slider = gr.Slider(
            minimum=1,
-            maximum=25,
+            maximum=10,
            step=1,
-            value=8,
-            label="Decode Chunk Size"
+            value=4,
+            label="Inference Steps"
+        )
+
+        guidance_scale_input = gr.Slider(
+            minimum=0.0,
+            maximum=5.0,
+            step=0.5,
+            value=1.0,
+            label="Guidance Scale"
        )
 
     seed_input = gr.Slider(
@@ -195,11 +212,11 @@ with gr.Blocks() as demo:
 
     ui_inputs = [
         input_image_component,
-        motion_bucket_id,
-        noise_aug_strength,
-        fps_slider,
-        num_frames_slider,
-        decode_chunk_size,
+        prompt_input,
+        negative_prompt_input,
+        duration_seconds_input,
+        steps_slider,
+        guidance_scale_input,
         seed_input,
         randomize_seed_checkbox
     ]
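
Reviewer note: a minimal standalone sketch of the new sizing and frame-count rules above. The two helper bodies are copied from the diff; the test dimensions and durations are invented examples, not values used by the Space.

# Sanity check for the new sizing/frame-count logic (standalone sketch).
from PIL import Image

MAX_DIM, MIN_DIM, SQUARE_DIM, MULTIPLE_OF = 832, 480, 640, 16
FIXED_FPS, MIN_FRAMES_MODEL, MAX_FRAMES_MODEL = 16, 8, 49

def resize_image(image: Image.Image) -> Image.Image:
    width, height = image.size
    if width > height:        # landscape -> 832x480
        target_w, target_h = MAX_DIM, MIN_DIM
    elif height > width:      # portrait  -> 480x832
        target_w, target_h = MIN_DIM, MAX_DIM
    else:                     # square    -> 640x640
        target_w = target_h = SQUARE_DIM
    # snap down to a multiple of 16, per the MULTIPLE_OF constant in the diff
    target_w = (target_w // MULTIPLE_OF) * MULTIPLE_OF
    target_h = (target_h // MULTIPLE_OF) * MULTIPLE_OF
    return image.resize((target_w, target_h), Image.LANCZOS)

def get_num_frames(duration_seconds: float) -> int:
    # duration maps to frames at the fixed 16 fps, clamped to [8, 49]
    frames = int(round(duration_seconds * FIXED_FPS))
    return max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, frames))

for w, h in [(1920, 1080), (1080, 1920), (512, 512)]:  # made-up test sizes
    out = resize_image(Image.new("RGB", (w, h)))
    assert out.width % 16 == 0 and out.height % 16 == 0
    print(f"{w}x{h} -> {out.size}")

for secs in (0.5, 1.0, 2.0):
    print(f"{secs}s -> {get_num_frames(secs)} frames")  # 8, 16, 32
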
requirements.txt CHANGED
@@ -1,11 +1,14 @@
-diffusers
+git+https://github.com/huggingface/diffusers.git
 transformers
 accelerate
 safetensors
-torch
+sentencepiece
+peft
+ftfy
+imageio-ffmpeg
+opencv-python
 gradio
+torch
 spaces
 numpy
 Pillow
-imageio
-imageio-ffmpeg
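
The diffusers git pin is presumably there because WanImageToVideoPipeline only ships in recent diffusers builds, with sentencepiece and ftfy serving the model's text-encoder tokenizer. A quick post-install smoke test (an illustrative sketch, not part of the repo):

# Confirms the installed stack exposes the Wan pipeline class and the
# tokenizer extras; raises ImportError on an older diffusers release.
from diffusers import WanImageToVideoPipeline
import sentencepiece
import ftfy

print("Wan 2.1 dependency stack OK")
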