import os import spaces import torch from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline from diffusers.models.transformers.transformer_wan import WanTransformer3DModel from diffusers.utils.export_utils import export_to_video import gradio as gr import tempfile import numpy as np from PIL import Image import random # ========================================================= # MODEL CONFIGURATION # ========================================================= MODEL_ID = os.getenv("MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers") HF_TOKEN = os.environ.get("HF_TOKEN") MAX_DIM = 832 MIN_DIM = 480 SQUARE_DIM = 640 MULTIPLE_OF = 16 MAX_SEED = np.iinfo(np.int32).max FIXED_FPS = 24 MIN_FRAMES_MODEL = 8 MAX_FRAMES_MODEL = 81 MIN_DURATION = 0.5 MAX_DURATION = 3.0 # ========================================================= # LOAD PIPELINE # ========================================================= print("Loading pipeline...") pipe = WanImageToVideoPipeline.from_pretrained( MODEL_ID, torch_dtype=torch.bfloat16, token=HF_TOKEN ) # ========================================================= # DEFAULT PROMPTS # ========================================================= default_prompt_i2v = "Generate a video with smooth and natural movement. Objects should have visible motion while maintaining fluid transitions." default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed, ugly, bad anatomy, static, frozen, overall gray" # ========================================================= # IMAGE RESIZING LOGIC # ========================================================= def resize_image(image: Image.Image) -> Image.Image: width, height = image.size if width == height: return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS) aspect_ratio = width / height MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM image_to_resize = image if aspect_ratio > MAX_ASPECT_RATIO: crop_width = int(round(height * MAX_ASPECT_RATIO)) left = (width - crop_width) // 2 image_to_resize = image.crop((left, 0, left + crop_width, height)) elif aspect_ratio < MIN_ASPECT_RATIO: crop_height = int(round(width / MIN_ASPECT_RATIO)) top = (height - crop_height) // 2 image_to_resize = image.crop((0, top, width, top + crop_height)) current_width, current_height = image_to_resize.size current_aspect = current_width / current_height if current_width > current_height: target_w = MAX_DIM target_h = int(round(target_w / current_aspect)) else: target_h = MAX_DIM target_w = int(round(target_h * current_aspect)) final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF final_w = max(MIN_DIM, min(MAX_DIM, final_w)) final_h = max(MIN_DIM, min(MAX_DIM, final_h)) return image_to_resize.resize((final_w, final_h), Image.LANCZOS) # ========================================================= # UTILITY FUNCTIONS # ========================================================= def get_num_frames(duration_seconds: float): return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)) # ========================================================= # MAIN GENERATION FUNCTION # ========================================================= @spaces.GPU(duration=300) def generate_video( input_image, prompt, negative_prompt=default_negative_prompt, duration_seconds=2.0, steps=6, guidance_scale=1.0, seed=42, randomize_seed=False, progress=gr.Progress(track_tqdm=True), ): if input_image is None: raise gr.Error("Please upload an image.") pipe.to("cuda") num_frames = get_num_frames(duration_seconds) current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed) resized_image = resize_image(input_image) output_frames_list = pipe( image=resized_image, prompt=prompt, negative_prompt=negative_prompt, height=resized_image.height, width=resized_image.width, num_frames=num_frames, guidance_scale=float(guidance_scale), num_inference_steps=int(steps), generator=torch.Generator(device="cuda").manual_seed(current_seed), ).frames[0] with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile: video_path = tmpfile.name export_to_video(output_frames_list, video_path, fps=FIXED_FPS) return video_path, current_seed # ========================================================= # GRADIO UI # ========================================================= with gr.Blocks() as demo: gr.HTML("""

NSFW Uncensored "Image to Video"

Powered by Wan 2.1 Model

""") with gr.Row(): with gr.Column(scale=1): input_image_component = gr.Image( type="pil", label="Upload Image", height=350 ) prompt_input = gr.Textbox( label="Prompt", value=default_prompt_i2v, placeholder="Describe the motion you want...", lines=3 ) duration_seconds_input = gr.Slider( minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.5, value=2.0, label="Duration (seconds)" ) with gr.Accordion("Advanced Options", open=False): negative_prompt_input = gr.Textbox( label="Negative Prompt", value=default_negative_prompt, lines=2 ) steps_slider = gr.Slider( minimum=4, maximum=12, step=1, value=6, label="Inference Steps" ) guidance_scale_input = gr.Slider( minimum=0.0, maximum=5.0, step=0.5, value=1.0, label="Guidance Scale" ) seed_input = gr.Slider( label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42 ) randomize_seed_checkbox = gr.Checkbox( label="Randomize Seed", value=True ) generate_button = gr.Button( "Generate Video", variant="primary" ) with gr.Column(scale=1): video_output = gr.Video( label="Generated Video", autoplay=True, height=450 ) ui_inputs = [ input_image_component, prompt_input, negative_prompt_input, duration_seconds_input, steps_slider, guidance_scale_input, seed_input, randomize_seed_checkbox ] generate_button.click( fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input] ) if __name__ == "__main__": demo.queue().launch()