import os
import spaces
import torch
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
from diffusers.utils.export_utils import export_to_video
import gradio as gr
import tempfile
import numpy as np
from PIL import Image
import random
# =========================================================
# MODEL CONFIGURATION
# =========================================================
MODEL_ID = os.getenv("MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers")
HF_TOKEN = os.environ.get("HF_TOKEN")
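# Output sizing: the 480P Wan checkpoint targets roughly 832x480, so the longest
# side is capped at MAX_DIM, the shortest at MIN_DIM, and both are snapped to
# multiples of MULTIPLE_OF before inference.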
MAX_DIM = 832
MIN_DIM = 480
SQUARE_DIM = 640
MULTIPLE_OF = 16
MAX_SEED = np.iinfo(np.int32).max
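# Timing: videos are exported at FIXED_FPS; the duration slider is converted to a
# frame count and clipped to the model's supported range (see get_num_frames).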
FIXED_FPS = 24
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81
MIN_DURATION = 0.5
MAX_DURATION = 3.0
# =========================================================
# LOAD PIPELINE
# =========================================================
print("Loading pipeline...")
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    token=HF_TOKEN,
)
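# The pipeline is loaded on CPU at import time; generate_video() moves it to CUDA
# inside the @spaces.GPU-decorated call, so a GPU is only attached for the
# duration of a generation request (the ZeroGPU pattern).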
# =========================================================
# DEFAULT PROMPTS
# =========================================================
default_prompt_i2v = "Generate a video with smooth and natural movement. Objects should have visible motion while maintaining fluid transitions."
default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed, ugly, bad anatomy, static, frozen, overall gray"
# =========================================================
# IMAGE RESIZING LOGIC
# =========================================================
def resize_image(image: Image.Image) -> Image.Image:
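    """Crop and resize the input so it fits the model's supported resolutions.

    Square images are resized to SQUARE_DIM x SQUARE_DIM. Other images are
    center-cropped to keep the aspect ratio within
    [MIN_DIM/MAX_DIM, MAX_DIM/MIN_DIM], scaled so the longer side equals MAX_DIM,
    then snapped to multiples of MULTIPLE_OF and clamped to [MIN_DIM, MAX_DIM].
    """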
    width, height = image.size
    if width == height:
        return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
    aspect_ratio = width / height
    MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
    MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM
    image_to_resize = image
    # Center-crop images whose aspect ratio falls outside the supported range.
    if aspect_ratio > MAX_ASPECT_RATIO:
        crop_width = int(round(height * MAX_ASPECT_RATIO))
        left = (width - crop_width) // 2
        image_to_resize = image.crop((left, 0, left + crop_width, height))
    elif aspect_ratio < MIN_ASPECT_RATIO:
        crop_height = int(round(width / MIN_ASPECT_RATIO))
        top = (height - crop_height) // 2
        image_to_resize = image.crop((0, top, width, top + crop_height))
    current_width, current_height = image_to_resize.size
    current_aspect = current_width / current_height
    # Scale so the longer side equals MAX_DIM while preserving the aspect ratio.
    if current_width > current_height:
        target_w = MAX_DIM
        target_h = int(round(target_w / current_aspect))
    else:
        target_h = MAX_DIM
        target_w = int(round(target_h * current_aspect))
    # Snap both sides to multiples of MULTIPLE_OF and clamp to the allowed range.
    final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
    final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
    final_w = max(MIN_DIM, min(MAX_DIM, final_w))
    final_h = max(MIN_DIM, min(MAX_DIM, final_h))
    return image_to_resize.resize((final_w, final_h), Image.LANCZOS)
# =========================================================
# UTILITY FUNCTIONS
# =========================================================
def get_num_frames(duration_seconds: float):
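    """Convert a duration in seconds into a frame count for the pipeline.

    The count is round(duration_seconds * FIXED_FPS), clipped to
    [MIN_FRAMES_MODEL, MAX_FRAMES_MODEL], plus one.
    """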
    return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))
# =========================================================
# MAIN GENERATION FUNCTION
# =========================================================
@spaces.GPU(duration=300)
def generate_video(
    input_image,
    prompt,
    negative_prompt=default_negative_prompt,
    duration_seconds=2.0,
    steps=6,
    guidance_scale=1.0,
    seed=42,
    randomize_seed=False,
    progress=gr.Progress(track_tqdm=True),
):
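    """Generate a short video from a single image with the Wan 2.1 I2V pipeline.

    Returns a path to the exported .mp4 and the seed that was actually used
    (randomized when randomize_seed is set).
    """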
    if input_image is None:
        raise gr.Error("Please upload an image.")
    pipe.to("cuda")
    num_frames = get_num_frames(duration_seconds)
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    resized_image = resize_image(input_image)
    output_frames_list = pipe(
        image=resized_image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=resized_image.height,
        width=resized_image.width,
        num_frames=num_frames,
        guidance_scale=float(guidance_scale),
        num_inference_steps=int(steps),
        generator=torch.Generator(device="cuda").manual_seed(current_seed),
    ).frames[0]
    # Write the frames to a temporary .mp4; delete=False keeps the file around
    # so Gradio can serve it after the handler returns.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
    return video_path, current_seed
# =========================================================
# GRADIO UI
# =========================================================
with gr.Blocks() as demo:
    gr.HTML("""
    <style>
    .gradio-container {
        background: linear-gradient(135deg, #fef9f3 0%, #f0e6fa 50%, #e6f0fa 100%) !important;
    }
    footer {display: none !important;}
    </style>
    <div style="text-align: center; margin-bottom: 20px;">
        <h1 style="color: #6b5b7a; font-size: 2.2rem; font-weight: 700; margin-bottom: 0.3rem;">
            NSFW Uncensored "Image to Video"
        </h1>
        <p style="color: #8b7b9b; font-size: 1rem;">Powered by Wan 2.1 Model</p>
    </div>
    """)
    with gr.Row():
        with gr.Column(scale=1):
            input_image_component = gr.Image(
                type="pil",
                label="Upload Image",
                height=350
            )
            prompt_input = gr.Textbox(
                label="Prompt",
                value=default_prompt_i2v,
                placeholder="Describe the motion you want...",
                lines=3
            )
            duration_seconds_input = gr.Slider(
                minimum=MIN_DURATION,
                maximum=MAX_DURATION,
                step=0.5,
                value=2.0,
                label="Duration (seconds)"
            )
            with gr.Accordion("Advanced Options", open=False):
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value=default_negative_prompt,
                    lines=2
                )
                steps_slider = gr.Slider(
                    minimum=4,
                    maximum=12,
                    step=1,
                    value=6,
                    label="Inference Steps"
                )
                guidance_scale_input = gr.Slider(
                    minimum=0.0,
                    maximum=5.0,
                    step=0.5,
                    value=1.0,
                    label="Guidance Scale"
                )
                seed_input = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=42
                )
                randomize_seed_checkbox = gr.Checkbox(
                    label="Randomize Seed",
                    value=True
                )
            generate_button = gr.Button(
                "Generate Video",
                variant="primary"
            )
        with gr.Column(scale=1):
            video_output = gr.Video(
                label="Generated Video",
                autoplay=True,
                height=450
            )
    ui_inputs = [
        input_image_component,
        prompt_input,
        negative_prompt_input,
        duration_seconds_input,
        steps_slider,
        guidance_scale_input,
        seed_input,
        randomize_seed_checkbox
    ]
    generate_button.click(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=[video_output, seed_input]
    )
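# queue() enables Gradio's request queue so concurrent generation requests are
# processed in turn rather than all at once.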
if __name__ == "__main__":
    demo.queue().launch()