"""Gradio Space: image-to-video generation with Wan 2.1 via Diffusers."""

import os
import random
import tempfile

import gradio as gr
import numpy as np
import spaces  # ZeroGPU: import before any CUDA initialization
import torch
from PIL import Image

from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.utils.export_utils import export_to_video

MODEL_ID = os.getenv("MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Resolution constraints: each side must be a multiple of MULTIPLE_OF and lie
# within [MIN_DIM, MAX_DIM]; square inputs are resized to SQUARE_DIM.
MAX_DIM = 832
MIN_DIM = 480
SQUARE_DIM = 640
MULTIPLE_OF = 16

MAX_SEED = np.iinfo(np.int32).max

# Frame-count and duration limits at the fixed output frame rate.
FIXED_FPS = 24
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81
MIN_DURATION = 0.5
MAX_DURATION = 3.0
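
# Sanity check on the limits above: at FIXED_FPS = 24, the longest request
# (3.0 s) maps to round(3.0 * 24) + 1 = 73 frames, comfortably below
# MAX_FRAMES_MODEL = 81 (about 3.3 s of output).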
print("Loading pipeline...") |
|
|
pipe = WanImageToVideoPipeline.from_pretrained( |
|
|
MODEL_ID, |
|
|
torch_dtype=torch.bfloat16, |
|
|
token=HF_TOKEN |
|
|
) |
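
# Note: the pipeline stays on CPU at import time and is moved to CUDA inside
# the @spaces.GPU-decorated handler below, since ZeroGPU only attaches a GPU
# to the process while such a handler is running.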

default_prompt_i2v = "Generate a video with smooth and natural movement. Objects should have visible motion while maintaining fluid transitions."
default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed, ugly, bad anatomy, static, frozen, overall gray"


def resize_image(image: Image.Image) -> Image.Image:
    """Center-crop (if needed) and resize so both sides are multiples of
    MULTIPLE_OF within [MIN_DIM, MAX_DIM]."""
    width, height = image.size

    if width == height:
        return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)

    aspect_ratio = width / height
    MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
    MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM

    # Center-crop images whose aspect ratio falls outside the supported range.
    image_to_resize = image
    if aspect_ratio > MAX_ASPECT_RATIO:  # too wide: crop width
        crop_width = int(round(height * MAX_ASPECT_RATIO))
        left = (width - crop_width) // 2
        image_to_resize = image.crop((left, 0, left + crop_width, height))
    elif aspect_ratio < MIN_ASPECT_RATIO:  # too tall: crop height
        crop_height = int(round(width / MIN_ASPECT_RATIO))
        top = (height - crop_height) // 2
        image_to_resize = image.crop((0, top, width, top + crop_height))

    current_width, current_height = image_to_resize.size
    current_aspect = current_width / current_height

    # Scale the longer side to MAX_DIM, preserving aspect ratio.
    if current_width > current_height:
        target_w = MAX_DIM
        target_h = int(round(target_w / current_aspect))
    else:
        target_h = MAX_DIM
        target_w = int(round(target_h * current_aspect))

    # Snap both sides to the nearest multiple of MULTIPLE_OF, then clamp to
    # the supported range (the bounds themselves are multiples of 16).
    final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
    final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
    final_w = max(MIN_DIM, min(MAX_DIM, final_w))
    final_h = max(MIN_DIM, min(MAX_DIM, final_h))

    return image_to_resize.resize((final_w, final_h), Image.LANCZOS)
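
# Example: a 1920x1080 input (aspect ~1.78) slightly exceeds
# MAX_DIM / MIN_DIM ~= 1.733, so it is center-cropped to 1872x1080 and then
# resized to 832x480, which already satisfies the multiple-of-16 constraint.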


def get_num_frames(duration_seconds: float) -> int:
    """Map a requested duration in seconds to a frame count clipped to
    [MIN_FRAMES_MODEL, MAX_FRAMES_MODEL]."""
    # Clip the final count (including the +1) so the result never exceeds
    # MAX_FRAMES_MODEL.
    return int(np.clip(round(duration_seconds * FIXED_FPS) + 1, MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))
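
# Example: with the UI's 0.5 s slider step, every request lands on a frame
# count of the form 4k + 1 that Wan expects, e.g.
# 2.0 s -> round(2.0 * 24) + 1 = 49 = 4 * 12 + 1.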


@spaces.GPU(duration=300)
def generate_video(
    input_image,
    prompt,
    negative_prompt=default_negative_prompt,
    duration_seconds=2.0,
    steps=6,
    guidance_scale=1.0,
    seed=42,
    randomize_seed=False,
    progress=gr.Progress(track_tqdm=True),
):
    """Run Wan 2.1 image-to-video and return (video_path, seed_used)."""
    if input_image is None:
        raise gr.Error("Please upload an image.")

    pipe.to("cuda")

    num_frames = get_num_frames(duration_seconds)
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    resized_image = resize_image(input_image)

    output_frames_list = pipe(
        image=resized_image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=resized_image.height,
        width=resized_image.width,
        num_frames=num_frames,
        guidance_scale=float(guidance_scale),
        num_inference_steps=int(steps),
        generator=torch.Generator(device="cuda").manual_seed(current_seed),
    ).frames[0]

    # Write the frames to a temporary MP4 that Gradio can serve by path.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)

    return video_path, current_seed
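
# A quick local smoke test, assuming a CUDA GPU and a hypothetical test image
# at "input.jpg" (outside Spaces, @spaces.GPU has no effect):
#
#   path, used_seed = generate_video(Image.open("input.jpg"), default_prompt_i2v)
#   print(f"Wrote {path} (seed={used_seed})")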


with gr.Blocks() as demo:
    gr.HTML("""
    <style>
        .gradio-container {
            background: linear-gradient(135deg, #fef9f3 0%, #f0e6fa 50%, #e6f0fa 100%) !important;
        }
        footer {display: none !important;}
    </style>
    <div style="text-align: center; margin-bottom: 20px;">
        <h1 style="color: #6b5b7a; font-size: 2.2rem; font-weight: 700; margin-bottom: 0.3rem;">
            NSFW Uncensored "Image to Video"
        </h1>
        <p style="color: #8b7b9b; font-size: 1rem;">Powered by Wan 2.1 Model</p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            input_image_component = gr.Image(
                type="pil",
                label="Upload Image",
                height=350,
            )
            prompt_input = gr.Textbox(
                label="Prompt",
                value=default_prompt_i2v,
                placeholder="Describe the motion you want...",
                lines=3,
            )
            duration_seconds_input = gr.Slider(
                minimum=MIN_DURATION,
                maximum=MAX_DURATION,
                step=0.5,
                value=2.0,
                label="Duration (seconds)",
            )

            with gr.Accordion("Advanced Options", open=False):
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value=default_negative_prompt,
                    lines=2,
                )
                steps_slider = gr.Slider(
                    minimum=4,
                    maximum=12,
                    step=1,
                    value=6,
                    label="Inference Steps",
                )
                guidance_scale_input = gr.Slider(
                    minimum=0.0,
                    maximum=5.0,
                    step=0.5,
                    value=1.0,
                    label="Guidance Scale",
                )
                seed_input = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=42,
                )
                randomize_seed_checkbox = gr.Checkbox(
                    label="Randomize Seed",
                    value=True,
                )

            generate_button = gr.Button(
                "Generate Video",
                variant="primary",
            )

        with gr.Column(scale=1):
            video_output = gr.Video(
                label="Generated Video",
                autoplay=True,
                height=450,
            )

    # Order must match the positional parameters of generate_video.
    ui_inputs = [
        input_image_component,
        prompt_input,
        negative_prompt_input,
        duration_seconds_input,
        steps_slider,
        guidance_scale_input,
        seed_input,
        randomize_seed_checkbox,
    ]

    # The seed actually used is written back to the seed slider so that a
    # randomized run can be reproduced.
    generate_button.click(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=[video_output, seed_input],
    )

if __name__ == "__main__":
    demo.queue().launch()