badrerootunix committed
Commit c45ec28 · 1 Parent(s): b18bc5f

Restore Wan 2.1 with proper aspect ratio and optimized settings

Files changed (2):
  1. app.py +96 -79
  2. requirements.txt +7 -4
app.py CHANGED
@@ -1,7 +1,7 @@
 import os
 import spaces
 import torch
-from diffusers import StableVideoDiffusionPipeline
+from diffusers import WanImageToVideoPipeline
 from diffusers.utils import export_to_video
 import gradio as gr
 import tempfile
@@ -13,28 +13,36 @@ import random
 # MODEL CONFIGURATION
 # =========================================================
 
-MODEL_ID = "stabilityai/stable-video-diffusion-img2vid-xt"
+MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
+HF_TOKEN = os.environ.get("HF_TOKEN")
+MAX_DIM = 832
+MIN_DIM = 480
+SQUARE_DIM = 640
+MULTIPLE_OF = 16
 MAX_SEED = np.iinfo(np.int32).max
-FIXED_FPS = 7
-NUM_FRAMES = 25
+FIXED_FPS = 16
+MIN_FRAMES_MODEL = 8
+MAX_FRAMES_MODEL = 49
+MIN_DURATION = 0.5
+MAX_DURATION = 2.0
 
 # =========================================================
 # LOAD PIPELINE
 # =========================================================
 
 print("Loading pipeline...")
-pipe = StableVideoDiffusionPipeline.from_pretrained(
+pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
-    torch_dtype=torch.float16,
-    variant="fp16"
+    torch_dtype=torch.bfloat16,
+    token=HF_TOKEN
 )
-pipe.to("cpu")
 
 # =========================================================
 # DEFAULT PROMPTS
 # =========================================================
 
-default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed"
+default_prompt_i2v = "Generate a video with smooth and natural movement. Objects should have visible motion while maintaining fluid transitions."
+default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed, ugly, bad anatomy, static, frozen"
 
 # =========================================================
 # IMAGE RESIZING LOGIC
@@ -42,35 +50,46 @@ default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed"
 
 def resize_image(image: Image.Image) -> Image.Image:
     width, height = image.size
-    aspect_ratio = width / height
-
-    # SVD works best with 1024x576 or 576x1024
-    if aspect_ratio > 1:  # Landscape
-        new_width = 1024
-        new_height = 576
-    elif aspect_ratio < 1:  # Portrait
-        new_width = 576
-        new_height = 1024
+
+    # Determine orientation and set target dimensions
+    if width > height:  # Landscape
+        target_w = MAX_DIM
+        target_h = MIN_DIM
+    elif height > width:  # Portrait
+        target_w = MIN_DIM
+        target_h = MAX_DIM
     else:  # Square
-        new_width = 768
-        new_height = 768
-
-    return image.resize((new_width, new_height), Image.LANCZOS)
+        target_w = SQUARE_DIM
+        target_h = SQUARE_DIM
+
+    # Make divisible by 16
+    target_w = (target_w // MULTIPLE_OF) * MULTIPLE_OF
+    target_h = (target_h // MULTIPLE_OF) * MULTIPLE_OF
+
+    return image.resize((target_w, target_h), Image.LANCZOS)
+
+# =========================================================
+# UTILITY FUNCTIONS
+# =========================================================
+
+def get_num_frames(duration_seconds: float):
+    frames = int(round(duration_seconds * FIXED_FPS))
+    return max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, frames))
 
 # =========================================================
 # MAIN GENERATION FUNCTION
 # =========================================================
 
-@spaces.GPU(duration=120)
+@spaces.GPU(duration=300)
 def generate_video(
     input_image,
-    motion_bucket_id,
-    noise_aug_strength,
-    fps,
-    num_frames,
-    decode_chunk_size,
-    seed,
-    randomize_seed,
+    prompt,
+    negative_prompt=default_negative_prompt,
+    duration_seconds=1.5,
+    steps=4,
+    guidance_scale=1.0,
+    seed=42,
+    randomize_seed=False,
     progress=gr.Progress(track_tqdm=True),
 ):
     if input_image is None:
@@ -78,24 +97,26 @@ def generate_video(
 
     pipe.to("cuda")
 
+    num_frames = get_num_frames(duration_seconds)
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     resized_image = resize_image(input_image)
 
-    generator = torch.Generator(device="cuda").manual_seed(current_seed)
-
-    frames = pipe(
+    output_frames_list = pipe(
         image=resized_image,
-        num_frames=int(num_frames),
-        motion_bucket_id=int(motion_bucket_id),
-        noise_aug_strength=float(noise_aug_strength),
-        decode_chunk_size=int(decode_chunk_size),
-        generator=generator,
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        height=resized_image.height,
+        width=resized_image.width,
+        num_frames=num_frames,
+        guidance_scale=float(guidance_scale),
+        num_inference_steps=int(steps),
+        generator=torch.Generator(device="cuda").manual_seed(current_seed),
     ).frames[0]
 
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
 
-    export_to_video(frames, video_path, fps=int(fps))
+    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
     return video_path, current_seed
 
 # =========================================================
@@ -106,15 +127,15 @@ with gr.Blocks() as demo:
     gr.HTML("""
     <style>
     .gradio-container {
-        background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
+        background: linear-gradient(135deg, #fef9f3 0%, #f0e6fa 50%, #e6f0fa 100%) !important;
    }
    footer {display: none !important;}
    </style>
    <div style="text-align: center; margin-bottom: 20px;">
-        <h1 style="color: #e94560; font-size: 2.2rem; font-weight: 700; margin-bottom: 0.3rem;">
+        <h1 style="color: #6b5b7a; font-size: 2.2rem; font-weight: 700; margin-bottom: 0.3rem;">
        NSFW Uncensored "Image to Video"
        </h1>
-        <p style="color: #a0a0a0; font-size: 1rem;">Powered by Stable Video Diffusion</p>
+        <p style="color: #8b7b9b; font-size: 1rem;">Powered by Wan 2.1 Model</p>
    </div>
    """)
 
@@ -126,46 +147,42 @@ with gr.Blocks() as demo:
         height=350
     )
 
-    with gr.Accordion("Video Settings", open=True):
-        motion_bucket_id = gr.Slider(
-            minimum=1,
-            maximum=255,
-            step=1,
-            value=127,
-            label="Motion Intensity (higher = more motion)"
-        )
-
-        fps_slider = gr.Slider(
-            minimum=5,
-            maximum=30,
-            step=1,
-            value=7,
-            label="FPS"
-        )
+    prompt_input = gr.Textbox(
+        label="Prompt",
+        value=default_prompt_i2v,
+        placeholder="Describe the motion you want...",
+        lines=3
+    )
 
-        num_frames_slider = gr.Slider(
-            minimum=14,
-            maximum=25,
-            step=1,
-            value=25,
-            label="Number of Frames"
-        )
+    duration_seconds_input = gr.Slider(
+        minimum=MIN_DURATION,
+        maximum=MAX_DURATION,
+        step=0.5,
+        value=1.0,
+        label="Duration (seconds)"
+    )
 
     with gr.Accordion("Advanced Options", open=False):
-        noise_aug_strength = gr.Slider(
-            minimum=0.0,
-            maximum=1.0,
-            step=0.01,
-            value=0.02,
-            label="Noise Augmentation"
+        negative_prompt_input = gr.Textbox(
+            label="Negative Prompt",
+            value=default_negative_prompt,
+            lines=2
        )
 
-        decode_chunk_size = gr.Slider(
+        steps_slider = gr.Slider(
            minimum=1,
-            maximum=25,
+            maximum=10,
            step=1,
-            value=8,
-            label="Decode Chunk Size"
+            value=4,
+            label="Inference Steps"
+        )
+
+        guidance_scale_input = gr.Slider(
+            minimum=0.0,
+            maximum=5.0,
+            step=0.5,
+            value=1.0,
+            label="Guidance Scale"
        )
 
     seed_input = gr.Slider(
@@ -195,11 +212,11 @@ with gr.Blocks() as demo:
 
     ui_inputs = [
         input_image_component,
-        motion_bucket_id,
-        noise_aug_strength,
-        fps_slider,
-        num_frames_slider,
-        decode_chunk_size,
+        prompt_input,
+        negative_prompt_input,
+        duration_seconds_input,
+        steps_slider,
+        guidance_scale_input,
         seed_input,
         randomize_seed_checkbox
     ]
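
Reviewer note: a minimal standalone sketch of the new sizing and frame-count rules above. The two helper bodies are copied from the diff; the test dimensions and durations are invented examples, not values used by the Space.

# Sanity check for the new sizing/frame-count logic (standalone sketch).
from PIL import Image

MAX_DIM, MIN_DIM, SQUARE_DIM, MULTIPLE_OF = 832, 480, 640, 16
FIXED_FPS, MIN_FRAMES_MODEL, MAX_FRAMES_MODEL = 16, 8, 49

def resize_image(image: Image.Image) -> Image.Image:
    width, height = image.size
    if width > height:        # landscape -> 832x480
        target_w, target_h = MAX_DIM, MIN_DIM
    elif height > width:      # portrait  -> 480x832
        target_w, target_h = MIN_DIM, MAX_DIM
    else:                     # square    -> 640x640
        target_w = target_h = SQUARE_DIM
    # snap down to a multiple of 16, per the MULTIPLE_OF constant in the diff
    target_w = (target_w // MULTIPLE_OF) * MULTIPLE_OF
    target_h = (target_h // MULTIPLE_OF) * MULTIPLE_OF
    return image.resize((target_w, target_h), Image.LANCZOS)

def get_num_frames(duration_seconds: float) -> int:
    # duration maps to frames at the fixed 16 fps, clamped to [8, 49]
    frames = int(round(duration_seconds * FIXED_FPS))
    return max(MIN_FRAMES_MODEL, min(MAX_FRAMES_MODEL, frames))

for w, h in [(1920, 1080), (1080, 1920), (512, 512)]:  # made-up test sizes
    out = resize_image(Image.new("RGB", (w, h)))
    assert out.width % 16 == 0 and out.height % 16 == 0
    print(f"{w}x{h} -> {out.size}")

for secs in (0.5, 1.0, 2.0):
    print(f"{secs}s -> {get_num_frames(secs)} frames")  # 8, 16, 32
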
requirements.txt CHANGED
@@ -1,11 +1,14 @@
-diffusers
+git+https://github.com/huggingface/diffusers.git
 transformers
 accelerate
 safetensors
-torch
+sentencepiece
+peft
+ftfy
+imageio-ffmpeg
+opencv-python
 gradio
+torch
 spaces
 numpy
 Pillow
-imageio
-imageio-ffmpeg
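
The diffusers git pin is presumably there because WanImageToVideoPipeline only ships in recent diffusers builds, with sentencepiece and ftfy serving the model's text-encoder tokenizer. A quick post-install smoke test (an illustrative sketch, not part of the repo):

# Confirms the installed stack exposes the Wan pipeline class and the
# tokenizer extras; raises ImportError on an older diffusers release.
from diffusers import WanImageToVideoPipeline
import sentencepiece
import ftfy

print("Wan 2.1 dependency stack OK")
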