import os
import spaces
import torch
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
from diffusers.utils.export_utils import export_to_video
import gradio as gr
import tempfile
import numpy as np
from PIL import Image
import random

# =========================================================
# MODEL CONFIGURATION
# =========================================================

MODEL_ID = os.getenv("MODEL_ID", "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers")
HF_TOKEN = os.environ.get("HF_TOKEN")
MAX_DIM = 832
MIN_DIM = 480
SQUARE_DIM = 640
MULTIPLE_OF = 16
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 24
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81
MIN_DURATION = 0.5
MAX_DURATION = 3.0

# =========================================================
# LOAD PIPELINE
# =========================================================

print("Loading pipeline...")
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    token=HF_TOKEN
)

# =========================================================
# DEFAULT PROMPTS
# =========================================================

default_prompt_i2v = "Generate a video with smooth and natural movement. Objects should have visible motion while maintaining fluid transitions."
default_negative_prompt = "low quality, worst quality, blurry, distorted, deformed, ugly, bad anatomy, static, frozen, overall gray"

# =========================================================
# IMAGE RESIZING LOGIC
# =========================================================

def resize_image(image: Image.Image) -> Image.Image:
    width, height = image.size
    if width == height:
        return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)

    aspect_ratio = width / height
    MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
    MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM

    image_to_resize = image
    if aspect_ratio > MAX_ASPECT_RATIO:
        crop_width = int(round(height * MAX_ASPECT_RATIO))
        left = (width - crop_width) // 2
        image_to_resize = image.crop((left, 0, left + crop_width, height))
    elif aspect_ratio < MIN_ASPECT_RATIO:
        crop_height = int(round(width / MIN_ASPECT_RATIO))
        top = (height - crop_height) // 2
        image_to_resize = image.crop((0, top, width, top + crop_height))

    current_width, current_height = image_to_resize.size
    current_aspect = current_width / current_height

    if current_width > current_height:
        target_w = MAX_DIM
        target_h = int(round(target_w / current_aspect))
    else:
        target_h = MAX_DIM
        target_w = int(round(target_h * current_aspect))

    final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
    final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
    final_w = max(MIN_DIM, min(MAX_DIM, final_w))
    final_h = max(MIN_DIM, min(MAX_DIM, final_h))

    return image_to_resize.resize((final_w, final_h), Image.LANCZOS)

# =========================================================
# UTILITY FUNCTIONS
# =========================================================

def get_num_frames(duration_seconds: float):
    return 1 + int(np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))

# =========================================================
# MAIN GENERATION FUNCTION
# =========================================================

@spaces.GPU(duration=300)
def generate_video(
    input_image,
    prompt,
    negative_prompt=default_negative_prompt,
    duration_seconds=2.0,
    steps=6,
    guidance_scale=1.0,
    seed=42,
    randomize_seed=False,
    progress=gr.Progress(track_tqdm=True),
):
    if input_image is None:
        raise gr.Error("Please upload an image.")

    pipe.to("cuda")

    num_frames = get_num_frames(duration_seconds)
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    resized_image = resize_image(input_image)

    output_frames_list = pipe(
        image=resized_image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=resized_image.height,
        width=resized_image.width,
        num_frames=num_frames,
        guidance_scale=float(guidance_scale),
        num_inference_steps=int(steps),
        generator=torch.Generator(device="cuda").manual_seed(current_seed),
    ).frames[0]

    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name

    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
    return video_path, current_seed

# =========================================================
# GRADIO UI
# =========================================================

with gr.Blocks() as demo:
    gr.HTML("""
    <style>
    .gradio-container {
        background: linear-gradient(135deg, #fef9f3 0%, #f0e6fa 50%, #e6f0fa 100%) !important;
    }
    footer {display: none !important;}
    </style>
    <div style="text-align: center; margin-bottom: 20px;">
        <h1 style="color: #6b5b7a; font-size: 2.2rem; font-weight: 700; margin-bottom: 0.3rem;">
            NSFW Uncensored "Image to Video"
        </h1>
        <p style="color: #8b7b9b; font-size: 1rem;">Powered by Wan 2.1 Model</p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            input_image_component = gr.Image(
                type="pil",
                label="Upload Image",
                height=350
            )

            prompt_input = gr.Textbox(
                label="Prompt",
                value=default_prompt_i2v,
                placeholder="Describe the motion you want...",
                lines=3
            )

            duration_seconds_input = gr.Slider(
                minimum=MIN_DURATION,
                maximum=MAX_DURATION,
                step=0.5,
                value=2.0,
                label="Duration (seconds)"
            )

            with gr.Accordion("Advanced Options", open=False):
                negative_prompt_input = gr.Textbox(
                    label="Negative Prompt",
                    value=default_negative_prompt,
                    lines=2
                )

                steps_slider = gr.Slider(
                    minimum=4,
                    maximum=12,
                    step=1,
                    value=6,
                    label="Inference Steps"
                )

                guidance_scale_input = gr.Slider(
                    minimum=0.0,
                    maximum=5.0,
                    step=0.5,
                    value=1.0,
                    label="Guidance Scale"
                )

                seed_input = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=42
                )

                randomize_seed_checkbox = gr.Checkbox(
                    label="Randomize Seed",
                    value=True
                )

            generate_button = gr.Button(
                "Generate Video",
                variant="primary"
            )

        with gr.Column(scale=1):
            video_output = gr.Video(
                label="Generated Video",
                autoplay=True,
                height=450
            )

    ui_inputs = [
        input_image_component,
        prompt_input,
        negative_prompt_input,
        duration_seconds_input,
        steps_slider,
        guidance_scale_input,
        seed_input,
        randomize_seed_checkbox
    ]

    generate_button.click(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=[video_output, seed_input]
    )

if __name__ == "__main__":
    demo.queue().launch()