import gradio as gr
import numpy as np
import random
import torch
from diffusers import DiffusionPipeline

# === Configuration ===
# Model identifier passed to DiffusionPipeline.from_pretrained() in get_pipe().
MODEL_REPO_ID = "stabilityai/sdxl-turbo"
# Largest signed 32-bit integer; upper bound for the seed slider and for randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max
# Upper limit of the width and height sliders in the UI.
MAX_IMAGE_SIZE = 1024

def get_torch_dtype():
    """Return the tensor dtype to load the model with.

    Half precision (``torch.float16``) is chosen when CUDA is available;
    otherwise full precision (``torch.float32``) is used.
    """
    if torch.cuda.is_available():
        return torch.float16
    return torch.float32

def get_device():
    """Return the device string to run on: ``"cuda"`` if available, else ``"cpu"``."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"

# === Lazy load the diffusion model ===
def get_pipe():
    """Return the shared diffusion pipeline, loading it on first use.

    The loaded pipeline is cached as the ``pipe`` attribute of this function,
    so ``from_pretrained`` runs at most once per process; later calls return
    the cached object unchanged.
    """
    try:
        return get_pipe.pipe
    except AttributeError:
        # Nothing cached yet: fall through and build the pipeline.
        pass

    loaded = DiffusionPipeline.from_pretrained(
        MODEL_REPO_ID,
        torch_dtype=get_torch_dtype(),
    )
    get_pipe.pipe = loaded.to(get_device())
    return get_pipe.pipe

# === Define custom prompt builder ===
def build_prompt(word):
    """Build the full text-to-image prompt for a single vocabulary word.

    The word is inserted, wrapped in single quotes, at two places in a fixed
    instruction template.

    Args:
        word: The vocabulary word to illustrate.

    Returns:
        The complete prompt string.
    """
    quoted = f"'{word}'"
    segments = (
        "Create a powerful, emotionally resonant image that vividly illustrates the meaning of the word ",
        quoted,
        ", so that even someone who doesn’t speak English can understand it instantly. ",
        "The visual should be sharp, symbolic, and universally relatable. ",
        "Seamlessly weave the word ",
        quoted,
        " into the scene—clearly spelled but not overpowering—",
        "so it supports the concept without drawing attention away. ",
        "Format: 1080x1080 pixels (square) for Instagram in a (.png) format.",
    )
    return "".join(segments)

# === Image generation function ===
def generate_image(prompt, negative_prompt, guidance_scale, num_inference_steps, width, height, seed):
    """Run the diffusion pipeline once and return the first generated image.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative prompt passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of denoising steps.
        width: Requested image width.
        height: Requested image height.
        seed: Seed for the random generator, so a given seed is reproducible.

    Returns:
        The first entry of the pipeline output's ``images`` list.

    Raises:
        TypeError, ValueError: If a numeric argument cannot be converted.
    """
    # UI sliders may hand numbers over as floats (e.g. 512.0). manual_seed()
    # needs an int, and sizes/step counts are integers too, so normalise
    # everything once at the boundary.
    seed = int(seed)
    width = int(width)
    height = int(height)
    num_inference_steps = int(num_inference_steps)
    guidance_scale = float(guidance_scale)

    generator = torch.Generator().manual_seed(seed)
    with torch.inference_mode():
        output = get_pipe()(
            prompt=prompt,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            width=width,
            height=height,
            generator=generator,
        )
    return output.images[0]

# === Inference wrapper ===
def infer(
    word,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Gradio click handler: turn a vocabulary word into an image.

    Args:
        word: The vocabulary word typed by the user.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed chosen in the UI; ignored when ``randomize_seed`` is set.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Requested image width.
        height: Requested image height.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of denoising steps.
        progress: Not read in this function; per Gradio's ``Progress`` docs,
            declaring it with ``track_tqdm=True`` surfaces tqdm progress in the UI.

    Returns:
        A ``(image, seed)`` tuple; ``seed`` is the seed actually used.

    Raises:
        gr.Error: If ``word`` is empty or only whitespace.
    """
    # An empty word would yield a prompt about the word '' and waste a run on
    # a meaningless image, so reject it with a user-visible error instead.
    word = str(word).strip() if word is not None else ""
    if not word:
        raise gr.Error("Please enter a vocabulary word.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    else:
        # The slider value may be a float; the generator needs an int, and the
        # value echoed back to the slider should be the integer actually used.
        seed = int(seed)

    prompt = build_prompt(word)
    image = generate_image(prompt, negative_prompt, guidance_scale, num_inference_steps, width, height, seed)
    return image, seed

# Custom CSS handed to gr.Blocks below: centers the column whose elem_id is
# "col-container" and caps its width at 640px.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

# Build the UI: a word input with a generate button, the result image, and a
# collapsed accordion of advanced generation settings.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Word-to-Image Generator for Instagram 🎨")

        with gr.Row():
            word = gr.Text(
                label="Vocabulary Word",
                show_label=False,
                max_lines=1,
                placeholder="Enter a vocabulary word",
                container=False,
            )
            run_button = gr.Button("Generate Image", scale=0, variant="primary")

        result = gr.Image(label="Generated Image", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            # Created but hidden (visible=False); its value is still passed to infer().
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                # The default must lie within [minimum, maximum] and on the
                # 32-pixel step grid; the previous default of 1080 exceeded
                # MAX_IMAGE_SIZE and was off the grid.
                width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=MAX_IMAGE_SIZE)
                height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=MAX_IMAGE_SIZE)

            with gr.Row():
                # NOTE(review): the SDXL-Turbo model card reportedly recommends
                # guidance_scale=0.0 — confirm that the 3.5 default is intentional.
                guidance_scale = gr.Slider(label="Guidance scale", minimum=0.0, maximum=10.0, step=0.1, value=3.5)
                num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=50, step=1, value=4)

    # The seed slider is also an output so the seed actually used (possibly
    # randomized inside infer) is shown back to the user.
    run_button.click(
        fn=infer,
        inputs=[word, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs=[result, seed],
    )

# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()