"""Gradio demo: turn a vocabulary word into an illustrative image via SDXL-Turbo."""

import functools
import random

import gradio as gr
import numpy as np
import torch
from diffusers import DiffusionPipeline

# === Configuration ===
MODEL_REPO_ID = "stabilityai/sdxl-turbo"
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024


def get_torch_dtype() -> torch.dtype:
    """Return float16 when CUDA is available, otherwise float32."""
    return torch.float16 if torch.cuda.is_available() else torch.float32


def get_device() -> str:
    """Return ``"cuda"`` when CUDA is available, otherwise ``"cpu"``."""
    return "cuda" if torch.cuda.is_available() else "cpu"


# === Lazy load the diffusion model ===
@functools.lru_cache(maxsize=1)
def get_pipe() -> DiffusionPipeline:
    """Load the diffusion pipeline on first use and reuse it afterwards.

    The pipeline is created from ``MODEL_REPO_ID`` with the dtype from
    ``get_torch_dtype()`` and moved to the device from ``get_device()``.
    ``lru_cache`` keeps the single instance alive for later calls.
    """
    pipe = DiffusionPipeline.from_pretrained(
        MODEL_REPO_ID,
        torch_dtype=get_torch_dtype(),
    )
    return pipe.to(get_device())


# === Define custom prompt builder ===
def build_prompt(word: str) -> str:
    """Build the text prompt that asks for an image illustrating *word*.

    Args:
        word: The vocabulary word to illustrate; interpolated twice into the
            prompt template.

    Returns:
        The full prompt string.
    """
    # NOTE(review): this prompt is long; it probably exceeds the text
    # encoder's token limit, in which case the tail would be truncated.
    # Confirm against the pipeline's tokenizer before relying on the
    # trailing "Format: ..." sentence.
    return (
        f"Create a powerful, emotionally resonant image that vividly illustrates the meaning of the word '{word}', "
        f"so that even someone who doesn’t speak English can understand it instantly. "
        f"The visual should be sharp, symbolic, and universally relatable. "
        f"Seamlessly weave the word '{word}' into the scene—clearly spelled but not overpowering—"
        f"so it supports the concept without drawing attention away. "
        f"Format: 1080x1080 pixels (square) for Instagram in a (.png) format."
    )


# === Image generation function ===
def generate_image(prompt, negative_prompt, guidance_scale, num_inference_steps, width, height, seed):
    """Run the diffusion pipeline once and return the first generated image.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative prompt passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of denoising steps.
        width: Output width in pixels.
        height: Output height in pixels.
        seed: Seed for the random generator, making the result reproducible.

    Returns:
        The first entry of the pipeline output's ``images``.
    """
    # Values coming from UI sliders may arrive as floats; the generator seed
    # and the pipeline's size/step arguments must be integers.
    generator = torch.Generator().manual_seed(int(seed))
    with torch.inference_mode():
        output = get_pipe()(
            prompt=prompt,
            negative_prompt=negative_prompt,
            guidance_scale=float(guidance_scale),
            num_inference_steps=int(num_inference_steps),
            width=int(width),
            height=int(height),
            generator=generator,
        )
    return output.images[0]


# === Inference wrapper ===
def infer(
    word,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Gradio click handler: build the prompt for *word* and generate an image.

    Args:
        word: Vocabulary word typed by the user.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed taken from the slider; ignored when ``randomize_seed`` is set.
        randomize_seed: When truthy, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Requested image width in pixels.
        height: Requested image height in pixels.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        A ``(image, seed)`` tuple; the seed is the one actually used so the UI
        slider can display it.

    Raises:
        gr.Error: If no word was entered.
    """
    word = (word or "").strip()
    if not word:
        # Avoid spending a full diffusion run on a meaningless prompt.
        raise gr.Error("Please enter a vocabulary word.")

    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    image = generate_image(
        build_prompt(word),
        negative_prompt,
        guidance_scale,
        num_inference_steps,
        width,
        height,
        seed,
    )
    return image, seed


css = """ #col-container { margin: 0 auto; max-width: 640px; } """

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Word-to-Image Generator for Instagram 🎨")

        with gr.Row():
            word = gr.Text(
                label="Vocabulary Word",
                show_label=False,
                max_lines=1,
                placeholder="Enter a vocabulary word",
                container=False,
            )
            run_button = gr.Button("Generate Image", scale=0, variant="primary")

        result = gr.Image(label="Generated Image", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                visible=False,
            )
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                # The default must lie inside [minimum, maximum] and on the
                # 32 px step grid; the previous 1080 satisfied neither.
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=MAX_IMAGE_SIZE,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=MAX_IMAGE_SIZE,
                )

            with gr.Row():
                # The SDXL-Turbo model card runs the model with guidance
                # disabled (guidance_scale=0.0), so that is the default here.
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=0.0,
                )
                num_inference_steps = gr.Slider(
                    label="Inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=4,
                )

    run_button.click(
        fn=infer,
        inputs=[
            word,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, seed],
    )

if __name__ == "__main__":
    demo.launch()