File size: 4,721 Bytes
58ad1a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656bf12
58ad1a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373ba70
58ad1a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import gradio as gr
from PIL import Image
from typing import Tuple, Optional
import os
import shutil

# Import the core logic functions
from models import generate_remixed_image

# --- Setup Dummy Examples for immediate runnability ---
# Directory (relative to the working directory) that holds the example images.
EXAMPLE_DIR = "examples"
# exist_ok=True folds the existence check and the creation into one call, so
# there is no window between "check" and "create" in which another process
# could create the directory and make makedirs() raise.
os.makedirs(EXAMPLE_DIR, exist_ok=True)

# Create dummy images for examples if they don't exist
def create_dummy_image(filename, color):
    """
    Write a 100x100 solid-colour RGB placeholder into EXAMPLE_DIR.

    Nothing is written when a file already exists at the target path, so an
    existing example image is never overwritten.

    Args:
        filename: File name (joined onto EXAMPLE_DIR) to create.
        color: Fill colour handed to ``Image.new``.
    """
    target = os.path.join(EXAMPLE_DIR, filename)
    if os.path.exists(target):
        return
    Image.new('RGB', (100, 100), color=color).save(target)

# Seed the three placeholder files referenced by the examples section below
# (descriptive names, one distinct fill colour each).
create_dummy_image(filename="eiffel_tower.jpg", color="blue")
create_dummy_image(filename="sunset.jpg", color="orange")
create_dummy_image(filename="painting.jpg", color="green")

# --- Component Definitions ---

# Keyword arguments shared by every image input component.
# "type" is "filepath" so that the callback receives a local file path,
# which is what the external API interaction expects.
IMAGE_INPUT_PROPS = dict(
    type="filepath",
    image_mode="RGB",
    height=250,
    label="Drag Image Here",
    sources=["upload"],
    interactive=True,
)

# --- Interface Function Wrapper ---

def remixer_wrapper(
    model_choice: str,
    prompt: str,
    img1_path: Optional[str],
    img2_path: Optional[str],
    img3_path: Optional[str],
    progress: gr.Progress = gr.Progress()
) -> Tuple[str, Image.Image]:
    """
    Validate the Gradio inputs and delegate to ``generate_remixed_image``.

    Args:
        model_choice: Value of the model selector, forwarded unchanged.
        prompt: User-written instructions; must contain non-whitespace text.
        img1_path: Path of the first input image, or None if not supplied.
        img2_path: Path of the second input image, or None if not supplied.
        img3_path: Path of the third input image, or None if not supplied.
        progress: Progress tracker. The ``gr.Progress()`` default is the
            Gradio convention for having a tracker supplied to the callback;
            it is not meant to be passed by callers.

    Returns:
        The two values returned by ``generate_remixed_image``, in order
        (annotated here as the generated prompt text and the output image).

    Raises:
        gr.Error: If ``prompt`` is empty, None, or whitespace only.
    """
    # A prompt made only of spaces/newlines carries no instructions, so it is
    # rejected here instead of being sent on to the model call.
    if not prompt or not prompt.strip():
        raise gr.Error("Please provide a creative prompt.")

    progress(0.1, desc=f"Analyzing inputs using {model_choice}...")

    # Image paths are forwarded as-is (including None); per the original note,
    # validation and API interaction are left to the model logic.
    result_prompt, result_image = generate_remixed_image(
        model_choice,
        prompt,
        img1_path,
        img2_path,
        img3_path
    )

    progress(1.0, desc="Generation complete.")

    return result_prompt, result_image

# --- Gradio Blocks Layout ---

# Build the whole UI inside one Blocks context; the resulting app object is
# bound to `demo`, which the __main__ guard at the bottom of the file launches.
with gr.Blocks(title="Image Remixer AI", theme=gr.themes.Soft()) as demo:
    # Static page header: title, one-line description, and attribution link.
    gr.HTML("""
        <div style="text-align: center; max-width: 900px; margin: 0 auto; padding: 20px;">
            <h1 style="font-size: 2.5em; margin-bottom: 10px;">🎨 Multimodal Image Remixer</h1>
            <p style="margin-bottom: 20px;">Upload 3 images and provide a prompt to fuse them into a new creative result using advanced AI models.</p>
            <p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: blue;">anycoder</a></p>
        </div>
    """)

    # Three image inputs in one row, all configured from IMAGE_INPUT_PROPS.
    with gr.Row():
        image_input_1 = gr.Image(**IMAGE_INPUT_PROPS)
        image_input_2 = gr.Image(**IMAGE_INPUT_PROPS)
        image_input_3 = gr.Image(**IMAGE_INPUT_PROPS)
        
    # Free-text instructions; remixer_wrapper raises gr.Error when this is empty.
    prompt_input = gr.Textbox(
        label="Creative Prompt/Instructions",
        placeholder="Describe the desired fusion style, mood, and content (e.g., 'A dramatic digital painting, blending these objects into a high-tech cityscape').",
        lines=2
    )

    # Model choice and the submit button share a row with equal scale.
    # NOTE(review): the label/info text names DALL-E 3, GPT-4o and
    # Gemini 2.5 Flash Live; what each choice actually maps to is decided in
    # models.generate_remixed_image, which is not visible here — confirm the
    # wording matches.
    with gr.Row():
        model_selector = gr.Radio(
            choices=["gemini-2", "gpt image-1"],
            value="gpt image-1",
            label="Analysis Model (Used to create a detailed prompt for DALL-E 3 Generation)",
            info="Choose the underlying multimodal model for creative analysis: Gemini 2.5 Flash Live ('gemini-2') or GPT-4o ('gpt image-1').",
            scale=1
        )
        submit_btn = gr.Button("Remix Images (Requires API Keys)", variant="primary", scale=1)

    # Read-only outputs: the first and second values returned by remixer_wrapper.
    with gr.Column():
        generated_prompt_output = gr.Textbox(label="AI Generated DALL-E Prompt", interactive=False)
        remixed_image_output = gr.Image(label="Remixed Output Image (DALL-E 3)", interactive=False)

    
    # --- Event Binding ---
    # The order of `inputs` mirrors remixer_wrapper's parameters
    # (model_choice, prompt, img1_path, img2_path, img3_path); `progress` is
    # deliberately not listed as an input component.
    submit_btn.click(
        fn=remixer_wrapper,
        inputs=[
            model_selector,
            prompt_input,
            image_input_1,
            image_input_2,
            image_input_3
        ],
        outputs=[generated_prompt_output, remixed_image_output],
    )

    # --- Examples ---
    # Each example row follows the order of `inputs` given to gr.Examples
    # (prompt, model, image 1-3), which differs from the click binding above.
    # The image paths are the placeholder files created under EXAMPLE_DIR.
    # No `fn` is passed and cache_examples is False, so nothing is generated
    # ahead of time for the examples.
    gr.Examples(
        examples=[
            [
                "A hyper-realistic oil painting of a mythical creature created by combining the style and features of the inputs.",
                "gpt image-1",
                os.path.join(EXAMPLE_DIR, "eiffel_tower.jpg"),
                os.path.join(EXAMPLE_DIR, "sunset.jpg"),
                os.path.join(EXAMPLE_DIR, "painting.jpg"),
            ]
        ],
        inputs=[prompt_input, model_selector, image_input_1, image_input_2, image_input_3],
        cache_examples=False,
    )

# Launch the app only when this file is executed as a script; importing the
# module builds `demo` but does not call launch().
if __name__ == "__main__":
    demo.launch()