Spaces:

Kilos1
/

Nutrition_App

Runtime error

App Files Community

Kilos1 committed on Mar 9

Commit

539d19c

verified ·

1 Parent(s): 331693b

Update multimodal_queries.py

Browse files

Files changed (1) hide show

multimodal_queries.py +20 -31

multimodal_queries.py CHANGED Viewed

@@ -1,18 +1,21 @@
-import re
-import base64
-import io
 import torch
 import gradio as gr
 from PIL import Image
-from transformers import MllamaForConditionalGeneration, AutoProcessor
 # Load the model and processor
-model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
-model = MllamaForConditionalGeneration.from_pretrained(
     model_id,
     torch_dtype=torch.bfloat16,
-    device_map="auto",
-)
 processor = AutoProcessor.from_pretrained(model_id)
 def generate_model_response(image_file, user_query):
@@ -24,49 +27,35 @@ def generate_model_response(image_file, user_query):
     - user_query: The user's question about the image.
     Returns:
-    - str: The generated response from the model, formatted as HTML.
     """
     try:
         # Load and prepare the image
         raw_image = Image.open(image_file).convert("RGB")
-        # Prepare input for the model using the processor
-        conversation = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image", "url": "<|image|>"},  # Placeholder for image
-                    {"type": "text", "text": user_query}
-                ]
-            }
-        ]
-        # Apply chat template to prepare inputs for the model
-        inputs = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
-        # Process the image and text inputs together
-        inputs = processor(inputs, raw_image, return_tensors="pt").to(model.device)
         # Generate response from the model
         outputs = model.generate(**inputs)
-        # Decode and format the response
-        generated_text = processor.decode(outputs[0], skip_special_tokens=True)
-        return generated_text
     except Exception as e:
         print(f"Error in generating response: {e}")
-        return f"<p>An error occurred: {str(e)}</p>"
 # Gradio Interface
 iface = gr.Interface(
     fn=generate_model_response,
     inputs=[
         gr.Image(type="file", label="Upload Image"),
-        gr.Textbox(label="Enter your question", placeholder="How many calories are in this food?")
     ],
-    outputs=gr.HTML(label="Response from Model"),
 )
 iface.launch(share=True)

 import torch
 import gradio as gr
 from PIL import Image
+from transformers import AutoProcessor, AutoModel
 # Load the model and processor
+model_id = "OpenGVLab/InternVL2_5-78B"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Initialize the model and processor
+model = AutoModel.from_pretrained(
     model_id,
     torch_dtype=torch.bfloat16,
+    low_cpu_mem_usage=True,
+    use_flash_attn=True,
+    trust_remote_code=True
+).eval().to(device)
 processor = AutoProcessor.from_pretrained(model_id)
 def generate_model_response(image_file, user_query):
     - user_query: The user's question about the image.
     Returns:
+    - str: The generated response from the model.
     """
     try:
         # Load and prepare the image
         raw_image = Image.open(image_file).convert("RGB")
+        # Prepare inputs for the model using the processor
+        inputs = processor(images=raw_image, text=user_query, return_tensors="pt").to(device)
         # Generate response from the model
         outputs = model.generate(**inputs)
+        # Decode and return the response
+        response_text = processor.decode(outputs[0], skip_special_tokens=True)
+        return response_text
     except Exception as e:
         print(f"Error in generating response: {e}")
+        return f"An error occurred: {str(e)}"
 # Gradio Interface
 iface = gr.Interface(
     fn=generate_model_response,
     inputs=[
         gr.Image(type="file", label="Upload Image"),
+        gr.Textbox(label="Enter your question", placeholder="What do you want to know about this image?")
     ],
+    outputs="text",
 )
 iface.launch(share=True)