Spaces:

Agents-X
/

PyVision

Running

App Files Community

stzhao committed on May 15

Commit

c9bd92b

verified ·

1 Parent(s): ec4ab32

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -7

app.py CHANGED Viewed

@@ -15,23 +15,40 @@ import markdown
 def encode_image(image):
     """
-    将PIL.Image对象或图像文件路径转换为base64编码字符串
     参数:
         image: 可以是PIL.Image对象或图像文件路径
     返回:
-        base64编码的字符串
     """
     if isinstance(image, str):
         # 处理文件路径的情况
         with open(image, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode('utf-8')
     else:
         # 处理PIL.Image对象的情况
         buffered = BytesIO()
         image.save(buffered, format='PNG')
-        return base64.b64encode(buffered.getvalue()).decode('utf-8')
 def excute_codes(codes, messages, executor: PythonExecutor):
     no_code_idx = []
@@ -47,13 +64,16 @@ def excute_codes(codes, messages, executor: PythonExecutor):
 def process_prompt_init(question, image, prompt_template, prompt_type):
     prompt_prefix = prompt_template[prompt_type]
-    image_base64 = encode_image(image)
     question_with_options = question
     messages = [
         {
             "role": "user",
-            "content": [{"type": "text", "text": "<image_clue_0>"}] + [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}] + [{"type": "text", "text": "</image_clue_0>\n\n"}] + [{"type": "text", "text": prompt_prefix.format(query=question_with_options)}]
         }
     ]
@@ -250,7 +270,7 @@ def o3_chat(model_name, api_key, base_url, question, image):
     # executor = SharedRuntimeExecutor(var_whitelist="RETAIN_ALL_VARS")
     prompt_template = json.load(open("./prompt_template_vis.json", "r", encoding="utf-8"))
-    prompt_type = 'vistool'
     data = {
         "question": question,

 def encode_image(image):
     """
+    将PIL.Image对象或图像文件路径转换为base64编码字符串，并获取分辨率信息
     参数:
         image: 可以是PIL.Image对象或图像文件路径
     返回:
+        包含以下键的字典:
+        - 'base64': base64编码的字符串
+        - 'width': 图片宽度(像素)
+        - 'height': 图片高度(像素)
+        - 'resolution': 字符串形式的"宽度x高度"
     """
+    img_obj = None
     if isinstance(image, str):
         # 处理文件路径的情况
+        img_obj = Image.open(image)
         with open(image, "rb") as image_file:
+            base64_str = base64.b64encode(image_file.read()).decode('utf-8')
     else:
         # 处理PIL.Image对象的情况
+        img_obj = image
         buffered = BytesIO()
         image.save(buffered, format='PNG')
+        base64_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+    # 获取分辨率信息
+    width, height = img_obj.size
+    return {
+        'base64': base64_str,
+        'width': width,
+        'height': height
+    }
 def excute_codes(codes, messages, executor: PythonExecutor):
     no_code_idx = []
 def process_prompt_init(question, image, prompt_template, prompt_type):
     prompt_prefix = prompt_template[prompt_type]
+    img_result = encode_image(image)
+    image_base64 = img_result['base64']
+    width = img_result['width']
+    height = img_result['height']
     question_with_options = question
     messages = [
         {
             "role": "user",
+            "content": [{"type": "text", "text": "<image_clue_0>"}] + [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}] + [{"type": "text", "text": "</image_clue_0>\n\n"}] + [{"type": "text", "text": prompt_prefix.format(query=question_with_options, width=str(width), height=str(height))}]
         }
     ]
     # executor = SharedRuntimeExecutor(var_whitelist="RETAIN_ALL_VARS")
     prompt_template = json.load(open("./prompt_template_vis.json", "r", encoding="utf-8"))
+    prompt_type = 'vistool_with_img_info'
     data = {
         "question": question,