Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,23 +15,40 @@ import markdown
|
|
| 15 |
|
| 16 |
def encode_image(image):
|
| 17 |
"""
|
| 18 |
-
将PIL.Image对象或图像文件路径转换为base64
|
| 19 |
|
| 20 |
参数:
|
| 21 |
image: 可以是PIL.Image对象或图像文件路径
|
| 22 |
|
| 23 |
返回:
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"""
|
|
|
|
|
|
|
| 26 |
if isinstance(image, str):
|
| 27 |
# 处理文件路径的情况
|
|
|
|
| 28 |
with open(image, "rb") as image_file:
|
| 29 |
-
|
| 30 |
else:
|
| 31 |
# 处理PIL.Image对象的情况
|
|
|
|
| 32 |
buffered = BytesIO()
|
| 33 |
image.save(buffered, format='PNG')
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def excute_codes(codes, messages, executor: PythonExecutor):
|
| 37 |
no_code_idx = []
|
|
@@ -47,13 +64,16 @@ def excute_codes(codes, messages, executor: PythonExecutor):
|
|
| 47 |
def process_prompt_init(question, image, prompt_template, prompt_type):
|
| 48 |
prompt_prefix = prompt_template[prompt_type]
|
| 49 |
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
| 51 |
question_with_options = question
|
| 52 |
|
| 53 |
messages = [
|
| 54 |
{
|
| 55 |
"role": "user",
|
| 56 |
-
"content": [{"type": "text", "text": "<image_clue_0>"}] + [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}] + [{"type": "text", "text": "</image_clue_0>\n\n"}] + [{"type": "text", "text": prompt_prefix.format(query=question_with_options)}]
|
| 57 |
}
|
| 58 |
]
|
| 59 |
|
|
@@ -250,7 +270,7 @@ def o3_chat(model_name, api_key, base_url, question, image):
|
|
| 250 |
# executor = SharedRuntimeExecutor(var_whitelist="RETAIN_ALL_VARS")
|
| 251 |
|
| 252 |
prompt_template = json.load(open("./prompt_template_vis.json", "r", encoding="utf-8"))
|
| 253 |
-
prompt_type = '
|
| 254 |
|
| 255 |
data = {
|
| 256 |
"question": question,
|
|
|
|
| 15 |
|
| 16 |
def encode_image(image):
    """
    Convert a PIL.Image object or an image file path to a base64 string and
    report the image's pixel dimensions.

    Args:
        image: Either a PIL.Image object or a string path to an image file.

    Returns:
        A dict with the following keys:
            - 'base64': the base64-encoded image data, as a str
            - 'width': image width in pixels
            - 'height': image height in pixels
            - 'resolution': the string "<width>x<height>"
    """
    if isinstance(image, str):
        # File path: encode the file's raw bytes as-is (no re-encoding).
        with open(image, "rb") as image_file:
            base64_str = base64.b64encode(image_file.read()).decode('utf-8')
        # PIL is used here only to read the size. The context manager closes
        # the file handle that a bare Image.open() would keep open.
        with Image.open(image) as img_obj:
            width, height = img_obj.size
    else:
        # In-memory image: serialise it to PNG in a buffer, then encode that.
        buffered = BytesIO()
        image.save(buffered, format='PNG')
        base64_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
        width, height = image.size

    return {
        'base64': base64_str,
        'width': width,
        'height': height,
        # Promised by the docstring but previously missing from the result.
        'resolution': f"{width}x{height}",
    }
|
| 52 |
|
| 53 |
def excute_codes(codes, messages, executor: PythonExecutor):
|
| 54 |
no_code_idx = []
|
|
|
|
| 64 |
def process_prompt_init(question, image, prompt_template, prompt_type):
|
| 65 |
prompt_prefix = prompt_template[prompt_type]
|
| 66 |
|
| 67 |
+
img_result = encode_image(image)
|
| 68 |
+
image_base64 = img_result['base64']
|
| 69 |
+
width = img_result['width']
|
| 70 |
+
height = img_result['height']
|
| 71 |
question_with_options = question
|
| 72 |
|
| 73 |
messages = [
|
| 74 |
{
|
| 75 |
"role": "user",
|
| 76 |
+
"content": [{"type": "text", "text": "<image_clue_0>"}] + [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}] + [{"type": "text", "text": "</image_clue_0>\n\n"}] + [{"type": "text", "text": prompt_prefix.format(query=question_with_options, width=str(width), height=str(height))}]
|
| 77 |
}
|
| 78 |
]
|
| 79 |
|
|
|
|
| 270 |
# executor = SharedRuntimeExecutor(var_whitelist="RETAIN_ALL_VARS")
|
| 271 |
|
| 272 |
prompt_template = json.load(open("./prompt_template_vis.json", "r", encoding="utf-8"))
|
| 273 |
+
prompt_type = 'vistool_with_img_info'
|
| 274 |
|
| 275 |
data = {
|
| 276 |
"question": question,
|