|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
import gradio as gr |
|
|
from dotenv import load_dotenv |
|
|
load_dotenv() |
|
|
|
|
|
from model import encode_image, analyze_image_with_query, analyze_query |
|
|
from patient import record_audio, transcription |
|
|
|
|
|
|
|
|
|
|
|
# Instruction text that is prepended to every patient query sent to the model.
# The wording is kept exactly as authored; only stray table-pipe artifacts that
# had leaked into the literal (and broke the closing line) are removed.
system_prompt = """You are a professional doctor. Given input is the querry of patient.
What's in this image (if provided)?. Do you find anything wrong with it medically?
Suggest some quick response actions, which can be implemented immediately. Do not add any numbers or special characters in
your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
Donot say 'In the image I see' but say 'With what I see, I think you have ....'
Do end the response with the specialist (ex:urologist, cardiologist) the user should consult and it strictly should be the very last word of the response.
Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""
|
|
|
|
|
|
|
|
def process_inputs(audio_filepath, image_filepath=None):
    """Transcribe the patient's recording and request a doctor-style reply.

    Args:
        audio_filepath: Path of the recorded audio file. A falsy value
            (``None`` or ``""``) means nothing was recorded.
        image_filepath: Optional path of an image to analyse together with
            the spoken query. When falsy, a text-only query is sent.

    Returns:
        A two-item tuple ``(speech_to_text_output, doctor_response)``: the
        value returned by ``transcription`` and the value returned by
        ``analyze_query`` / ``analyze_image_with_query``. When no audio was
        supplied, an empty transcript and a short notice are returned instead.
    """
    # Gradio hands over None when the user submits without recording; bail out
    # early instead of passing a missing path to the transcription call.
    if not audio_filepath:
        return "", "No audio was recorded. Please record your question and try again."

    speech_to_text_output = transcription(
        GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
        audio_filepath=audio_filepath,
        stt_model="whisper-large-v3",
    )

    # Both branches use the same model, so name it once.
    model = "meta-llama/llama-4-scout-17b-16e-instruct"

    # Keep the instructions and the transcript apart; plain concatenation glued
    # the last word of the prompt onto the first word of the patient's query.
    query = system_prompt + "\n\n" + speech_to_text_output

    if not image_filepath:
        doctor_response = analyze_query(query=query, model=model)
    else:
        doctor_response = analyze_image_with_query(
            query=query,
            encoded_image=encode_image(image_filepath),
            model=model,
        )

    return speech_to_text_output, doctor_response
|
|
|
|
|
|
|
|
|
|
|
# Web UI wiring: process_inputs receives the two inputs in order and its two
# return values fill the two text boxes in order.
iface = gr.Interface(
    title="AI Doctor with Vision and Voice",
    fn=process_inputs,
    inputs=[
        # Microphone recording, delivered to the callback as a file path.
        gr.Audio(sources=["microphone"], type="filepath"),
        # Image, also delivered as a file path; process_inputs treats a falsy
        # value here as "no image".
        gr.Image(type="filepath"),
    ],
    outputs=[
        gr.Textbox(label="Speech to Text"),
        gr.Textbox(label="Doctor's Response"),
    ],
)
|
|
|
|
|
if __name__ == "__main__":
    # Port comes from the PORT environment variable, falling back to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    iface.launch(server_name="0.0.0.0", server_port=listen_port)