Spaces:

RoneyBABA
/

MajorProj

Running

App Files Files Community

MajorProj / app.py

RoneyBABA

Update app.py

34909d9 verified 6 days ago

raw

history blame contribute delete

2.46 kB

	# If you don't use pipenv, uncomment the following (note: load_dotenv() is already called unconditionally below):
	# from dotenv import load_dotenv
	# load_dotenv()

	#VoiceBot UI with Gradio
	import os
	import gradio as gr
	from dotenv import load_dotenv
	load_dotenv()

	from model import encode_image, analyze_image_with_query, analyze_query
	from patient import record_audio, transcription

	#load_dotenv()

	# Instruction text that process_inputs concatenates with the transcribed
	# patient query to form the `query` argument sent to the model helpers.
	# The wording (including its spelling) is runtime text, so it is left
	# untouched here.
	# NOTE(review): the prompt asks for "one long paragraph" and also for
	# "max 2 sentences" -- these instructions conflict; confirm which is intended.
	system_prompt="""You are a professional doctor. Given input is the querry of patient.
	What's in this image (if provided)?. Do you find anything wrong with it medically?
	Suggest some quick response actions, which can be implemented immediately. Do not add any numbers or special characters in
	your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
	Donot say 'In the image I see' but say 'With what I see, I think you have ....'
	Do end the response with the specialist (ex:urologist, cardiologist) the user should consult and it strictly should be the very last word of the response.
	Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
	Keep your answer concise (max 2 sentences). No preamble, start your answer right away please"""


	def process_inputs(audio_filepath, image_filepath=None):
	    """Transcribe the patient's recording and ask the model for a reply.

	    Args:
	        audio_filepath: Path to the recorded audio file. When it is falsy
	            (nothing was recorded) no external call is made.
	        image_filepath: Optional path to an image to include in the query.

	    Returns:
	        A ``(speech_to_text_output, doctor_response)`` tuple of the
	        transcript and the model's answer. When no audio is supplied the
	        transcript is empty and the second item is a short notice.
	    """
	    # Single place for the model id; both branches use the same model.
	    model_name = "meta-llama/llama-4-scout-17b-16e-instruct"

	    # Guard: without a recording there is nothing to transcribe, so return
	    # a readable notice instead of handing a missing path to transcription().
	    # NOTE(review): Gradio is expected to pass None for an empty Audio
	    # input -- confirm against the Gradio version in use.
	    if not audio_filepath:
	        return "", "No audio was received. Please record your question and try again."

	    speech_to_text_output = transcription(
	        GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
	        audio_filepath=audio_filepath,
	        stt_model="whisper-large-v3",
	    )

	    # Separate the instructions from the transcript; plain concatenation
	    # fused the prompt's last word with the first word of the query.
	    full_query = system_prompt + "\n" + speech_to_text_output

	    if image_filepath:
	        doctor_response = analyze_image_with_query(
	            query=full_query,
	            encoded_image=encode_image(image_filepath),
	            model=model_name,
	        )
	    else:
	        doctor_response = analyze_query(query=full_query, model=model_name)

	    return speech_to_text_output, doctor_response


	# Gradio UI: microphone audio plus an image in, two text boxes out
	# (the transcript and the doctor's reply), all routed through process_inputs.
	iface = gr.Interface(
	    title="AI Doctor with Vision and Voice",
	    fn=process_inputs,
	    outputs=[
	        gr.Textbox(label="Speech to Text"),
	        gr.Textbox(label="Doctor's Response"),
	    ],
	    inputs=[
	        gr.Audio(sources=["microphone"], type="filepath"),
	        gr.Image(type="filepath"),
	    ],
	)

	if __name__ == "__main__":
	    # Listen on all interfaces; the port comes from the PORT environment
	    # variable and falls back to 7860 when it is not set.
	    listen_port = int(os.environ.get("PORT", 7860))
	    iface.launch(server_name="0.0.0.0", server_port=listen_port)