import gradio as gr
from dotenv import load_dotenv
import requests
import os
from datetime import datetime
import numpy as np
import librosa
import scipy.io.wavfile as wavfile
load_dotenv()
hf_token = os.getenv('HUGGINGFACE_API_TOKEN')
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
HEADERS = {"Authorization": f'Bearer {hf_token}'}
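# Transcription is delegated to the hosted openai/whisper-large-v3-turbo model via the
# Hugging Face serverless Inference API; the bearer token is read from a local .env file.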
def query(filename):
    """Send an audio file to the Whisper endpoint and return the transcription text."""
    print(filename)
    with open(filename, "rb") as f:
        data = f.read()
    response = requests.post(API_URL, headers=HEADERS, data=data)
    output = response.json()
    if "text" not in output:
        # The API answers with an "error" payload (e.g. while the model is still loading);
        # surface it instead of failing with a KeyError.
        raise RuntimeError(output.get("error", str(output)))
    return output["text"]
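# A minimal retry sketch (an assumption, not part of the original app): the serverless API can
# report an error while the model is still loading, so a caller could wrap query(), e.g.:
# import time
# def query_with_retry(filename, attempts=3, wait=20):
#     for _ in range(attempts):
#         try:
#             return query(filename)
#         except RuntimeError:
#             time.sleep(wait)
#     return query(filename)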
def validate_phone(phone):
    """Validate phone number format"""
    return bool(phone and phone.isdigit() and len(phone) >= 9)
def validate_score(score):
    """Validate math score"""
    try:
        score = int(score)
        return 0 <= score <= 80
    except (TypeError, ValueError):
        return False
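# Illustrative checks (values are made up): validate_phone("700123456") is True,
# validate_phone("700-123-456") is False; validate_score("75") is True, validate_score(95) is False.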
def create_audio_file(audio: tuple, file_name: str) -> str:
    """Normalize a Gradio (sample_rate, samples) tuple and write it as a 16 kHz WAV file."""
    target_sr = 16000
    sr, arr = audio
    peak = np.abs(arr).max()
    if peak == 0:
        raise ValueError("Recording is silent")
    arr = (arr / peak).astype(np.float32)
    resampled_audio = librosa.resample(y=arr, orig_sr=sr, target_sr=target_sr)
    wavfile.write(file_name, target_sr, resampled_audio)
    return file_name
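# Illustrative call (values are made up): a one-second 440 Hz tone recorded at 48 kHz is
# peak-normalized, resampled to 16 kHz and written to "probe.wav":
# t = np.linspace(0, 1, 48000, endpoint=False)
# create_audio_file((48000, (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)), "probe.wav")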
def process_inputs(phone, math_score, essay_audio, examiner_audio=None):
    """Process all inputs and create a formatted text file."""
    if not validate_phone(phone):
        return {"error": "Invalid phone number format"}, None
    if not validate_score(math_score):
        return {"error": "Math score must be between 0 and 80"}, None
    # Keep only the last nine digits of the phone number.
    phone = phone[-9:]
    try:
        essay_text = query(create_audio_file(essay_audio, 'essay.wav')) if essay_audio else ""
        if len(essay_text) < 1:
            raise ValueError('Essay recording is required')
        examiner_text = query(create_audio_file(examiner_audio, 'comment.wav')) if examiner_audio else ""
        output_text = f"""num tel: {phone}
math score: {math_score}
замечание экзаменатора: {examiner_text}
Esse:
{essay_text}"""
        # Create filename with phone and score
        filename = f"{phone}_{math_score}.txt"
        # Save to file
        with open(filename, "w", encoding="utf-8") as f:
            f.write(output_text)
        return output_text, filename
    except Exception as e:
        return {"error": f"Processing error: {str(e)}"}, None
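# A possible alternative (not used here): raising gr.Error("...") inside process_inputs would
# surface validation failures as a Gradio error pop-up instead of a dict rendered in the preview box.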
# Create Gradio interface
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Textbox(label="Номер телефона (только цифры)", placeholder="Введите номер телефона"),
        gr.Number(label="Балл по математике (0-80)", minimum=0, maximum=80),
        gr.Audio(sources="microphone", type="numpy", label="Голосовой ввод эссе"),
        gr.Audio(sources="microphone", type="numpy", label="Голосовой ввод комментария экзаменатора (необязательно)")
    ],
    outputs=[
        gr.Textbox(label="Предпросмотр содержимого файла"),
        gr.File(label="Скачать файл")
    ],
    title="Система записи эссе",
    description="Запишите эссе голосом, добавьте комментарии экзаменатора (при наличии) и заполните обязательные поля",
    examples=[
        ["700123456", "75", None, None],
        ["0555789012", "45", None, None]
    ]
)
if __name__ == "__main__":
    iface.launch()
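# ---------------------------------------------------------------------------
# Earlier iterations of this script, kept commented out for reference.
# ---------------------------------------------------------------------------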
# import gradio as gr
# import numpy as np
# import librosa
# import scipy.io.wavfile as wavfile
# import os
# def process_audio(audio):
# target_sr=16000
# arr = audio[1]
# sr = audio[0]
# arr = (arr / abs(arr).max()).astype(np.float32)
# resampled_audio = librosa.resample(y=arr, orig_sr=sr, target_sr=target_sr)
# wavfile.write('input.wav', target_sr, resampled_audio)
# return f'DIR: {os.listdir()}'
# stt_interface = gr.Interface(
# fn=process_audio,
# inputs=gr.Audio(sources="microphone", type="numpy"),
# outputs=["text"],
# description="Speech-to-Text and Emotion Analysis: Speak into your microphone."
# )
# demo = gr.TabbedInterface(
# interface_list=[stt_interface],
# tab_names=["Speech-to-Text"]
# )
# demo.launch(share=True)
# import gradio as gr
# from dotenv import load_dotenv
# import requests
# import os
# from datetime import datetime
# load_dotenv()
# hf_token = os.getenv('HUGGINGFACE_API_TOKEN')
# API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
# HEADERS = {"Authorization": f'Bearer {hf_token}'}
# def query(filename):
# print(filename)
# with open(filename, "rb") as f:
# data = f.read()
# response = requests.post(API_URL, headers=HEADERS, data=data)
# output = response.json()
# return output['text']
# def validate_phone(phone):
# """Validate phone number format"""
# return bool(phone and phone.isdigit() and len(phone) >= 9)
# def validate_score(score):
# """Validate math score"""
# try:
# score = int(score)
# return 0 <= score <= 80
# except:
# return False
# def process_inputs(phone, math_score, essay_audio, examiner_audio=None):
# """Process all inputs and create formatted text file"""
# if not validate_phone(phone):
# return {"error": "Invalid phone number format"}, None
# if not validate_score(math_score):
# return {"error": "Math score must be between 0 and 80"}, None
# phone = phone[-9:]
# try:
# # Transcribe essay audio
# essay_text = query(essay_audio) if essay_audio else ""
# if len(essay_text)<1:
# raise ValueError('Essay MUST BE')
# # Transcribe examiner comments if provided
# examiner_text = query(examiner_audio) if examiner_audio else ""
# # Format output text
# output_text = f"""num tel: {phone}
# math score: {math_score}
# замечание экзаменатора: {examiner_text}
# Esse:
# {essay_text}"""
# # Create filename with phone and score
# filename = f"{phone}_{math_score}.txt"
# # Save to file
# with open(filename, "w", encoding="utf-8") as f:
# f.write(output_text)
# return output_text, filename
# except Exception as e:
# return {"error": f"Processing error: {str(e)}"}, None
# # Create Gradio interface
# iface = gr.Interface(
# fn=process_inputs,
# inputs=[
# gr.Textbox(label="Номер телефона (только цифры)", placeholder="Введите номер телефона"),
# gr.Number(label="Балл по математике (0-80)", minimum=0, maximum=80),
# gr.Audio(type="filepath", label="Голосовой ввод эссе"),
# gr.Audio(type="filepath", label="Голосовой ввод комментария экзаменатора (необязательно)")
# ],
# outputs=[
# gr.Textbox(label="Предпросмотр содержимого файла"),
# gr.File(label="Скачать файл")
# ],
# title="Система записи эссе",
# description="Запишите эссе голосом, добавьте комментарии экзаменатора (при наличии) и заполните обязательные поля",
# examples=[
# ["700123456", "75", None, None],
# ["0555789012", "45", None, None]
# ]
# )
# if __name__ == "__main__":
# iface.launch()
# import gradio as gr
# import numpy as np
# import librosa
# from scipy.io import wavfile
# import os
# from dotenv import load_dotenv
# import requests
# from datetime import datetime
# load_dotenv()
# hf_token = os.getenv('HUGGINGFACE_API_TOKEN')
# API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
# HEADERS = {"Authorization": f'Bearer {hf_token}'}
# def query(filename):
# print(filename)
# with open(filename, "rb") as f:
# data = f.read()
# response = requests.post(API_URL, headers=HEADERS, data=data)
# output = response.json()
# return output['text']
# chunks = []
# def process_chunk(audio):
# if audio is None:
# if chunks:
# final_audio = np.concatenate(chunks)
# filename = f"recorded_audio_{len(os.listdir())}.wav"
# wavfile.write(filename, 16000, final_audio)
# chunks.clear()
# return query(filename)
# return "No audio"
# sr, audio_data = audio
# audio_data = (audio_data / abs(audio_data).max()).astype(np.float32)
# resampled_audio = librosa.resample(y=audio_data, orig_sr=sr, target_sr=16000)
# chunks.append(resampled_audio)
# return f"Recording... Chunks collected: {len(chunks)}"
# demo = gr.Interface(
# fn=process_chunk,
# inputs=gr.Audio(sources="microphone", type="numpy", streaming=True),
# outputs="text",
# live=True
# )
# if __name__ == "__main__":
# demo.launch()