import gradio as gr
from dotenv import load_dotenv
import requests
import os
from datetime import datetime
import numpy as np
import librosa
import scipy.io.wavfile as wavfile
load_dotenv()
hf_token = os.getenv('HUGGINGFACE_API_TOKEN')
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
HEADERS = {"Authorization": f'Bearer {hf_token}'}
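# Transcription is delegated to the hosted openai/whisper-large-v3-turbo model via the
# Hugging Face serverless Inference API; the bearer token is read from a local .env file.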
def query(filename):
    """Send an audio file to the Whisper endpoint and return the transcription text."""
    print(filename)
    with open(filename, "rb") as f:
        data = f.read()
    response = requests.post(API_URL, headers=HEADERS, data=data)
    output = response.json()
    if "text" not in output:
        # The API answers with an "error" payload (e.g. while the model is still loading);
        # surface it instead of failing with a KeyError.
        raise RuntimeError(output.get("error", str(output)))
    return output["text"]
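# A minimal retry sketch (an assumption, not part of the original app): the serverless API can
# report an error while the model is still loading, so a caller could wrap query(), e.g.:
# import time
# def query_with_retry(filename, attempts=3, wait=20):
#     for _ in range(attempts):
#         try:
#             return query(filename)
#         except RuntimeError:
#             time.sleep(wait)
#     return query(filename)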
def validate_phone(phone):
    """Validate phone number format"""
    return bool(phone and phone.isdigit() and len(phone) >= 9)
def validate_score(score):
    """Validate math score"""
    try:
        score = int(score)
        return 0 <= score <= 80
    except (TypeError, ValueError):
        return False
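# Illustrative checks (values are made up): validate_phone("700123456") is True,
# validate_phone("700-123-456") is False; validate_score("75") is True, validate_score(95) is False.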
def create_audio_file(audio: tuple, file_name: str) -> str:
    """Normalize a Gradio (sample_rate, samples) tuple and write it as a 16 kHz WAV file."""
    target_sr = 16000
    sr, arr = audio
    peak = np.abs(arr).max()
    if peak == 0:
        raise ValueError("Recording is silent")
    arr = (arr / peak).astype(np.float32)
    resampled_audio = librosa.resample(y=arr, orig_sr=sr, target_sr=target_sr)
    wavfile.write(file_name, target_sr, resampled_audio)
    return file_name
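# Illustrative call (values are made up): a one-second 440 Hz tone recorded at 48 kHz is
# peak-normalized, resampled to 16 kHz and written to "probe.wav":
# t = np.linspace(0, 1, 48000, endpoint=False)
# create_audio_file((48000, (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)), "probe.wav")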
def process_inputs(phone, math_score, essay_audio, examiner_audio=None):
    """Process all inputs and create a formatted text file."""
    if not validate_phone(phone):
        return {"error": "Invalid phone number format"}, None
    if not validate_score(math_score):
        return {"error": "Math score must be between 0 and 80"}, None
    # Keep only the last nine digits of the phone number.
    phone = phone[-9:]
    try:
        essay_text = query(create_audio_file(essay_audio, 'essay.wav')) if essay_audio else ""
        if len(essay_text) < 1:
            raise ValueError('Essay recording is required')
        examiner_text = query(create_audio_file(examiner_audio, 'comment.wav')) if examiner_audio else ""
        output_text = f"""num tel: {phone}
math score: {math_score}
замечание экзаменатора: {examiner_text}
Esse:
{essay_text}"""
        # Create filename with phone and score
        filename = f"{phone}_{math_score}.txt"
        # Save to file
        with open(filename, "w", encoding="utf-8") as f:
            f.write(output_text)
        return output_text, filename
    except Exception as e:
        return {"error": f"Processing error: {str(e)}"}, None
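# A possible alternative (not used here): raising gr.Error("...") inside process_inputs would
# surface validation failures as a Gradio error pop-up instead of a dict rendered in the preview box.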
# Create Gradio interface
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Textbox(label="Номер телефона (только цифры)", placeholder="Введите номер телефона"),
        gr.Number(label="Балл по математике (0-80)", minimum=0, maximum=80),
        gr.Audio(sources="microphone", type="numpy", label="Голосовой ввод эссе"),
        gr.Audio(sources="microphone", type="numpy", label="Голосовой ввод комментария экзаменатора (необязательно)")
    ],
    outputs=[
        gr.Textbox(label="Предпросмотр содержимого файла"),
        gr.File(label="Скачать файл")
    ],
    title="Система записи эссе",
    description="Запишите эссе голосом, добавьте комментарии экзаменатора (при наличии) и заполните обязательные поля",
    examples=[
        ["700123456", "75", None, None],
        ["0555789012", "45", None, None]
    ]
)
if __name__ == "__main__":
    iface.launch()
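# ---------------------------------------------------------------------------
# Earlier iterations of this script, kept commented out for reference.
# ---------------------------------------------------------------------------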
# import gradio as gr
# import numpy as np
# import librosa
# import scipy.io.wavfile as wavfile
# import os
# def process_audio(audio):
# target_sr=16000
# arr = audio[1]
# sr = audio[0]
# arr = (arr / abs(arr).max()).astype(np.float32)
# resampled_audio = librosa.resample(y=arr, orig_sr=sr, target_sr=target_sr)
# wavfile.write('input.wav', target_sr, resampled_audio)
# return f'DIR: {os.listdir()}'
# stt_interface = gr.Interface(
# fn=process_audio,
# inputs=gr.Audio(sources="microphone", type="numpy"),
# outputs=["text"],
# description="Speech-to-Text and Emotion Analysis: Speak into your microphone."
# )
# demo = gr.TabbedInterface(
# interface_list=[stt_interface],
# tab_names=["Speech-to-Text"]
# )
# demo.launch(share=True)
# import gradio as gr
# from dotenv import load_dotenv
# import requests
# import os
# from datetime import datetime
# load_dotenv()
# hf_token = os.getenv('HUGGINGFACE_API_TOKEN')
# API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
# HEADERS = {"Authorization": f'Bearer {hf_token}'}
# def query(filename):
# print(filename)
# with open(filename, "rb") as f:
# data = f.read()
# response = requests.post(API_URL, headers=HEADERS, data=data)
# output = response.json()
# return output['text']
# def validate_phone(phone):
# """Validate phone number format"""
# return bool(phone and phone.isdigit() and len(phone) >= 9)
# def validate_score(score):
# """Validate math score"""
# try:
# score = int(score)
# return 0 <= score <= 80
# except:
# return False
# def process_inputs(phone, math_score, essay_audio, examiner_audio=None):
# """Process all inputs and create formatted text file"""
# if not validate_phone(phone):
# return {"error": "Invalid phone number format"}, None
# if not validate_score(math_score):
# return {"error": "Math score must be between 0 and 80"}, None
# phone = phone[-9:]
# try:
# # Transcribe essay audio
# essay_text = query(essay_audio) if essay_audio else ""
# if len(essay_text)<1:
# raise ValueError('Essay MUST BE')
# # Transcribe examiner comments if provided
# examiner_text = query(examiner_audio) if examiner_audio else ""
# # Format output text
# output_text = f"""num tel: {phone}
# math score: {math_score}
# замечание экзаменатора: {examiner_text}
# Esse:
# {essay_text}"""
# # Create filename with phone and score
# filename = f"{phone}_{math_score}.txt"
# # Save to file
# with open(filename, "w", encoding="utf-8") as f:
# f.write(output_text)
# return output_text, filename
# except Exception as e:
# return {"error": f"Processing error: {str(e)}"}, None
# # Create Gradio interface
# iface = gr.Interface(
# fn=process_inputs,
# inputs=[
# gr.Textbox(label="Номер телефона (только цифры)", placeholder="Введите номер телефона"),
# gr.Number(label="Балл по математике (0-80)", minimum=0, maximum=80),
# gr.Audio(type="filepath", label="Голосовой ввод эссе"),
# gr.Audio(type="filepath", label="Голосовой ввод комментария экзаменатора (необязательно)")
# ],
# outputs=[
# gr.Textbox(label="Предпросмотр содержимого файла"),
# gr.File(label="Скачать файл")
# ],
# title="Система записи эссе",
# description="Запишите эссе голосом, добавьте комментарии экзаменатора (при наличии) и заполните обязательные поля",
# examples=[
# ["700123456", "75", None, None],
# ["0555789012", "45", None, None]
# ]
# )
# if __name__ == "__main__":
# iface.launch()
# import gradio as gr
# import numpy as np
# import librosa
# from scipy.io import wavfile
# import os
# from dotenv import load_dotenv
# import requests
# from datetime import datetime
# load_dotenv()
# hf_token = os.getenv('HUGGINGFACE_API_TOKEN')
# API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
# HEADERS = {"Authorization": f'Bearer {hf_token}'}
# def query(filename):
# print(filename)
# with open(filename, "rb") as f:
# data = f.read()
# response = requests.post(API_URL, headers=HEADERS, data=data)
# output = response.json()
# return output['text']
# chunks = []
# def process_chunk(audio):
# if audio is None:
# if chunks:
# final_audio = np.concatenate(chunks)
# filename = f"recorded_audio_{len(os.listdir())}.wav"
# wavfile.write(filename, 16000, final_audio)
# chunks.clear()
# return query(filename)
# return "No audio"
# sr, audio_data = audio
# audio_data = (audio_data / abs(audio_data).max()).astype(np.float32)
# resampled_audio = librosa.resample(y=audio_data, orig_sr=sr, target_sr=16000)
# chunks.append(resampled_audio)
# return f"Recording... Chunks collected: {len(chunks)}"
# demo = gr.Interface(
# fn=process_chunk,
# inputs=gr.Audio(sources="microphone", type="numpy", streaming=True),
# outputs="text",
# live=True
# )
# if __name__ == "__main__":
# demo.launch()