import gradio as gr
from dotenv import load_dotenv
import requests
import os
from datetime import datetime
import numpy as np
import librosa
import scipy.io.wavfile as wavfile

load_dotenv()
hf_token = os.getenv('HUGGINGFACE_API_TOKEN')

API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
HEADERS = {"Authorization": f'Bearer {hf_token}'}

def query(filename):
    """Send an audio file to the Whisper inference endpoint and return the transcript."""
    print(filename)
    with open(filename, "rb") as f:
        data = f.read()
    response = requests.post(API_URL, headers=HEADERS, data=data)
    output = response.json()
    return output['text']
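
# Note: the serverless Inference API can return a JSON error payload instead of a
# transcript (for example while the model is still loading), in which case
# output['text'] raises a KeyError. A defensive variant, left here only as a sketch:
#
#   if "text" not in output:
#       raise RuntimeError(f"Transcription failed: {output}")
#   return output["text"]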

def validate_phone(phone):
    """Validate the phone number: digits only, at least 9 characters."""
    return bool(phone and phone.isdigit() and len(phone) >= 9)

def validate_score(score):
    """Validate the math score: an integer between 0 and 80."""
    try:
        score = int(score)
        return 0 <= score <= 80
    except (TypeError, ValueError):
        return False

def create_audio_file(audio: tuple, file_name: str) -> str:
    """Normalize a Gradio (sample_rate, samples) tuple and write it as a 16 kHz WAV file."""
    target_sr = 16000
    sr, arr = audio
    # Normalize to [-1, 1] float32, as librosa expects floating-point input
    arr = (arr / abs(arr).max()).astype(np.float32)
    resampled_audio = librosa.resample(y=arr, orig_sr=sr, target_sr=target_sr)
    wavfile.write(file_name, target_sr, resampled_audio)
    return file_name
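
# Note: abs(arr).max() is 0 for an all-silent recording, which would turn the
# normalization above into NaNs. A defensive variant (a sketch, not wired in):
#
#   peak = np.abs(arr).max()
#   if peak > 0:
#       arr = arr / peak
#   arr = arr.astype(np.float32)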

def process_inputs(phone, math_score, essay_audio, examiner_audio=None):
    """Process all inputs and create a formatted text file."""
    if not validate_phone(phone):
        return {"error": "Invalid phone number format"}, None
    if not validate_score(math_score):
        return {"error": "Math score must be between 0 and 80"}, None
    phone = phone[-9:]  # keep the last 9 digits
    try:
        # Transcribe the essay audio (required)
        essay_text = query(create_audio_file(essay_audio, 'essay.wav')) if essay_audio else ""
        if len(essay_text) < 1:
            raise ValueError('Essay transcription must not be empty')
        # Transcribe the examiner's comment if provided
        examiner_text = query(create_audio_file(examiner_audio, 'comment.wav')) if examiner_audio else ""
        output_text = f"""num tel: {phone}
math score: {math_score}
замечание экзаменатора: {examiner_text}
Esse:
{essay_text}"""
        # Create filename with phone and score
        filename = f"{phone}_{math_score}.txt"
        # Save to file
        with open(filename, "w", encoding="utf-8") as f:
            f.write(output_text)
        return output_text, filename
    except Exception as e:
        return {"error": f"Processing error: {str(e)}"}, None

# Create Gradio interface
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Textbox(label="Номер телефона (только цифры)", placeholder="Введите номер телефона"),
        gr.Number(label="Балл по математике (0-80)", minimum=0, maximum=80),
        gr.Audio(sources="microphone", type="numpy", label="Голосовой ввод эссе"),
        gr.Audio(sources="microphone", type="numpy", label="Голосовой ввод комментария экзаменатора (необязательно)")
    ],
    outputs=[
        gr.Textbox(label="Предпросмотр содержимого файла"),
        gr.File(label="Скачать файл")
    ],
    title="Система записи эссе",
    description="Запишите эссе голосом, добавьте комментарии экзаменатора (при наличии) и заполните обязательные поля",
    examples=[
        ["700123456", "75", None, None],
        ["0555789012", "45", None, None]
    ]
)

if __name__ == "__main__":
    iface.launch()


# --- Earlier experiment: save the recorded audio to disk and list the working directory ---
# import gradio as gr
# import numpy as np
# import librosa
# import scipy.io.wavfile as wavfile
# import os
#
# def process_audio(audio):
#     target_sr = 16000
#     arr = audio[1]
#     sr = audio[0]
#     arr = (arr / abs(arr).max()).astype(np.float32)
#     resampled_audio = librosa.resample(y=arr, orig_sr=sr, target_sr=target_sr)
#     wavfile.write('input.wav', target_sr, resampled_audio)
#     return f'DIR: {os.listdir()}'
#
# stt_interface = gr.Interface(
#     fn=process_audio,
#     inputs=gr.Audio(sources="microphone", type="numpy"),
#     outputs=["text"],
#     description="Speech-to-Text and Emotion Analysis: Speak into your microphone."
# )
#
# demo = gr.TabbedInterface(
#     interface_list=[stt_interface],
#     tab_names=["Speech-to-Text"]
# )
#
# demo.launch(share=True)


# --- Previous version: same app, but passing Gradio's filepath audio directly to the API ---
# import gradio as gr
# from dotenv import load_dotenv
# import requests
# import os
# from datetime import datetime
#
# load_dotenv()
# hf_token = os.getenv('HUGGINGFACE_API_TOKEN')
# API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
# HEADERS = {"Authorization": f'Bearer {hf_token}'}
#
# def query(filename):
#     print(filename)
#     with open(filename, "rb") as f:
#         data = f.read()
#     response = requests.post(API_URL, headers=HEADERS, data=data)
#     output = response.json()
#     return output['text']
#
# def validate_phone(phone):
#     """Validate phone number format"""
#     return bool(phone and phone.isdigit() and len(phone) >= 9)
#
# def validate_score(score):
#     """Validate math score"""
#     try:
#         score = int(score)
#         return 0 <= score <= 80
#     except:
#         return False
#
# def process_inputs(phone, math_score, essay_audio, examiner_audio=None):
#     """Process all inputs and create formatted text file"""
#     if not validate_phone(phone):
#         return {"error": "Invalid phone number format"}, None
#     if not validate_score(math_score):
#         return {"error": "Math score must be between 0 and 80"}, None
#     phone = phone[-9:]
#     try:
#         # Transcribe essay audio
#         essay_text = query(essay_audio) if essay_audio else ""
#         if len(essay_text) < 1:
#             raise ValueError('Essay MUST BE')
#         # Transcribe examiner comments if provided
#         examiner_text = query(examiner_audio) if examiner_audio else ""
#         # Format output text
#         output_text = f"""num tel: {phone}
# math score: {math_score}
# замечание экзаменатора: {examiner_text}
# Esse:
# {essay_text}"""
#         # Create filename with phone and score
#         filename = f"{phone}_{math_score}.txt"
#         # Save to file
#         with open(filename, "w", encoding="utf-8") as f:
#             f.write(output_text)
#         return output_text, filename
#     except Exception as e:
#         return {"error": f"Processing error: {str(e)}"}, None
#
# # Create Gradio interface
# iface = gr.Interface(
#     fn=process_inputs,
#     inputs=[
#         gr.Textbox(label="Номер телефона (только цифры)", placeholder="Введите номер телефона"),
#         gr.Number(label="Балл по математике (0-80)", minimum=0, maximum=80),
#         gr.Audio(type="filepath", label="Голосовой ввод эссе"),
#         gr.Audio(type="filepath", label="Голосовой ввод комментария экзаменатора (необязательно)")
#     ],
#     outputs=[
#         gr.Textbox(label="Предпросмотр содержимого файла"),
#         gr.File(label="Скачать файл")
#     ],
#     title="Система записи эссе",
#     description="Запишите эссе голосом, добавьте комментарии экзаменатора (при наличии) и заполните обязательные поля",
#     examples=[
#         ["700123456", "75", None, None],
#         ["0555789012", "45", None, None]
#     ]
# )
#
# if __name__ == "__main__":
#     iface.launch()


# --- Previous version: streaming microphone input, accumulating chunks until recording stops ---
# import gradio as gr
# import numpy as np
# import librosa
# from scipy.io import wavfile
# import os
# from dotenv import load_dotenv
# import requests
# from datetime import datetime
#
# load_dotenv()
# hf_token = os.getenv('HUGGINGFACE_API_TOKEN')
# API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
# HEADERS = {"Authorization": f'Bearer {hf_token}'}
#
# def query(filename):
#     print(filename)
#     with open(filename, "rb") as f:
#         data = f.read()
#     response = requests.post(API_URL, headers=HEADERS, data=data)
#     output = response.json()
#     return output['text']
#
# chunks = []
#
# def process_chunk(audio):
#     if audio is None:
#         if chunks:
#             final_audio = np.concatenate(chunks)
#             filename = f"recorded_audio_{len(os.listdir())}.wav"
#             wavfile.write(filename, 16000, final_audio)
#             chunks.clear()
#             return query(filename)
#         return "No audio"
#     sr, audio_data = audio
#     audio_data = (audio_data / abs(audio_data).max()).astype(np.float32)
#     resampled_audio = librosa.resample(y=audio_data, orig_sr=sr, target_sr=16000)
#     chunks.append(resampled_audio)
#     return f"Recording... Chunks collected: {len(chunks)}"
#
# demo = gr.Interface(
#     fn=process_chunk,
#     inputs=gr.Audio(sources="microphone", type="numpy", streaming=True),
#     outputs="text",
#     live=True
# )
#
# if __name__ == "__main__":
#     demo.launch()