Show time (record per-chunk language-ID and transcription durations)
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import numpy as np
|
|
| 3 |
import pandas as pd
|
| 4 |
import torch
|
| 5 |
import torchaudio
|
| 6 |
-
|
| 7 |
from lang_id import identify_languages
|
| 8 |
from whisper import transcribe
|
| 9 |
|
|
@@ -60,9 +60,11 @@ def process_audio(audio, chunk_duration, language_set):
|
|
| 60 |
audio_sec += chunk_duration
|
| 61 |
|
| 62 |
print(f"Processing audio chunk of length {len(chunk)}")
|
| 63 |
-
volume_norm = np.linalg.norm(chunk)
|
| 64 |
length = len(chunk) / SAMPLING_RATE # 音声データの長さ(秒)
|
|
|
|
| 65 |
selected_scores, all_scores = identify_languages(chunk, language_set)
|
|
|
|
| 66 |
|
| 67 |
# 日本語と英語の確率値を取得
|
| 68 |
ja_prob = selected_scores['Japanese']
|
|
@@ -74,7 +76,9 @@ def process_audio(audio, chunk_duration, language_set):
|
|
| 74 |
top3_languages = ", ".join([f"{lang} ({all_scores[lang]:.2f})" for lang in sorted(all_scores, key=all_scores.get, reverse=True)[:3]])
|
| 75 |
|
| 76 |
# テキストの認識
|
|
|
|
| 77 |
transcription = transcribe(chunk)
|
|
|
|
| 78 |
|
| 79 |
data.append({
|
| 80 |
"Time": audio_sec,
|
|
@@ -82,6 +86,8 @@ def process_audio(audio, chunk_duration, language_set):
|
|
| 82 |
"Volume": volume_norm,
|
| 83 |
"Japanese_English": f"{ja_en} ({ja_prob:.2f}, {en_prob:.2f})",
|
| 84 |
"Language": top3_languages,
|
|
|
|
|
|
|
| 85 |
"Text": transcription,
|
| 86 |
})
|
| 87 |
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
import torch
|
| 5 |
import torchaudio
|
| 6 |
+
from datetime import datetime
|
| 7 |
from lang_id import identify_languages
|
| 8 |
from whisper import transcribe
|
| 9 |
|
|
|
|
| 60 |
audio_sec += chunk_duration
|
| 61 |
|
| 62 |
print(f"Processing audio chunk of length {len(chunk)}")
|
| 63 |
+
volume_norm = np.linalg.norm(chunk)
|
| 64 |
length = len(chunk) / SAMPLING_RATE # 音声データの長さ(秒)
|
| 65 |
+
s = datetime.now()
|
| 66 |
selected_scores, all_scores = identify_languages(chunk, language_set)
|
| 67 |
+
lang_id_time = (datetime.now() - s).total_seconds()
|
| 68 |
|
| 69 |
# 日本語と英語の確率値を取得
|
| 70 |
ja_prob = selected_scores['Japanese']
|
|
|
|
| 76 |
top3_languages = ", ".join([f"{lang} ({all_scores[lang]:.2f})" for lang in sorted(all_scores, key=all_scores.get, reverse=True)[:3]])
|
| 77 |
|
| 78 |
# テキストの認識
|
| 79 |
+
s = datetime.now()
|
| 80 |
transcription = transcribe(chunk)
|
| 81 |
+
transcribe_time = (datetime.now() - s).total_seconds()
|
| 82 |
|
| 83 |
data.append({
|
| 84 |
"Time": audio_sec,
|
|
|
|
| 86 |
"Volume": volume_norm,
|
| 87 |
"Japanese_English": f"{ja_en} ({ja_prob:.2f}, {en_prob:.2f})",
|
| 88 |
"Language": top3_languages,
|
| 89 |
+
"Lang ID Time": lang_id_time,
|
| 90 |
+
"Transcribe Time": transcribe_time,
|
| 91 |
"Text": transcription,
|
| 92 |
})
|
| 93 |
|