Spaces:
Sleeping
Sleeping
Nguyen Anh Hong committed on
Commit ·
416704c
1
Parent(s): a6380bd
add
Browse files
- .gitignore +1 -0
- app.py +6 -38
- sample.py +72 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
venv/
|
app.py
CHANGED
|
@@ -1,39 +1,3 @@
|
|
| 1 |
-
<<<<<<< HEAD
|
| 2 |
-
import gradio as gr
|
| 3 |
-
import torch
|
| 4 |
-
import torchaudio
|
| 5 |
-
from transformers import Wav2Vec2Processor, HubertForCTC
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
processor = Wav2Vec2Processor.from_pretrained("tecasoftai/hubert-finetune")
|
| 9 |
-
model = HubertForCTC.from_pretrained("tecasoftai/hubert-finetune")
|
| 10 |
-
model.eval()
|
| 11 |
-
|
| 12 |
-
def transcribe(audio):
|
| 13 |
-
waveform, sr = torchaudio.load(audio)
|
| 14 |
-
if sr != 16000:
|
| 15 |
-
waveform = torchaudio.functional.resample(waveform, sr, 16000)
|
| 16 |
-
|
| 17 |
-
inputs = processor(waveform.squeeze(), sampling_rate=16000, return_tensors="pt")
|
| 18 |
-
with torch.no_grad():
|
| 19 |
-
logits = model(**inputs).logits
|
| 20 |
-
|
| 21 |
-
pred_ids = torch.argmax(logits, dim=-1)
|
| 22 |
-
phonemes = processor.batch_decode(pred_ids)[0]
|
| 23 |
-
return phonemes
|
| 24 |
-
|
| 25 |
-
iface = gr.Interface(
|
| 26 |
-
fn=transcribe,
|
| 27 |
-
inputs=gr.Audio(source="microphone", type="filepath"),
|
| 28 |
-
outputs="text",
|
| 29 |
-
title="HuBERT Phoneme Recognition",
|
| 30 |
-
description="Upload or record audio. The model will return phoneme sequence.",
|
| 31 |
-
live=False
|
| 32 |
-
)
|
| 33 |
-
|
| 34 |
-
if __name__ == "__main__":
|
| 35 |
-
iface.launch()
|
| 36 |
-
=======
|
| 37 |
import gradio as gr
|
| 38 |
import torch
|
| 39 |
import torchaudio
|
|
@@ -48,6 +12,7 @@ model = HubertForCTC.from_pretrained("tecasoftai/hubert-finetune", token=token)
|
|
| 48 |
model.eval()
|
| 49 |
|
| 50 |
def transcribe(audio):
|
|
|
|
| 51 |
waveform, sr = torchaudio.load(audio)
|
| 52 |
if sr != 16000:
|
| 53 |
waveform = torchaudio.functional.resample(waveform, sr, 16000)
|
|
@@ -58,11 +23,14 @@ def transcribe(audio):
|
|
| 58 |
|
| 59 |
pred_ids = torch.argmax(logits, dim=-1)
|
| 60 |
phonemes = processor.batch_decode(pred_ids)[0]
|
|
|
|
|
|
|
|
|
|
| 61 |
return phonemes
|
| 62 |
|
| 63 |
iface = gr.Interface(
|
| 64 |
fn=transcribe,
|
| 65 |
-
inputs=gr.Audio(
|
| 66 |
outputs="text",
|
| 67 |
title="HuBERT Phoneme Recognition",
|
| 68 |
description="Upload or record audio. The model will return phoneme sequence.",
|
|
@@ -71,4 +39,4 @@ iface = gr.Interface(
|
|
| 71 |
|
| 72 |
if __name__ == "__main__":
|
| 73 |
iface.launch()
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
import torchaudio
|
|
|
|
| 12 |
model.eval()
|
| 13 |
|
| 14 |
def transcribe(audio):
|
| 15 |
+
start = time.time()
|
| 16 |
waveform, sr = torchaudio.load(audio)
|
| 17 |
if sr != 16000:
|
| 18 |
waveform = torchaudio.functional.resample(waveform, sr, 16000)
|
|
|
|
| 23 |
|
| 24 |
pred_ids = torch.argmax(logits, dim=-1)
|
| 25 |
phonemes = processor.batch_decode(pred_ids)[0]
|
| 26 |
+
|
| 27 |
+
end = time.time()
|
| 28 |
+
print(f"Inferences time: {end - start:.3f} seconds")
|
| 29 |
return phonemes
|
| 30 |
|
| 31 |
iface = gr.Interface(
|
| 32 |
fn=transcribe,
|
| 33 |
+
inputs=gr.Audio(type="filepath"),
|
| 34 |
outputs="text",
|
| 35 |
title="HuBERT Phoneme Recognition",
|
| 36 |
description="Upload or record audio. The model will return phoneme sequence.",
|
|
|
|
| 39 |
|
| 40 |
if __name__ == "__main__":
|
| 41 |
iface.launch()
|
| 42 |
+
|
sample.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# info
|
| 2 |
+
info:
|
| 3 |
+
app:
|
| 4 |
+
name: @project.name@
|
| 5 |
+
description: @project.description@
|
| 6 |
+
version: @project.version@
|
| 7 |
+
|
| 8 |
+
# app
|
| 9 |
+
app:
|
| 10 |
+
media-url: https://elp.teca.vn/api/media
|
| 11 |
+
default-lang: vi
|
| 12 |
+
idp:
|
| 13 |
+
url: https://idp.teca.vn
|
| 14 |
+
realm: elp
|
| 15 |
+
spring:
|
| 16 |
+
application:
|
| 17 |
+
name: elp-search
|
| 18 |
+
security:
|
| 19 |
+
oauth2:
|
| 20 |
+
resourceserver:
|
| 21 |
+
jwt:
|
| 22 |
+
issuer-uri: ${app.idp.url}/realms/${app.idp.realm}
|
| 23 |
+
jwk-set-uri: ${app.idp.url}/realms/${app.idp.realm}/protocol/openid-connect/certs
|
| 24 |
+
messages:
|
| 25 |
+
basename: messages
|
| 26 |
+
encoding: UTF-8
|
| 27 |
+
|
| 28 |
+
jackson:
|
| 29 |
+
time-zone: Asia/Ho_Chi_Minh
|
| 30 |
+
|
| 31 |
+
elasticsearch:
|
| 32 |
+
uris: http://172.20.20.177:9200
|
| 33 |
+
username: elastic
|
| 34 |
+
password: StEIBLSSuTTtfxciuWDx
|
| 35 |
+
connection-timeout: 60s
|
| 36 |
+
socket-timeout: 60s
|
| 37 |
+
# management
|
| 38 |
+
management:
|
| 39 |
+
tracing:
|
| 40 |
+
enabled: false
|
| 41 |
+
sampling:
|
| 42 |
+
probability: 1.0
|
| 43 |
+
endpoint:
|
| 44 |
+
health:
|
| 45 |
+
show-details: always
|
| 46 |
+
endpoints:
|
| 47 |
+
web:
|
| 48 |
+
exposure:
|
| 49 |
+
include: info, health, prometheus, metrics
|
| 50 |
+
metrics:
|
| 51 |
+
tags:
|
| 52 |
+
application: ${spring.application.name}
|
| 53 |
+
info:
|
| 54 |
+
env:
|
| 55 |
+
enabled: true
|
| 56 |
+
|
| 57 |
+
# gRPC
|
| 58 |
+
grpc:
|
| 59 |
+
server:
|
| 60 |
+
keep-alive-time: 30s
|
| 61 |
+
keep-alive-timeout: 5s
|
| 62 |
+
enable-keep-alive: true
|
| 63 |
+
client:
|
| 64 |
+
security:
|
| 65 |
+
address: static://172.20.20.152:32290
|
| 66 |
+
negotiation-type: plaintext
|
| 67 |
+
media:
|
| 68 |
+
address: static://172.20.20.152:32293
|
| 69 |
+
negotiation-type: plaintext
|
| 70 |
+
lcm:
|
| 71 |
+
address: static://172.20.20.152:32295
|
| 72 |
+
negotiation-type: plaintext
|