Spaces:

tecasoftai
/

hubert-phoneme-space

Sleeping

App Files Community

Nguyen Anh Hong committed on Jul 25, 2025

Commit

416704c

1 Parent(s): a6380bd

add

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +6 -38
sample.py +72 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv/

app.py CHANGED Viewed

@@ -1,39 +1,3 @@
-<<<<<<< HEAD
-import gradio as gr
-import torch
-import torchaudio
-from transformers import Wav2Vec2Processor, HubertForCTC
-processor = Wav2Vec2Processor.from_pretrained("tecasoftai/hubert-finetune")
-model = HubertForCTC.from_pretrained("tecasoftai/hubert-finetune")
-model.eval()
-def transcribe(audio):
-    waveform, sr = torchaudio.load(audio)
-    if sr != 16000:
-        waveform = torchaudio.functional.resample(waveform, sr, 16000)
-    inputs = processor(waveform.squeeze(), sampling_rate=16000, return_tensors="pt")
-    with torch.no_grad():
-        logits = model(**inputs).logits
-    pred_ids = torch.argmax(logits, dim=-1)
-    phonemes = processor.batch_decode(pred_ids)[0]
-    return phonemes
-iface = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(source="microphone", type="filepath"),
-    outputs="text",
-    title="HuBERT Phoneme Recognition",
-    description="Upload or record audio. The model will return phoneme sequence.",
-    live=False
-)
-if __name__ == "__main__":
-    iface.launch()
-=======
 import gradio as gr
 import torch
 import torchaudio
@@ -48,6 +12,7 @@ model = HubertForCTC.from_pretrained("tecasoftai/hubert-finetune", token=token)
 model.eval()
 def transcribe(audio):
     waveform, sr = torchaudio.load(audio)
     if sr != 16000:
         waveform = torchaudio.functional.resample(waveform, sr, 16000)
@@ -58,11 +23,14 @@ def transcribe(audio):
     pred_ids = torch.argmax(logits, dim=-1)
     phonemes = processor.batch_decode(pred_ids)[0]
     return phonemes
 iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(source="microphone", type="filepath"),
     outputs="text",
     title="HuBERT Phoneme Recognition",
     description="Upload or record audio. The model will return phoneme sequence.",
@@ -71,4 +39,4 @@ iface = gr.Interface(
 if __name__ == "__main__":
     iface.launch()
->>>>>>> 73dd16b (add local model hubert-finetune)

 import gradio as gr
 import torch
 import torchaudio
 model.eval()
 def transcribe(audio):
+    start = time.time()
     waveform, sr = torchaudio.load(audio)
     if sr != 16000:
         waveform = torchaudio.functional.resample(waveform, sr, 16000)
     pred_ids = torch.argmax(logits, dim=-1)
     phonemes = processor.batch_decode(pred_ids)[0]
+    end = time.time()
+    print(f"Inferences time: {end - start:.3f} seconds")
     return phonemes
 iface = gr.Interface(
     fn=transcribe,
+    inputs=gr.Audio(type="filepath"),
     outputs="text",
     title="HuBERT Phoneme Recognition",
     description="Upload or record audio. The model will return phoneme sequence.",
 if __name__ == "__main__":
     iface.launch()

sample.py ADDED Viewed

	@@ -0,0 +1,72 @@

+# info
+info:
+  app:
+    name: @project.name@
+    description: @project.description@
+    version: @project.version@
+# app
+app:
+  media-url: https://elp.teca.vn/api/media
+  default-lang: vi
+  idp:
+    url: https://idp.teca.vn
+    realm: elp
+spring:
+  application:
+    name: elp-search
+  security:
+    oauth2:
+      resourceserver:
+        jwt:
+          issuer-uri: ${app.idp.url}/realms/${app.idp.realm}
+          jwk-set-uri: ${app.idp.url}/realms/${app.idp.realm}/protocol/openid-connect/certs
+  messages:
+    basename: messages
+    encoding: UTF-8
+  jackson:
+    time-zone: Asia/Ho_Chi_Minh
+  elasticsearch:
+    uris: http://172.20.20.177:9200
+    username: elastic
+    password: StEIBLSSuTTtfxciuWDx
+    connection-timeout: 60s
+    socket-timeout: 60s
+# management
+management:
+  tracing:
+    enabled: false
+    sampling:
+      probability: 1.0
+  endpoint:
+    health:
+      show-details: always
+  endpoints:
+    web:
+      exposure:
+        include: info, health, prometheus, metrics
+  metrics:
+    tags:
+      application: ${spring.application.name}
+  info:
+    env:
+      enabled: true
+# gRPC
+grpc:
+  server:
+    keep-alive-time: 30s
+    keep-alive-timeout: 5s
+    enable-keep-alive: true
+  client:
+    security:
+      address: static://172.20.20.152:32290
+      negotiation-type: plaintext
+    media:
+      address: static://172.20.20.152:32293
+      negotiation-type: plaintext
+    lcm:
+      address: static://172.20.20.152:32295
+      negotiation-type: plaintext