Nguyen Anh Hong committed on
Commit
416704c
·
1 Parent(s): a6380bd
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +6 -38
  3. sample.py +72 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv/
app.py CHANGED
@@ -1,39 +1,3 @@
1
- <<<<<<< HEAD
2
- import gradio as gr
3
- import torch
4
- import torchaudio
5
- from transformers import Wav2Vec2Processor, HubertForCTC
6
-
7
-
8
- processor = Wav2Vec2Processor.from_pretrained("tecasoftai/hubert-finetune")
9
- model = HubertForCTC.from_pretrained("tecasoftai/hubert-finetune")
10
- model.eval()
11
-
12
- def transcribe(audio):
13
- waveform, sr = torchaudio.load(audio)
14
- if sr != 16000:
15
- waveform = torchaudio.functional.resample(waveform, sr, 16000)
16
-
17
- inputs = processor(waveform.squeeze(), sampling_rate=16000, return_tensors="pt")
18
- with torch.no_grad():
19
- logits = model(**inputs).logits
20
-
21
- pred_ids = torch.argmax(logits, dim=-1)
22
- phonemes = processor.batch_decode(pred_ids)[0]
23
- return phonemes
24
-
25
- iface = gr.Interface(
26
- fn=transcribe,
27
- inputs=gr.Audio(source="microphone", type="filepath"),
28
- outputs="text",
29
- title="HuBERT Phoneme Recognition",
30
- description="Upload or record audio. The model will return phoneme sequence.",
31
- live=False
32
- )
33
-
34
- if __name__ == "__main__":
35
- iface.launch()
36
- =======
37
  import gradio as gr
38
  import torch
39
  import torchaudio
@@ -48,6 +12,7 @@ model = HubertForCTC.from_pretrained("tecasoftai/hubert-finetune", token=token)
48
  model.eval()
49
 
50
  def transcribe(audio):
 
51
  waveform, sr = torchaudio.load(audio)
52
  if sr != 16000:
53
  waveform = torchaudio.functional.resample(waveform, sr, 16000)
@@ -58,11 +23,14 @@ def transcribe(audio):
58
 
59
  pred_ids = torch.argmax(logits, dim=-1)
60
  phonemes = processor.batch_decode(pred_ids)[0]
 
 
 
61
  return phonemes
62
 
63
  iface = gr.Interface(
64
  fn=transcribe,
65
- inputs=gr.Audio(source="microphone", type="filepath"),
66
  outputs="text",
67
  title="HuBERT Phoneme Recognition",
68
  description="Upload or record audio. The model will return phoneme sequence.",
@@ -71,4 +39,4 @@ iface = gr.Interface(
71
 
72
  if __name__ == "__main__":
73
  iface.launch()
74
- >>>>>>> 73dd16b (add local model hubert-finetune)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import torch
3
  import torchaudio
 
12
  model.eval()
13
 
14
  def transcribe(audio):
15
+ start = time.time()
16
  waveform, sr = torchaudio.load(audio)
17
  if sr != 16000:
18
  waveform = torchaudio.functional.resample(waveform, sr, 16000)
 
23
 
24
  pred_ids = torch.argmax(logits, dim=-1)
25
  phonemes = processor.batch_decode(pred_ids)[0]
26
+
27
+ end = time.time()
28
+ print(f"Inferences time: {end - start:.3f} seconds")
29
  return phonemes
30
 
31
  iface = gr.Interface(
32
  fn=transcribe,
33
+ inputs=gr.Audio(type="filepath"),
34
  outputs="text",
35
  title="HuBERT Phoneme Recognition",
36
  description="Upload or record audio. The model will return phoneme sequence.",
 
39
 
40
  if __name__ == "__main__":
41
  iface.launch()
42
+
sample.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # info
2
+ info:
3
+ app:
4
+ name: @project.name@
5
+ description: @project.description@
6
+ version: @project.version@
7
+
8
+ # app
9
+ app:
10
+ media-url: https://elp.teca.vn/api/media
11
+ default-lang: vi
12
+ idp:
13
+ url: https://idp.teca.vn
14
+ realm: elp
15
+ spring:
16
+ application:
17
+ name: elp-search
18
+ security:
19
+ oauth2:
20
+ resourceserver:
21
+ jwt:
22
+ issuer-uri: ${app.idp.url}/realms/${app.idp.realm}
23
+ jwk-set-uri: ${app.idp.url}/realms/${app.idp.realm}/protocol/openid-connect/certs
24
+ messages:
25
+ basename: messages
26
+ encoding: UTF-8
27
+
28
+ jackson:
29
+ time-zone: Asia/Ho_Chi_Minh
30
+
31
+ elasticsearch:
32
+ uris: http://172.20.20.177:9200
33
+ username: elastic
34
+ password: StEIBLSSuTTtfxciuWDx
35
+ connection-timeout: 60s
36
+ socket-timeout: 60s
37
+ # management
38
+ management:
39
+ tracing:
40
+ enabled: false
41
+ sampling:
42
+ probability: 1.0
43
+ endpoint:
44
+ health:
45
+ show-details: always
46
+ endpoints:
47
+ web:
48
+ exposure:
49
+ include: info, health, prometheus, metrics
50
+ metrics:
51
+ tags:
52
+ application: ${spring.application.name}
53
+ info:
54
+ env:
55
+ enabled: true
56
+
57
+ # gRPC
58
+ grpc:
59
+ server:
60
+ keep-alive-time: 30s
61
+ keep-alive-timeout: 5s
62
+ enable-keep-alive: true
63
+ client:
64
+ security:
65
+ address: static://172.20.20.152:32290
66
+ negotiation-type: plaintext
67
+ media:
68
+ address: static://172.20.20.152:32293
69
+ negotiation-type: plaintext
70
+ lcm:
71
+ address: static://172.20.20.152:32295
72
+ negotiation-type: plaintext