mastefan committed
Commit 0474d38 · verified · 1 Parent(s): 7a76102

Update src/app/conversation_core.py

Files changed (1)
  1. src/app/conversation_core.py +8 -20
src/app/conversation_core.py CHANGED
@@ -10,6 +10,9 @@ from dataclasses import dataclass
 from typing import List, Optional, Tuple
 from .config import get_user_dir
 
+import numpy as np
+from transformers import pipeline
+from pydub import AudioSegment
 
 import torch
 from gtts import gTTS
@@ -109,17 +112,6 @@ _LANG_HINTS = {
 # SPEECH RECOGNITION — faster-whisper
 ##########################################
 
-from faster_whisper import WhisperModel
-
-_whisper_model = None
-
-def load_whisper():
-    global _whisper_model
-    if _whisper_model is None:
-        _whisper_model = WhisperModel("small", device="cpu", compute_type="int8")
-    return _whisper_model
-
-
 def transcribe_audio(audio_segment, spoken_lang=None):
     """
     Accepts a pydub AudioSegment (mono, 16k).
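
Note: this hunk drops the lazily initialized faster-whisper backend. For context, the deleted load_whisper() helper was built to be consumed roughly as below. This is a sketch with a hypothetical call site (the real body of transcribe_audio falls outside the hunk), using the documented faster-whisper API:

    model = load_whisper()  # cached WhisperModel("small", device="cpu", compute_type="int8")
    # faster-whisper yields a lazy generator of segments plus metadata,
    # including a detected language and its probability.
    segments, info = model.transcribe("utterance.wav", beam_size=5)
    text = " ".join(seg.text.strip() for seg in segments)
    print(text, info.language, info.language_probability)

The language detection in info is what the transformers-based replacement below gives up; the new transcribe() returns the caller-supplied language or "unknown" instead.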
@@ -353,7 +345,10 @@ class ConversationManager:
 
     from transformers import pipeline
 
-    # Load Whisper once at module import (fast + HF-safe)
+    ###########################################################
+    # AUDIO TRANSCRIPTION — Transformers Whisper
+    ###########################################################
+
     whisper_pipe = pipeline(
         task="automatic-speech-recognition",
         model="openai/whisper-small",
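
The replacement loads Whisper once through the transformers pipeline when the class body executes. A minimal standalone equivalent is sketched below; device=-1 (CPU) is an assumption chosen to match the faster-whisper CPU config this commit removes, and the commit's remaining pipeline(...) arguments fall outside the hunk:

    from transformers import pipeline

    # Sketch of the new loader in isolation, not the commit's exact call.
    asr = pipeline(
        task="automatic-speech-recognition",
        model="openai/whisper-small",
        device=-1,  # assumption: CPU inference
    )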
@@ -363,21 +358,14 @@ class ConversationManager:
     def transcribe(self, audio_segment, spoken_lang=None):
         import numpy as np
 
-        # Convert AudioSegment → numpy float32 PCM
         audio = np.array(audio_segment.get_array_of_samples()).astype("float32")
-        audio = audio / np.max(np.abs(audio))  # normalize to [-1, 1]
+        audio = audio / np.max(np.abs(audio))
 
-        # Transformers Whisper expects a Python list or numpy array
         result = whisper_pipe(audio)
-
         text = result.get("text", "").strip()
 
-        # transformers Whisper does not provide language predictions on CPU
         return text, spoken_lang or "unknown", 1.0
 
-
-
-
 ###########################################################
 # TEXT → SPEECH
 ###########################################################
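
Two caveats in the new transcribe() are worth flagging. A bare numpy array makes the pipeline assume the model's native sampling rate (16 kHz for Whisper), so correctness rests on callers honoring the mono/16k contract, and np.max(np.abs(audio)) is 0.0 for silent input, turning the normalization into 0/0. A hedged usage sketch that makes both explicit, with asr built as in the note above and a placeholder file name:

    import numpy as np
    from pydub import AudioSegment

    # Usage sketch under the commit's assumptions: mono, 16 kHz input.
    seg = AudioSegment.from_file("utterance.wav").set_channels(1).set_frame_rate(16000)
    audio = np.array(seg.get_array_of_samples()).astype("float32")

    peak = np.max(np.abs(audio))
    if peak > 0:  # guard the 0/0 on pure silence
        audio = audio / peak

    # Passing the sampling rate explicitly avoids relying on the 16 kHz default.
    result = asr({"raw": audio, "sampling_rate": 16000})
    print(result.get("text", "").strip())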
 