unknown committed on
Commit
fa16b79
·
1 Parent(s): 59afc96

add Qwen ASR

Browse files
README.md CHANGED
@@ -43,3 +43,20 @@ Or rerun diagnosis only for an existing run:
43
  export OPENAI_API_KEY=your_key
44
  python scripts/run_diagnostic.py --run_id <run_id> --model gpt-4.1-mini
45
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  export OPENAI_API_KEY=your_key
44
  python scripts/run_diagnostic.py --run_id <run_id> --model gpt-4.1-mini
45
  ```
46
+
47
+
48
+ ## Qwen3-ASR
49
+
50
+ This project now supports `Qwen/Qwen3-ASR-0.6B` and `Qwen/Qwen3-ASR-1.7B` via the `qwen-asr` package.
51
+
52
+ Install the runtime dependency:
53
+
54
+ ```bash
55
+ pip install -U qwen-asr
56
+ ```
57
+
58
+ Example run:
59
+
60
+ ```bash
61
+ python pipeline/run_all.py --manifest data/manifest.jsonl --model_name Qwen/Qwen3-ASR-0.6B --backend qwen3_asr --language zh
62
+ ```
adapters/qwen3_asr.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import librosa
7
+ import torch
8
+
9
+ from core.interfaces import ASRModel
10
+ from core.schemas import ASRConfig, ASROutput, Segment
11
+
12
+
13
+ _LANG_MAP = {
14
+ "zh": "Chinese",
15
+ "zh-cn": "Chinese",
16
+ "zh-tw": "Chinese",
17
+ "yue": "Cantonese",
18
+ "en": "English",
19
+ "ja": "Japanese",
20
+ "ko": "Korean",
21
+ "fr": "French",
22
+ "de": "German",
23
+ "es": "Spanish",
24
+ "pt": "Portuguese",
25
+ "ru": "Russian",
26
+ "it": "Italian",
27
+ "ar": "Arabic",
28
+ "id": "Indonesian",
29
+ "vi": "Vietnamese",
30
+ "th": "Thai",
31
+ "tr": "Turkish",
32
+ "hi": "Hindi",
33
+ "ms": "Malay",
34
+ "nl": "Dutch",
35
+ "sv": "Swedish",
36
+ "da": "Danish",
37
+ "fi": "Finnish",
38
+ "pl": "Polish",
39
+ "cs": "Czech",
40
+ "fil": "Filipino",
41
+ "fa": "Persian",
42
+ "el": "Greek",
43
+ "hu": "Hungarian",
44
+ "mk": "Macedonian",
45
+ "ro": "Romanian",
46
+ }
47
+
48
+
49
class Qwen3ASRAdapter(ASRModel):
    """Adapter that runs Qwen3-ASR checkpoints behind the ``ASRModel`` interface.

    The underlying model is loaded once in ``__init__`` through
    ``qwen_asr.Qwen3ASRModel.from_pretrained`` and reused by every
    ``transcribe`` call.
    """

    def __init__(
        self,
        model_name: str = "Qwen/Qwen3-ASR-0.6B",
        device: Optional[str] = None,
        dtype: Optional[str] = None,
        device_map: Optional[str] = None,
        max_inference_batch_size: int = 1,
        max_new_tokens: int = 512,
        forced_aligner: Optional[str] = None,
    ):
        """Load the Qwen3-ASR model.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``.
            device: Target device string. Defaults to ``"cuda"`` when CUDA is
                available, otherwise ``"cpu"``.
            dtype: Dtype name (see ``_resolve_dtype``). Defaults to
                ``"bfloat16"`` on CUDA devices and ``"float32"`` otherwise.
            device_map: Explicit device map forwarded to ``from_pretrained``.
                When omitted it is derived from ``device``.
            max_inference_batch_size: Forwarded to ``from_pretrained``.
            max_new_tokens: Forwarded to ``from_pretrained``.
            forced_aligner: Optional forced-aligner identifier. When set, it is
                forwarded to ``from_pretrained`` and ``transcribe`` requests
                timestamps.

        Raises:
            ImportError: If the ``qwen-asr`` package cannot be imported.
        """
        try:
            from qwen_asr import Qwen3ASRModel
        except Exception as e:  # pragma: no cover - import guard
            raise ImportError(
                "Qwen3-ASR adapter requires the 'qwen-asr' package. "
                "Install it with: pip install -U qwen-asr"
            ) from e

        self.model_name = model_name
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.dtype_name = dtype or ("bfloat16" if self.device.startswith("cuda") else "float32")
        self.max_inference_batch_size = max_inference_batch_size
        self.max_new_tokens = max_new_tokens
        self.forced_aligner = forced_aligner

        torch_dtype = self._resolve_dtype(self.dtype_name)
        # Honour an explicit CUDA index (e.g. "cuda:1") instead of always
        # placing the model on "cuda:0".
        resolved_device_map = device_map or self._default_device_map(self.device)

        kwargs: Dict[str, Any] = {
            "dtype": torch_dtype,
            "device_map": resolved_device_map,
            "max_inference_batch_size": max_inference_batch_size,
            "max_new_tokens": max_new_tokens,
        }
        if forced_aligner:
            # The aligner is loaded with the same dtype / placement as the ASR model.
            kwargs["forced_aligner"] = forced_aligner
            kwargs["forced_aligner_kwargs"] = {
                "dtype": torch_dtype,
                "device_map": resolved_device_map,
            }

        self._qwen_asr_model = Qwen3ASRModel.from_pretrained(model_name, **kwargs)
        self._model_kwargs = kwargs

    @staticmethod
    def _default_device_map(device: str) -> str:
        """Return the device map used when the caller did not supply one.

        A bare ``"cuda"`` becomes ``"cuda:0"``; an indexed CUDA device such as
        ``"cuda:1"`` is kept as-is; every non-CUDA device maps to ``"cpu"``.
        """
        if device.startswith("cuda"):
            return device if ":" in device else "cuda:0"
        return "cpu"

    @staticmethod
    def _resolve_dtype(dtype_name: str):
        """Translate a dtype name into a ``torch`` dtype.

        Matching is case-insensitive. Any name that is not a recognised
        bfloat16 / float16 alias (including an empty name) yields
        ``torch.float32``.
        """
        name = (dtype_name or "").lower()
        if name in {"bf16", "bfloat16"}:
            return torch.bfloat16
        if name in {"fp16", "float16", "half"}:
            return torch.float16
        return torch.float32

    @staticmethod
    def _normalize_language(language: Optional[str]) -> Optional[str]:
        """Map a language / locale code to the name listed in ``_LANG_MAP``.

        Lookup ignores case and surrounding whitespace, and treats ``_`` like
        ``-`` (so ``zh_CN`` matches ``zh-cn``). Values without a mapping are
        returned stripped but otherwise unchanged. Empty or whitespace-only
        input yields ``None``.
        """
        if not language:
            return None
        cleaned = str(language).strip()
        if not cleaned:
            return None
        key = cleaned.lower().replace("_", "-")
        return _LANG_MAP.get(key, cleaned)

    @staticmethod
    def _segment_from_timestamp(ts: Any, idx: int) -> Optional[Segment]:
        """Convert one timestamp entry into a ``Segment``.

        Args:
            ts: Either an object exposing ``text`` / ``start_time`` /
                ``end_time`` attributes, or a dict with ``text`` and
                ``start_time``/``start`` and ``end_time``/``end`` keys.
            idx: Position of the entry; used as a placeholder start/end when
                the real times cannot be converted.

        Returns:
            The segment, or ``None`` when the entry carries no text.
        """
        if ts is None:
            return None
        text = getattr(ts, "text", None)
        start = getattr(ts, "start_time", None)
        end = getattr(ts, "end_time", None)
        if text is None:
            if not isinstance(ts, dict):
                return None
            text = ts.get("text")
            start = ts.get("start_time", ts.get("start"))
            end = ts.get("end_time", ts.get("end"))
            if text is None:
                # Without this guard the segment text would be the string "None".
                return None
        try:
            return Segment(start=float(start or 0.0), end=float(end or 0.0), text=str(text))
        except Exception:
            # Best effort: keep the text even if the times are unusable.
            return Segment(start=float(idx), end=float(idx), text=str(text))

    def model_info(self) -> Dict:
        """Return a dict describing the adapter's configuration."""
        return {
            "name": "qwen3-asr",
            "model_name": self.model_name,
            "device": self.device,
            "dtype": self.dtype_name,
            "max_inference_batch_size": self.max_inference_batch_size,
            "max_new_tokens": self.max_new_tokens,
            "forced_aligner": self.forced_aligner,
        }

    def transcribe(self, utt_id: str, audio_uri: str, config: Optional[ASRConfig] = None) -> ASROutput:
        """Transcribe a single audio file.

        Args:
            utt_id: Utterance identifier copied into the output.
            audio_uri: Audio location handed to both ``librosa.load`` and the
                Qwen3-ASR model.
            config: Optional ASR configuration; only ``config.language`` is
                read here. Defaults to ``ASRConfig()``.

        Returns:
            An ``ASROutput`` built from the first result returned by the
            model. ``latency_ms`` covers only the model's ``transcribe`` call.
            ``confidence`` is always ``None``.
        """
        config = config or ASRConfig()

        # Duration is best effort: a decoding failure here must not prevent
        # the model from attempting the transcription itself.
        try:
            y, sr = librosa.load(audio_uri, sr=16000, mono=True)
            duration_s = float(len(y) / sr)
        except Exception:
            duration_s = None

        language = self._normalize_language(config.language)
        # Timestamps are only requested when a forced aligner was configured.
        return_timestamps = bool(self.forced_aligner)

        # perf_counter is monotonic, so the latency cannot be skewed by
        # system clock adjustments.
        t0 = time.perf_counter()
        results = self._qwen_asr_model.transcribe(
            audio=audio_uri,
            language=language,
            return_time_stamps=return_timestamps,
        )
        latency_ms = (time.perf_counter() - t0) * 1000.0

        if not results:
            return ASROutput(
                utt_id=utt_id,
                hyp_text="",
                segments=[],
                language=language,
                duration_s=duration_s,
                latency_ms=latency_ms,
                confidence=None,
                extras={},
            )

        result = results[0]
        hyp_text = str(getattr(result, "text", "") or "").strip()
        # Prefer the language reported by the model, else the requested one.
        detected_language = getattr(result, "language", None) or language

        segments: List[Segment] = []
        time_stamps = getattr(result, "time_stamps", None)
        if time_stamps:
            for idx, ts in enumerate(time_stamps):
                seg = self._segment_from_timestamp(ts, idx)
                if seg is not None:
                    segments.append(seg)
        elif duration_s is not None:
            # No timestamps: represent the whole utterance as one segment.
            segments = [Segment(start=0.0, end=duration_s, text=hyp_text)]

        # Copy optional fields through when the result object provides them.
        extras: Dict[str, Any] = {}
        for key in ("tokens", "raw_text"):
            value = getattr(result, key, None)
            if value is not None:
                extras[key] = value

        return ASROutput(
            utt_id=utt_id,
            hyp_text=hyp_text,
            segments=segments,
            language=detected_language,
            duration_s=duration_s,
            latency_ms=latency_ms,
            confidence=None,
            extras=extras,
        )
data/manifest_hf.jsonl ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"utt_id": "fsicoli_common_voice_22_0_00000", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00000.wav", "ref_text": "巴顿是位于美国加利福尼亚州阿马多尔县的一个非建制地区。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
2
+ {"utt_id": "fsicoli_common_voice_22_0_00001", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00001.wav", "ref_text": "恩骑尉,是中国清朝时的爵名。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
3
+ {"utt_id": "fsicoli_common_voice_22_0_00002", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00002.wav", "ref_text": "仙台盐釜港是位于日本宫城县、内的海港,由宫城县政府负责港务营运。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
4
+ {"utt_id": "fsicoli_common_voice_22_0_00003", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00003.wav", "ref_text": "利马的阳台是西班牙殖民时期和共和国时期建造的文化遗产。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
5
+ {"utt_id": "fsicoli_common_voice_22_0_00004", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00004.wav", "ref_text": "成山,字屏临,号进斋,满洲正蓝旗人。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
6
+ {"utt_id": "fsicoli_common_voice_22_0_00005", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00005.wav", "ref_text": "嘉靖十一年任福建龙溪县知县。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
7
+ {"utt_id": "fsicoli_common_voice_22_0_00006", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00006.wav", "ref_text": "科莫巴比是位于美国亚利桑那州皮马县的一个人口普查指定地区。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
8
+ {"utt_id": "fsicoli_common_voice_22_0_00007", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00007.wav", "ref_text": "历史上明永乐皇帝、清干隆皇帝等曾经多次到访,并留下牌匾和诗句。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
9
+ {"utt_id": "fsicoli_common_voice_22_0_00008", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00008.wav", "ref_text": "小花仙动画角色列表记录了所有在中国大陆动画《小花仙》系列中出场角色的详细介绍。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
10
+ {"utt_id": "fsicoli_common_voice_22_0_00009", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00009.wav", "ref_text": "妳不要再去那里了", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
11
+ {"utt_id": "fsicoli_common_voice_22_0_00010", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00010.wav", "ref_text": "银座松竹广场是位于日本东京都中央区筑地一丁目的摩天大楼。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
12
+ {"utt_id": "fsicoli_common_voice_22_0_00011", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00011.wav", "ref_text": "儿童权利监察使办公室设于华沙。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
13
+ {"utt_id": "fsicoli_common_voice_22_0_00012", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00012.wav", "ref_text": "灰阶音乐是位于香港的一家独立唱片厂牌和音乐出版公司。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
14
+ {"utt_id": "fsicoli_common_voice_22_0_00013", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00013.wav", "ref_text": "梁士济,字遂良,广东广州府南海县人,明朝、南明政治人物。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
15
+ {"utt_id": "fsicoli_common_voice_22_0_00014", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00014.wav", "ref_text": "姜涛曾就读轩尼诗道官立下午小学、邓肇坚维多利亚官立中学和青年学院。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
16
+ {"utt_id": "fsicoli_common_voice_22_0_00015", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00015.wav", "ref_text": "上海江南长兴重工有限责任公司简称长兴重工,厂区位于上海长兴岛船舶制造基地。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
17
+ {"utt_id": "fsicoli_common_voice_22_0_00016", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00016.wav", "ref_text": "卢启贤是香港的亿万富翁企业家和慈善家。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
18
+ {"utt_id": "fsicoli_common_voice_22_0_00017", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00017.wav", "ref_text": "在这类故事的早期版本里,女人的猪脸外观是由巫术导致的。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
19
+ {"utt_id": "fsicoli_common_voice_22_0_00018", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00018.wav", "ref_text": "事件起因据信是天然气爆炸。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
20
+ {"utt_id": "fsicoli_common_voice_22_0_00019", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00019.wav", "ref_text": "在工作了九年后,伯爵不幸去世。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
21
+ {"utt_id": "fsicoli_common_voice_22_0_00020", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00020.wav", "ref_text": "整个系统称为键接合。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
22
+ {"utt_id": "fsicoli_common_voice_22_0_00021", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00021.wav", "ref_text": "大和和纪,日本漫画家,代表作有《窈窕淑女》、《源氏物语》等。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
23
+ {"utt_id": "fsicoli_common_voice_22_0_00022", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00022.wav", "ref_text": "事后三天,赵宇被福州市公安局晋安分局以涉嫌故意伤害罪刑事拘留。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
24
+ {"utt_id": "fsicoli_common_voice_22_0_00023", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00023.wav", "ref_text": "由春岗互通向萝岗方向排列", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
25
+ {"utt_id": "fsicoli_common_voice_22_0_00024", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00024.wav", "ref_text": "弘光帝即位,让刘文照袭封新乐伯,南京沦陷后寄居在高邮,开辟农田种菜直到去世。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "bcb4464171113dd9b51f371c3eecea06771fde83e7e3239ad0516469c6dcdf80170d26c7d1b1ef2476c45b51bfb4ee5549f07d7002bcfcec9b371a30c873b92d", "gender": "male_masculine", "accent": "", "age": "twenties", "locale": "zh-CN"}}
26
+ {"utt_id": "fsicoli_common_voice_22_0_00025", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00025.wav", "ref_text": "露露夫人终究与三姐弟达成了协议。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "99a4cee094a7058f27e615982d793da9039f8916c4cb0934eafecb601214cb89657ddee22f688a38782a72f5b6622a323ed6dca74f6663430f8cb3c0804563ea", "gender": "male_masculine", "accent": "出生地:31 上海市", "age": "teens", "locale": "zh-CN"}}
27
+ {"utt_id": "fsicoli_common_voice_22_0_00026", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00026.wav", "ref_text": "武定州,中国唐朝时设置的州。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "99a4cee094a7058f27e615982d793da9039f8916c4cb0934eafecb601214cb89657ddee22f688a38782a72f5b6622a323ed6dca74f6663430f8cb3c0804563ea", "gender": "male_masculine", "accent": "出生地:31 上海市", "age": "teens", "locale": "zh-CN"}}
28
+ {"utt_id": "fsicoli_common_voice_22_0_00027", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00027.wav", "ref_text": "宝陀寺,可以指", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "99a4cee094a7058f27e615982d793da9039f8916c4cb0934eafecb601214cb89657ddee22f688a38782a72f5b6622a323ed6dca74f6663430f8cb3c0804563ea", "gender": "male_masculine", "accent": "出生地:31 上海市", "age": "teens", "locale": "zh-CN"}}
29
+ {"utt_id": "fsicoli_common_voice_22_0_00028", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00028.wav", "ref_text": "去札幌啤酒博物馆", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "99a4cee094a7058f27e615982d793da9039f8916c4cb0934eafecb601214cb89657ddee22f688a38782a72f5b6622a323ed6dca74f6663430f8cb3c0804563ea", "gender": "male_masculine", "accent": "出生地:31 上海市", "age": "teens", "locale": "zh-CN"}}
30
+ {"utt_id": "fsicoli_common_voice_22_0_00029", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00029.wav", "ref_text": "洛莱塔是位于美国加利福尼亚州洪堡县的一个人口普查指定地区。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "99a4cee094a7058f27e615982d793da9039f8916c4cb0934eafecb601214cb89657ddee22f688a38782a72f5b6622a323ed6dca74f6663430f8cb3c0804563ea", "gender": "male_masculine", "accent": "出生地:31 上海市", "age": "teens", "locale": "zh-CN"}}
31
+ {"utt_id": "fsicoli_common_voice_22_0_00030", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00030.wav", "ref_text": "许州人。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "99a4cee094a7058f27e615982d793da9039f8916c4cb0934eafecb601214cb89657ddee22f688a38782a72f5b6622a323ed6dca74f6663430f8cb3c0804563ea", "gender": "male_masculine", "accent": "出生地:31 上海市", "age": "teens", "locale": "zh-CN"}}
32
+ {"utt_id": "fsicoli_common_voice_22_0_00031", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00031.wav", "ref_text": "班纳镇区为美国堪萨斯州杰克逊县辖下的镇区。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "99a4cee094a7058f27e615982d793da9039f8916c4cb0934eafecb601214cb89657ddee22f688a38782a72f5b6622a323ed6dca74f6663430f8cb3c0804563ea", "gender": "male_masculine", "accent": "出生地:31 上海市", "age": "teens", "locale": "zh-CN"}}
33
+ {"utt_id": "fsicoli_common_voice_22_0_00032", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00032.wav", "ref_text": "范家庄遗址,位于山东省潍坊市坊子区坊城街道。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "99a4cee094a7058f27e615982d793da9039f8916c4cb0934eafecb601214cb89657ddee22f688a38782a72f5b6622a323ed6dca74f6663430f8cb3c0804563ea", "gender": "male_masculine", "accent": "出生地:31 上海市", "age": "teens", "locale": "zh-CN"}}
34
+ {"utt_id": "fsicoli_common_voice_22_0_00033", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00033.wav", "ref_text": "郭新立,河北安国人,出生于北京,中国教育人物,现任山东大学党委书记。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "99a4cee094a7058f27e615982d793da9039f8916c4cb0934eafecb601214cb89657ddee22f688a38782a72f5b6622a323ed6dca74f6663430f8cb3c0804563ea", "gender": "male_masculine", "accent": "出生地:31 上海市", "age": "teens", "locale": "zh-CN"}}
35
+ {"utt_id": "fsicoli_common_voice_22_0_00034", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00034.wav", "ref_text": "龟山风景区管理处是下辖的一个类似乡级单位。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
36
+ {"utt_id": "fsicoli_common_voice_22_0_00035", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00035.wav", "ref_text": "后来他随着李成栋反正,历任光禄卿、户部右侍郎,兵部左侍郎,永历二年晋兵部尚书。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
37
+ {"utt_id": "fsicoli_common_voice_22_0_00036", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00036.wav", "ref_text": "同年加入中国人民解放军。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
38
+ {"utt_id": "fsicoli_common_voice_22_0_00037", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00037.wav", "ref_text": "由马可、许亚军领衔主演,并由岳红、柯蓝、王策、孙爽联合主演。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
39
+ {"utt_id": "fsicoli_common_voice_22_0_00038", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00038.wav", "ref_text": "生于崎玉县川越市,女子美术大学肄业。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
40
+ {"utt_id": "fsicoli_common_voice_22_0_00039", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00039.wav", "ref_text": "旧福布斯敦是位于美国加利福尼亚州比尤特县的一个非建制地区。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
41
+ {"utt_id": "fsicoli_common_voice_22_0_00040", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00040.wav", "ref_text": "大厅供穆斯林祈祷,这也是他们见面以结束禁食的地方。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
42
+ {"utt_id": "fsicoli_common_voice_22_0_00041", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00041.wav", "ref_text": "我们就没办法改善", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
43
+ {"utt_id": "fsicoli_common_voice_22_0_00042", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00042.wav", "ref_text": "四号镇区是位于美国阿肯色州本顿县的一个镇区。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
44
+ {"utt_id": "fsicoli_common_voice_22_0_00043", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00043.wav", "ref_text": "格梅林后来出版了若干本关于化学、制药科学、矿物学和植物学的教科书。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "22950d9b987d2554c0d7130808cc60fcb5255d92bb579ad138f4da5e2d5fc52b02d4639e4fe708ef5b820a04812fd3f530e3ea93abfac3e55c8dc2ad22696403", "gender": "", "accent": "", "age": "", "locale": "zh-CN"}}
45
+ {"utt_id": "fsicoli_common_voice_22_0_00044", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00044.wav", "ref_text": "同年获选澳门十大杰出运动员,是首位获奖的篮球员。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "3c71635420e0de3a0272e28a63d340dbaaeb5d99e246668955f38c25279dfdbbd8eec8cc8663601fe11d6cfd81a45f9a2e8a5d55379220fe71d24a00bee0effb", "gender": "male_masculine", "accent": "出生地:42 湖北省", "age": "thirties", "locale": "zh-CN"}}
46
+ {"utt_id": "fsicoli_common_voice_22_0_00045", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00045.wav", "ref_text": "阿尔德斯普林斯是位于美国加利福尼亚州弗雷斯诺县的一个非建制地区。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "3c71635420e0de3a0272e28a63d340dbaaeb5d99e246668955f38c25279dfdbbd8eec8cc8663601fe11d6cfd81a45f9a2e8a5d55379220fe71d24a00bee0effb", "gender": "male_masculine", "accent": "出生地:42 湖北省", "age": "thirties", "locale": "zh-CN"}}
47
+ {"utt_id": "fsicoli_common_voice_22_0_00046", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00046.wav", "ref_text": "巴特勒是位于美国亚利桑那州莫哈维县的一个非建制地区。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "3c71635420e0de3a0272e28a63d340dbaaeb5d99e246668955f38c25279dfdbbd8eec8cc8663601fe11d6cfd81a45f9a2e8a5d55379220fe71d24a00bee0effb", "gender": "male_masculine", "accent": "出生地:42 湖北省", "age": "thirties", "locale": "zh-CN"}}
48
+ {"utt_id": "fsicoli_common_voice_22_0_00047", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00047.wav", "ref_text": "最后放弃", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "3c71635420e0de3a0272e28a63d340dbaaeb5d99e246668955f38c25279dfdbbd8eec8cc8663601fe11d6cfd81a45f9a2e8a5d55379220fe71d24a00bee0effb", "gender": "male_masculine", "accent": "出生地:42 湖北省", "age": "thirties", "locale": "zh-CN"}}
49
+ {"utt_id": "fsicoli_common_voice_22_0_00048", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00048.wav", "ref_text": "薄刀峰林场,是下辖的一个类似乡级单位。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "3c71635420e0de3a0272e28a63d340dbaaeb5d99e246668955f38c25279dfdbbd8eec8cc8663601fe11d6cfd81a45f9a2e8a5d55379220fe71d24a00bee0effb", "gender": "male_masculine", "accent": "出生地:42 湖北省", "age": "thirties", "locale": "zh-CN"}}
50
+ {"utt_id": "fsicoli_common_voice_22_0_00049", "audio_uri": "C:\\Users\\hp\\Desktop\\ASR Agent\\ASR_AGENT_\\data\\hf_audio\\fsicoli_common_voice_22_0_00049.wav", "ref_text": "该季他第一次出赛是在九局上担任普林斯·菲尔德的代跑。", "meta": {"dataset_id": "fsicoli/common_voice_22_0", "dataset_config": "zh-CN", "split": "validation", "text_field": "sentence", "sample_rate": 48000, "client_id": "dfacf81ef98f2b80ebf3a932d8c926f7fa65ffaa8dfc35edefc1344d0e4096cc52dd6cd86f2b29d9ae8dc8bf25d4ac3e0fd6133ed370de7f4e6df6d89193c9b4", "gender": "male_masculine", "accent": "出生地:35 福建省", "age": "twenties", "locale": "zh-CN"}}
pipeline/run_all.py CHANGED
@@ -1,24 +1,28 @@
1
  from __future__ import annotations
 
2
  import argparse
3
- from pipeline.run_asr import run_asr
4
  from pipeline.run_analysis import run_analysis
 
5
 
6
 
7
  def main():
8
  ap = argparse.ArgumentParser()
9
  ap.add_argument("--manifest", required=True)
10
- ap.add_argument("--model_name", default="small")
11
  ap.add_argument("--device", default="cpu")
12
- ap.add_argument("--compute_type", default="int8")
13
  ap.add_argument("--llm_model", default="gpt-4.1-mini")
14
  ap.add_argument("--disable_llm", action="store_true")
 
15
  args = ap.parse_args()
16
 
17
  run_id = run_asr(
18
  manifest_path=args.manifest,
19
- model_name=args.model_name,
20
  device=args.device,
21
- compute_type=args.compute_type,
 
22
  )
23
  run_analysis(run_id, llm_enabled=not args.disable_llm, llm_model=args.llm_model)
24
  print(f"Done. Run: runs/{run_id}")
 
1
  from __future__ import annotations
2
+
3
  import argparse
4
+
5
  from pipeline.run_analysis import run_analysis
6
+ from pipeline.run_asr import run_asr
7
 
8
 
9
  def main():
10
  ap = argparse.ArgumentParser()
11
  ap.add_argument("--manifest", required=True)
12
+ ap.add_argument("--model_name", default="openai/whisper-small")
13
  ap.add_argument("--device", default="cpu")
14
+ ap.add_argument("--backend", default="auto", choices=["auto", "whisper_transformers", "qwen3_asr"])
15
  ap.add_argument("--llm_model", default="gpt-4.1-mini")
16
  ap.add_argument("--disable_llm", action="store_true")
17
+ ap.add_argument("--language", default="zh")
18
  args = ap.parse_args()
19
 
20
  run_id = run_asr(
21
  manifest_path=args.manifest,
22
+ model_repo_id=args.model_name,
23
  device=args.device,
24
+ asr_config={"language": args.language},
25
+ backend=args.backend,
26
  )
27
  run_analysis(run_id, llm_enabled=not args.disable_llm, llm_model=args.llm_model)
28
  print(f"Done. Run: runs/{run_id}")
pipeline/run_asr.py CHANGED
@@ -1,18 +1,19 @@
1
  from __future__ import annotations
2
 
3
  import argparse
 
4
  import json
5
  from pathlib import Path
6
  from typing import Dict, Optional
 
7
  from tqdm import tqdm
8
 
9
- from core.io import load_manifest, append_jsonl, read_jsonl
 
 
10
  from core.schemas import ASRConfig
11
- from core.utils import stable_hash, ensure_dir
12
 
13
- from adapters.whisper_transformers import TransformersWhisperAdapter
14
-
15
- import hashlib
16
 
17
  def file_md5(path: str) -> str:
18
  h = hashlib.md5()
@@ -22,23 +23,51 @@ def file_md5(path: str) -> str:
22
  return h.hexdigest()[:8]
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def run_asr(
26
  manifest_path: str,
27
  out_root: str = "runs",
28
  model_repo_id: str = "openai/whisper-small",
29
  device: str = "cpu",
30
  asr_config: Optional[Dict] = None,
 
31
  ) -> str:
32
  asr_config = asr_config or {}
33
  cfg = ASRConfig(**asr_config)
34
 
35
- adapter = TransformersWhisperAdapter(model_name=model_repo_id, device=device)
 
36
  model_info = adapter.model_info()
37
 
38
  manifest_hash = file_md5(manifest_path)
39
- run_id = f"whisper_{model_repo_id.split('/')[-1]}_{manifest_hash}_{stable_hash({'model': model_info, 'cfg': cfg.model_dump()})}"
40
 
41
- # run_id = f"whisper_{model_repo_id.split('/')[-1]}_{stable_hash({'model': model_info, 'cfg': cfg.model_dump()})}"
42
  run_dir = Path(out_root) / run_id
43
  ensure_dir(run_dir)
44
 
@@ -47,11 +76,11 @@ def run_asr(
47
  "manifest_path": str(manifest_path),
48
  "model_info": model_info,
49
  "asr_config": cfg.model_dump(),
 
50
  }
51
  (run_dir / "run_meta.json").write_text(json.dumps(run_meta, ensure_ascii=False, indent=2), encoding="utf-8")
52
 
53
  out_path = run_dir / "asr_outputs.jsonl"
54
-
55
  done = set()
56
  if out_path.exists():
57
  for r in read_jsonl(out_path):
@@ -77,6 +106,7 @@ def main():
77
  ap.add_argument("--model_repo_id", default="openai/whisper-small")
78
  ap.add_argument("--device", default="cpu")
79
  ap.add_argument("--language", default="zh")
 
80
  args = ap.parse_args()
81
 
82
  run_id = run_asr(
@@ -85,6 +115,7 @@ def main():
85
  model_repo_id=args.model_repo_id,
86
  device=args.device,
87
  asr_config={"language": args.language},
 
88
  )
89
  print(run_id)
90
 
 
1
  from __future__ import annotations
2
 
3
  import argparse
4
+ import hashlib
5
  import json
6
  from pathlib import Path
7
  from typing import Dict, Optional
8
+
9
  from tqdm import tqdm
10
 
11
+ from adapters.qwen3_asr import Qwen3ASRAdapter
12
+ from adapters.whisper_transformers import TransformersWhisperAdapter
13
+ from core.io import append_jsonl, load_manifest, read_jsonl
14
  from core.schemas import ASRConfig
15
+ from core.utils import ensure_dir, stable_hash
16
 
 
 
 
17
 
18
  def file_md5(path: str) -> str:
19
  h = hashlib.md5()
 
23
  return h.hexdigest()[:8]
24
 
25
 
26
def resolve_backend(model_repo_id: str, backend: Optional[str] = None) -> str:
    """Pick the ASR backend identifier for a model.

    Args:
        model_repo_id: Model repository id; only inspected when the backend
            has to be auto-detected. ``None`` / empty is tolerated.
        backend: Explicit backend name, or ``None`` / ``""`` / ``"auto"`` to
            auto-detect from ``model_repo_id``.

    Returns:
        The explicit backend (lower-cased, surrounding whitespace removed)
        when one was given; otherwise ``"qwen3_asr"`` if the model id contains
        ``"qwen3-asr"`` (case-insensitive), else ``"whisper_transformers"``.
    """
    # Normalise first so inputs such as " AUTO " or "Qwen3_ASR" behave like
    # their canonical spelling instead of bypassing auto-detection or being
    # passed on in a form no adapter factory recognises.
    requested = (backend or "").strip().lower()
    if requested and requested != "auto":
        return requested

    if "qwen3-asr" in (model_repo_id or "").lower():
        return "qwen3_asr"
    return "whisper_transformers"
33
+
34
+
35
def build_adapter(
    model_repo_id: str,
    device: str,
    backend: str,
    asr_config: ASRConfig,
):
    """Instantiate the adapter object for an already-resolved backend name.

    Args:
        model_repo_id: Passed to the adapter as ``model_name``.
        device: Passed to the adapter as ``device``.
        backend: ``"qwen3_asr"`` or ``"whisper_transformers"``.
        asr_config: Only read for the Qwen backend, where a truthy
            ``chunk_length_s`` causes a forced-aligner model id to be passed.

    Returns:
        A ``Qwen3ASRAdapter`` or ``TransformersWhisperAdapter`` instance.

    Raises:
        ValueError: If ``backend`` is neither of the two supported names.
    """
    if backend == "whisper_transformers":
        return TransformersWhisperAdapter(model_name=model_repo_id, device=device)

    if backend != "qwen3_asr":
        raise ValueError(f"Unsupported backend: {backend}")

    # The aligner id is supplied only when chunk_length_s is set (truthy).
    aligner = None
    if asr_config.chunk_length_s:
        aligner = "Qwen/Qwen3-ForcedAligner-0.6B"
    return Qwen3ASRAdapter(
        model_name=model_repo_id,
        device=device,
        forced_aligner=aligner,
    )
51
+
52
+
53
  def run_asr(
54
  manifest_path: str,
55
  out_root: str = "runs",
56
  model_repo_id: str = "openai/whisper-small",
57
  device: str = "cpu",
58
  asr_config: Optional[Dict] = None,
59
+ backend: str = "auto",
60
  ) -> str:
61
  asr_config = asr_config or {}
62
  cfg = ASRConfig(**asr_config)
63
 
64
+ resolved_backend = resolve_backend(model_repo_id, backend)
65
+ adapter = build_adapter(model_repo_id=model_repo_id, device=device, backend=resolved_backend, asr_config=cfg)
66
  model_info = adapter.model_info()
67
 
68
  manifest_hash = file_md5(manifest_path)
69
+ run_id = f"{resolved_backend}_{model_repo_id.split('/')[-1]}_{manifest_hash}_{stable_hash({'model': model_info, 'cfg': cfg.model_dump()})}"
70
 
 
71
  run_dir = Path(out_root) / run_id
72
  ensure_dir(run_dir)
73
 
 
76
  "manifest_path": str(manifest_path),
77
  "model_info": model_info,
78
  "asr_config": cfg.model_dump(),
79
+ "backend": resolved_backend,
80
  }
81
  (run_dir / "run_meta.json").write_text(json.dumps(run_meta, ensure_ascii=False, indent=2), encoding="utf-8")
82
 
83
  out_path = run_dir / "asr_outputs.jsonl"
 
84
  done = set()
85
  if out_path.exists():
86
  for r in read_jsonl(out_path):
 
106
  ap.add_argument("--model_repo_id", default="openai/whisper-small")
107
  ap.add_argument("--device", default="cpu")
108
  ap.add_argument("--language", default="zh")
109
+ ap.add_argument("--backend", default="auto", choices=["auto", "whisper_transformers", "qwen3_asr"])
110
  args = ap.parse_args()
111
 
112
  run_id = run_asr(
 
115
  model_repo_id=args.model_repo_id,
116
  device=args.device,
117
  asr_config={"language": args.language},
118
+ backend=args.backend,
119
  )
120
  print(run_id)
121
 
requirements.txt CHANGED
@@ -22,4 +22,5 @@ soundfile
22
  librosa
23
  pydantic>=2.0
24
  opencc-python-reimplemented
25
- openai>=1.30.0
 
 
22
  librosa
23
  pydantic>=2.0
24
  opencc-python-reimplemented
25
+ openai>=1.30.0
26
+ qwen-asr
scripts/run_hf_job.py CHANGED
@@ -184,6 +184,7 @@ def main():
184
  ap.add_argument("--num", type=int, default=50)
185
 
186
  ap.add_argument("--model_repo_id", required=True)
 
187
  ap.add_argument("--language", default="zh")
188
 
189
  ap.add_argument("--out_root", default="runs")
@@ -215,6 +216,7 @@ def main():
215
  model_repo_id=args.model_repo_id,
216
  device="cpu",
217
  asr_config={"language": args.language},
 
218
  )
219
  print(f" - ASR done. run_id={run_id}")
220
 
 
184
  ap.add_argument("--num", type=int, default=50)
185
 
186
  ap.add_argument("--model_repo_id", required=True)
187
+ ap.add_argument("--backend", default="auto", choices=["auto", "whisper_transformers", "qwen3_asr"])
188
  ap.add_argument("--language", default="zh")
189
 
190
  ap.add_argument("--out_root", default="runs")
 
216
  model_repo_id=args.model_repo_id,
217
  device="cpu",
218
  asr_config={"language": args.language},
219
+ backend=args.backend,
220
  )
221
  print(f" - ASR done. run_id={run_id}")
222
 
ui/app.py CHANGED
@@ -40,13 +40,54 @@ def _read_jsonl(path: Path):
40
  return rows
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def _normalize_semantic_df(df: pd.DataFrame) -> pd.DataFrame:
44
  if df is None or len(df) == 0:
45
  return pd.DataFrame()
46
  out = df.copy()
47
  for col in ["semantic_error_types", "improvement_suggestions"]:
48
  if col in out.columns:
49
- out[col] = out[col].apply(lambda xs: xs if isinstance(xs, list) else ([] if pd.isna(xs) else [str(xs)]))
50
  if "semantic_error_types" in out.columns and "semantic_error_types_str" not in out.columns:
51
  out["semantic_error_types_str"] = out["semantic_error_types"].apply(lambda xs: " | ".join(xs))
52
  if "improvement_suggestions" in out.columns and "improvement_suggestions_str" not in out.columns:
@@ -233,7 +274,28 @@ def search_semantic(run_id, judgement, severity, business_impact, semantic_type,
233
  return q[cols].head(300)
234
 
235
 
236
- def run_hf_job(dataset_id, dataset_config, split, text_field, model_repo_id, language, num_samples):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  cmd = [
238
  sys.executable,
239
  "scripts/run_hf_job.py",
@@ -241,6 +303,7 @@ def run_hf_job(dataset_id, dataset_config, split, text_field, model_repo_id, lan
241
  "--split", split.strip(),
242
  "--text_field", text_field.strip(),
243
  "--model_repo_id", model_repo_id.strip(),
 
244
  "--language", language.strip(),
245
  "--num", str(int(num_samples)),
246
  ]
@@ -250,6 +313,7 @@ def run_hf_job(dataset_id, dataset_config, split, text_field, model_repo_id, lan
250
  p = subprocess.run(cmd, capture_output=True, text=True)
251
  out = (p.stdout or "") + ("\n" + (p.stderr or "") if p.stderr else "")
252
  if p.returncode != 0:
 
253
  out += "\n\n[HINT] If you see 401/403 for Common Voice: set HF_TOKEN in Space Settings → Secrets, and accept dataset terms on HF."
254
  empty = pd.DataFrame()
255
  return out, gr.update(), "", empty, empty, empty, "", "No diagnostic report yet.", gr.update(), gr.update()
@@ -261,6 +325,7 @@ def run_hf_job(dataset_id, dataset_config, split, text_field, model_repo_id, lan
261
  else:
262
  md, align_view, events_view, semantic_view, semantic_md, diagnostic_text, type_dd, domain_dd = "", pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), "", "No diagnostic report yet.", gr.update(), gr.update()
263
 
 
264
  return out, gr.update(choices=runs, value=latest), md, align_view, events_view, semantic_view, semantic_md, diagnostic_text, type_dd, domain_dd
265
 
266
 
@@ -269,7 +334,7 @@ with gr.Blocks() as demo:
269
 
270
  with gr.Accordion("Run from Hugging Face", open=True):
271
  gr.Markdown(
272
- "Fill in a dataset and a Whisper model, then click **Run**. "
273
  "If the dataset is gated, set `HF_TOKEN` in Space **Settings → Secrets**. "
274
  "For LLM semantic diagnostics, make sure `OPENAI_API_KEY` is available."
275
  )
@@ -282,8 +347,10 @@ with gr.Blocks() as demo:
282
  num_samples = gr.Number(label="Num samples", value=50, precision=0)
283
  with gr.Row():
284
  model_repo_id = gr.Textbox(label="HF model repo id", value="openai/whisper-small")
 
285
  language = gr.Textbox(label="Language", value="zh")
286
  run_btn = gr.Button("Run")
 
287
  logs = gr.Textbox(label="Logs", lines=16)
288
 
289
  gr.Markdown("## Browse Existing Runs")
@@ -336,6 +403,8 @@ with gr.Blocks() as demo:
336
  semantic_type.choices = type0["choices"]
337
  semantic_domain.choices = domain0["choices"]
338
 
 
 
339
  run_dd.change(
340
  on_select_run,
341
  inputs=[run_dd],
@@ -352,6 +421,6 @@ with gr.Blocks() as demo:
352
 
353
  run_btn.click(
354
  run_hf_job,
355
- inputs=[dataset_id, dataset_config, split, text_field, model_repo_id, language, num_samples],
356
  outputs=[logs, run_dd, summary_md, align_tbl, events_tbl, semantic_tbl, semantic_overview_md, diagnostic_md, semantic_type, semantic_domain],
357
  )
 
40
  return rows
41
 
42
 
43
def _normalize_semantic_cell(xs):
    """Turn one table cell into a list of non-empty, stripped strings.

    ``None`` and scalar missing values become ``[]``. Lists, tuples and sets
    are converted element by element, dropping missing and blank entries.
    Objects exposing ``tolist()`` (other than str/bytes/dict) are unpacked
    through it first. Anything else is stringified into a one-element list,
    or ``[]`` when the stripped text is empty.
    """

    def _is_missing(value):
        # Treat a value as missing only when pd.isna yields a plain bool True;
        # array-valued answers and any failure inside pd.isna mean "keep it".
        if value is None:
            return True
        try:
            flag = pd.isna(value)
        except Exception:
            return False
        return isinstance(flag, bool) and flag

    def _as_strings(items):
        stripped = (str(item).strip() for item in items if not _is_missing(item))
        return [text for text in stripped if text]

    if xs is None:
        return []
    if isinstance(xs, (list, tuple, set)):
        return _as_strings(xs)

    if hasattr(xs, "tolist") and not isinstance(xs, (str, bytes, dict)):
        # Best effort: if unpacking (or converting the unpacked items) fails,
        # fall through and handle the value as a scalar below.
        try:
            unpacked = xs.tolist()
            if isinstance(unpacked, (list, tuple, set)):
                return _as_strings(unpacked)
            if unpacked is None:
                return []
            xs = unpacked
        except Exception:
            pass

    if _is_missing(xs):
        return []
    text = str(xs).strip()
    return [text] if text else []
82
+
83
+
84
  def _normalize_semantic_df(df: pd.DataFrame) -> pd.DataFrame:
85
  if df is None or len(df) == 0:
86
  return pd.DataFrame()
87
  out = df.copy()
88
  for col in ["semantic_error_types", "improvement_suggestions"]:
89
  if col in out.columns:
90
+ out[col] = out[col].apply(_normalize_semantic_cell)
91
  if "semantic_error_types" in out.columns and "semantic_error_types_str" not in out.columns:
92
  out["semantic_error_types_str"] = out["semantic_error_types"].apply(lambda xs: " | ".join(xs))
93
  if "improvement_suggestions" in out.columns and "improvement_suggestions_str" not in out.columns:
 
274
  return q[cols].head(300)
275
 
276
 
277
def apply_backend_preset(backend, model_repo_id, language):
    """Return ``(model_repo_id, language, info)`` adjusted for a backend choice.

    Inputs are stringified and stripped; a falsy backend counts as ``"auto"``
    and an empty language becomes ``"zh"``. For the two named backends the
    model id is replaced by that backend's default unless it already contains
    the backend's marker substring (case-insensitive). For any other backend
    value the model id is kept, falling back to ``"openai/whisper-small"``
    only when it is empty. ``info`` is the status text for the chosen branch.
    """
    # backend -> (marker the model id must contain, default model id, info text)
    presets = {
        "qwen3_asr": (
            "qwen3-asr",
            "Qwen/Qwen3-ASR-0.6B",
            "Qwen3-ASR 已启用。建议模型:Qwen/Qwen3-ASR-0.6B 或 Qwen/Qwen3-ASR-1.7B。若环境未安装 qwen-asr,任务会失败。",
        ),
        "whisper_transformers": (
            "whisper",
            "openai/whisper-small",
            "Whisper Transformers 已启用。",
        ),
    }

    chosen = str(backend or "auto").strip()
    model = str(model_repo_id or "").strip()
    lang = str(language or "").strip() or "zh"

    preset = presets.get(chosen)
    if preset is None:
        auto_info = "backend=auto:会根据模型名自动选择适配器;模型名包含 qwen3-asr 时会走 Qwen3-ASR Adapter。"
        return model or "openai/whisper-small", lang, auto_info

    marker, fallback_model, info = preset
    # An empty model id never contains the marker, so it is replaced as well.
    if marker not in model.lower():
        model = fallback_model
    return model, lang, info
295
+
296
+
297
+ def run_hf_job(dataset_id, dataset_config, split, text_field, model_repo_id, backend, language, num_samples):
298
+ model_repo_id, language, preset_info = apply_backend_preset(backend, model_repo_id, language)
299
  cmd = [
300
  sys.executable,
301
  "scripts/run_hf_job.py",
 
303
  "--split", split.strip(),
304
  "--text_field", text_field.strip(),
305
  "--model_repo_id", model_repo_id.strip(),
306
+ "--backend", str(backend).strip(),
307
  "--language", language.strip(),
308
  "--num", str(int(num_samples)),
309
  ]
 
313
  p = subprocess.run(cmd, capture_output=True, text=True)
314
  out = (p.stdout or "") + ("\n" + (p.stderr or "") if p.stderr else "")
315
  if p.returncode != 0:
316
+ out = preset_info + "\n\n" + out
317
  out += "\n\n[HINT] If you see 401/403 for Common Voice: set HF_TOKEN in Space Settings → Secrets, and accept dataset terms on HF."
318
  empty = pd.DataFrame()
319
  return out, gr.update(), "", empty, empty, empty, "", "No diagnostic report yet.", gr.update(), gr.update()
 
325
  else:
326
  md, align_view, events_view, semantic_view, semantic_md, diagnostic_text, type_dd, domain_dd = "", pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), "", "No diagnostic report yet.", gr.update(), gr.update()
327
 
328
+ out = preset_info + "\n\n" + out
329
  return out, gr.update(choices=runs, value=latest), md, align_view, events_view, semantic_view, semantic_md, diagnostic_text, type_dd, domain_dd
330
 
331
 
 
334
 
335
  with gr.Accordion("Run from Hugging Face", open=True):
336
  gr.Markdown(
337
+ "Fill in a dataset and an ASR model, then click **Run**. "
338
  "If the dataset is gated, set `HF_TOKEN` in Space **Settings → Secrets**. "
339
  "For LLM semantic diagnostics, make sure `OPENAI_API_KEY` is available."
340
  )
 
347
  num_samples = gr.Number(label="Num samples", value=50, precision=0)
348
  with gr.Row():
349
  model_repo_id = gr.Textbox(label="HF model repo id", value="openai/whisper-small")
350
+ backend = gr.Dropdown(label="ASR backend", choices=["auto", "whisper_transformers", "qwen3_asr"], value="auto")
351
  language = gr.Textbox(label="Language", value="zh")
352
  run_btn = gr.Button("Run")
353
+ backend_info = gr.Markdown("backend=auto:会根据模型名自动选择适配器;模型名包含 qwen3-asr 时会走 Qwen3-ASR Adapter。")
354
  logs = gr.Textbox(label="Logs", lines=16)
355
 
356
  gr.Markdown("## Browse Existing Runs")
 
403
  semantic_type.choices = type0["choices"]
404
  semantic_domain.choices = domain0["choices"]
405
 
406
+ backend.change(apply_backend_preset, inputs=[backend, model_repo_id, language], outputs=[model_repo_id, language, backend_info])
407
+
408
  run_dd.change(
409
  on_select_run,
410
  inputs=[run_dd],
 
421
 
422
  run_btn.click(
423
  run_hf_job,
424
+ inputs=[dataset_id, dataset_config, split, text_field, model_repo_id, backend, language, num_samples],
425
  outputs=[logs, run_dd, summary_md, align_tbl, events_tbl, semantic_tbl, semantic_overview_md, diagnostic_md, semantic_type, semantic_domain],
426
  )