"""
ASR configuration file.

Defines the default parameters for speech recognition.
"""

# ============================================================
# Model configuration
# ============================================================

# Default model.
#   qwen3-asr-flash:           short audio (<= 5 minutes)
#   qwen3-asr-flash-filetrans: long audio (<= 12 hours)
MODEL = 'qwen3-asr-flash'

# API base URL (Beijing region).
API_URL = 'https://dashscope.aliyuncs.com/api/v1'

# Singapore-region URL (fallback); uncomment to use it instead:
# API_URL = 'https://dashscope-intl.aliyuncs.com/api/v1'


# ============================================================
# Recognition parameters
# ============================================================

# Language code (possible values: 'zh', 'en', 'ja', 'ko', 'de', 'fr', 'ru',
# 'es', 'it', 'pt', 'ar', etc.).
# None means the language is detected automatically.
LANGUAGE = None

# Whether to enable ITN (Inverse Text Normalization).
# ITN converts spoken-form numbers to written form, e.g. "一百二十三" -> "123".
ENABLE_ITN = False


# ============================================================
# Supported audio formats
# ============================================================

# File extensions (lower-case, without the leading dot) accepted as input.
SUPPORTED_FORMATS = [
    'aac', 'amr', 'avi', 'aiff', 'flac', 'flv',
    'm4a', 'mkv', 'mp3', 'mpeg', 'ogg', 'opus',
    'wav', 'webm', 'wma', 'wmv',
]

# Maximum file size in bytes (10 MB).
MAX_FILE_SIZE = 10 * 1024 * 1024

# Maximum audio duration in seconds (5 minutes).
MAX_DURATION = 5 * 60