105 lines
2.9 KiB
Python
105 lines
2.9 KiB
Python
"""
VAD (voice activity detection) tests.

Uses the audio file generated by the TTS tests as input.
"""
|
|
|
|
from pathlib import Path
|
|
from src.Module.vad.vad import SileroVAD, detect_speech_segments
|
|
|
|
|
|
def test_detect_file():
    """Run file-based speech detection on the TTS sample and list the segments.

    Prints a banner, looks for ``tts/output/stream_test.wav`` two directory
    levels above this file, and passes it to ``detect_speech_segments``.

    Returns:
        True when at least one speech segment was detected; False when the
        sample audio file does not exist or no segment was found.
    """
    banner = "=" * 60
    print(banner)
    print(" VAD 语音活动检测测试")
    print(banner)

    # The sample audio is produced by the TTS tests.
    base_dir = Path(__file__).parent.parent
    wav_path = base_dir / 'tts' / 'output' / 'stream_test.wav'

    if not wav_path.exists():
        print(f"[跳过] 音频文件不存在: {wav_path}")
        print("请先运行 TTS 测试生成音频文件")
        return False

    print(f"\n音频文件: {wav_path.name}")

    # Detect the speech segments; a relatively short silence threshold is used.
    print("\n加载 SileroVAD 模型...")
    found = detect_speech_segments(str(wav_path), min_silence_duration_ms=300)

    print(f"\n检测到 {len(found)} 个语音段落:")
    for number, segment in enumerate(found, start=1):
        begin_s = segment.start_ms / 1000
        finish_s = segment.end_ms / 1000
        length_s = (segment.end_ms - segment.start_ms) / 1000
        print(f" [{number}] {begin_s:.2f}s - {finish_s:.2f}s (时长: {length_s:.2f}s)")

    return len(found) > 0
|
|
|
|
|
|
def test_realtime_detection():
    """Simulate streaming detection by feeding the TTS sample to SileroVAD in chunks.

    The WAV payload is cut into fixed-size byte chunks; each full chunk is
    passed to ``SileroVAD.detect`` and every reported voice start / voice end
    is printed with its position in the audio.

    Returns:
        True if at least one chunk reported ``voice_start``; False if the
        sample audio file does not exist or no voice start was reported.
    """
    print("\n" + "=" * 60)
    print("实时检测模拟测试")
    print("=" * 60)

    import wave

    audio_file = Path(__file__).parent.parent / 'tts' / 'output' / 'stream_test.wav'

    if not audio_file.exists():
        print("[跳过] 音频文件不存在")
        return False

    # Read the whole payload plus the header fields needed to convert byte
    # offsets into seconds.
    with wave.open(str(audio_file), 'rb') as wav:
        sample_rate = wav.getframerate()
        # One frame holds one sample per channel, so its size depends on both
        # the sample width and the channel count (not always 2 bytes).
        bytes_per_frame = wav.getsampwidth() * wav.getnchannels()
        audio_data = wav.readframes(wav.getnframes())

    print(f"\n采样率: {sample_rate} Hz")
    print(f"音频长度: {len(audio_data) / bytes_per_frame / sample_rate:.2f} 秒")

    # Create the VAD
    vad = SileroVAD()

    # Simulate real-time streaming detection.
    # NOTE(review): 512 samples * 2 bytes presumes the VAD consumes 16-bit mono
    # PCM -- confirm against SileroVAD.detect before feeding other formats.
    chunk_size = 512 * 2
    voice_detected = False

    print("\n模拟实时流式检测:")
    # Stopping the range one chunk short of the end skips a trailing partial
    # chunk, so only full-size chunks reach the VAD.
    for offset in range(0, len(audio_data) - chunk_size + 1, chunk_size):
        result = vad.detect(audio_data[offset:offset + chunk_size])

        if result.voice_start:
            print(f" [{offset / bytes_per_frame / sample_rate:.2f}s] 语音开始")
            voice_detected = True
        if result.voice_end:
            print(f" [{offset / bytes_per_frame / sample_rate:.2f}s] 语音结束")

    return voice_detected
|
|
|
|
|
|
if __name__ == '__main__':
    # Run both checks in order, pairing each display label with its outcome.
    results = [
        ("文件语音检测", test_detect_file()),
        ("实时检测模拟", test_realtime_detection()),
    ]

    # Summary: a falsy outcome covers both a failed and a skipped check.
    print("\n" + "=" * 60)
    print("测试结果:")
    for name, success in results:
        if success:
            status = "✓ 通过"
        else:
            status = "✗ 失败/跳过"
        print(f" {name}: {status}")
|