perf: ⚡ 双向流式

2026-01-01 21:14:17 +08:00 · 2026-01-01 21:14:17 +08:00 · 48fe2f37ae
commit 48fe2f37ae
parent 7f9ae0e036
5 changed files with 195 additions and 0 deletions
--- a/src/Module/tts/tts.py
+++ b/src/Module/tts/tts.py
@ -267,6 +267,113 @@ class StreamingTTS:
                pass
            self._client = None

+    def stream_from_generator(
+        self,
+        text_generator: Generator[str, None, None],
+        voice: str = None,
+        language: str = None,
+        speech_rate: float = None,
+        volume: int = None,
+        pitch_rate: float = None,
+    ) -> Generator[AudioChunk, None, None]:
+        """
+        Bidirectional streaming synthesis: send text while receiving audio.
+
+        The text produced by ``text_generator`` is sent from a background
+        thread (``_send_text_generator``) while this generator yields the
+        chunks returned by the streaming callback's ``get_chunk()``.
+
+        Args:
+            text_generator: Generator that produces the text chunk by chunk.
+            voice: Voice name; ``self.voice`` is used when omitted or empty.
+            language: Language; ``self.language`` is used when omitted or
+                empty.
+            speech_rate: Speech rate, clamped to [0.5, 2.0];
+                ``self.speech_rate`` is used when ``None``.
+            volume: Volume, clamped to [0, 100]; ``self.volume`` is used
+                when ``None``. An explicit ``0`` is honoured.
+            pitch_rate: Pitch rate, clamped to [0.5, 2.0];
+                ``self.pitch_rate`` is used when ``None``.
+
+        Yields:
+            AudioChunk: Audio data chunks. Iteration ends after a chunk
+            whose ``is_final`` flag is set has been yielded, or as soon as
+            ``get_chunk()`` returns ``None``.
+
+        Example:
+            >>> def get_text():
+            ...     yield "Hello, "
+            ...     yield "world!"
+            >>> tts = StreamingTTS()
+            >>> for chunk in tts.stream_from_generator(get_text()):
+            ...     play_audio(chunk.data)
+        """
+        _voice = voice or self.voice
+        _language = language or self.language
+        # Test the numeric overrides against None rather than truthiness:
+        # `volume or self.volume` would silently replace an explicit 0
+        # (a value inside the accepted range) with the instance default.
+        _speech_rate = self._clamp(
+            self.speech_rate if speech_rate is None else speech_rate,
+            0.5, 2.0)
+        _volume = self._clamp(
+            self.volume if volume is None else volume, 0, 100)
+        _pitch_rate = self._clamp(
+            self.pitch_rate if pitch_rate is None else pitch_rate,
+            0.5, 2.0)
+
+        self._callback = _StreamingCallback()
+        self._client = QwenTtsRealtime(
+            model=self.model,
+            callback=self._callback,
+            url=config.WS_URL
+        )
+
+        # Sender thread: feeds the text to the client while this generator
+        # consumes audio. It is a daemon thread and is never joined here.
+        send_thread = threading.Thread(
+            target=self._send_text_generator,
+            args=(
+                text_generator,
+                _voice,
+                _language,
+                _speech_rate,
+                _volume,
+                _pitch_rate),
+            daemon=True)
+
+        try:
+            self._client.connect()
+            send_thread.start()
+
+            while True:
+                chunk = self._callback.get_chunk()
+                if chunk is None:
+                    break
+                yield chunk
+                # The final chunk is still handed to the caller (it may
+                # carry an error) before the loop stops.
+                if chunk.is_final:
+                    break
+
+        finally:
+            # Also runs when the consumer stops iterating early.
+            self._cleanup()
+
+    def _send_text_generator(
+        self,
+        text_generator: Generator[str, None, None],
+        voice: str,
+        language: str,
+        speech_rate: float,
+        volume: int,
+        pitch_rate: float
+    ) -> None:
+        """
+        Send the text to the client chunk by chunk (background thread target).
+
+        Waits for the callback to report a connection, configures the
+        session, appends every non-empty text chunk and then finishes the
+        request. Any exception is converted into a final ``AudioChunk``
+        carrying the error text and pushed onto the callback's audio queue.
+
+        Args:
+            text_generator: Generator that produces the text chunk by chunk.
+            voice: Voice passed to the session.
+            language: Language passed to the session as ``language_type``.
+            speech_rate: Speech rate passed to the session.
+            volume: Volume passed to the session.
+            pitch_rate: Pitch rate passed to the session.
+        """
+        try:
+            connected = self._callback.wait_connected()
+            if not connected:
+                # NOTE(review): nothing is queued on this path, so the
+                # consumer relies on get_chunk() ending on its own - confirm.
+                return
+
+            session_options = dict(
+                voice=voice,
+                response_format=AudioFormat.PCM_24000HZ_MONO_16BIT,
+                mode=config.MODE,
+                language_type=language,
+                speech_rate=speech_rate,
+                volume=volume,
+                pitch_rate=pitch_rate,
+            )
+            self._client.update_session(**session_options)
+
+            # Falsy chunks (e.g. empty strings) are skipped, not sent.
+            # NOTE(review): self._client / self._callback are re-read from
+            # the instance on every use; verify the instance is not reused
+            # while this thread is still running.
+            for piece in filter(None, text_generator):
+                self._client.append_text(piece)
+
+            self._client.finish()
+
+        except Exception as exc:
+            failure = AudioChunk(data=b'', is_final=True, error=str(exc))
+            self._callback._audio_queue.put(failure)
+
    def stream_to_file(self, text: str, output_file: str, **kwargs) -> bool:
        """
        流式合成并保存到文件
--- a/test/asr/mirror_hello.mp3
+++ b/test/asr/mirror_hello.mp3
--- a/test/tts/output/bidirectional_test.wav
+++ b/test/tts/output/bidirectional_test.wav
--- a/test/tts/output/stream_test.wav
+++ b/test/tts/output/stream_test.wav
--- a/test/tts/test_bidirectional.py
+++ b/test/tts/test_bidirectional.py
@ -0,0 +1,88 @@
+"""
+TTS 双向流测试 - 边发送文本边接收音频
+"""
+
+import time
+from pathlib import Path
+from src.Module.tts.tts import StreamingTTS
+
+
+def simulate_llm_output():
+    """
+    Simulate streaming LLM output.
+
+    One sentence is split into several parts that are yielded one after
+    another, with a short pause after each part.
+    """
+    pieces = (
+        "你好，",
+        "我是通义千问",
+        "语音合成系统。",
+        "现在正在进行",
+        "双向流测试！",
+    )
+
+    for piece in pieces:
+        print("  [发送文本] -> " + piece)
+        yield piece
+        # Pause after handing out the piece to mimic LLM generation latency.
+        time.sleep(0.1)
+
+
+def test_bidirectional_stream():
+    """
+    Exercise bidirectional streaming synthesis end to end.
+
+    Feeds the simulated LLM output into ``stream_from_generator``, collects
+    the received audio, writes it to ``output/bidirectional_test.wav`` next
+    to this file and prints a summary.
+
+    Returns:
+        bool: True when at least one audio chunk was received and no error
+        chunk was reported.
+    """
+    print("=" * 60)
+    print("     TTS 双向流测试")
+    print("=" * 60)
+    print("\n一句话分为多个部分发送，同时接收音频\n")
+
+    tts = StreamingTTS(voice='Cherry')
+
+    # WAV parameters used for both the file header and the duration below.
+    sample_rate = 24000
+    sample_width = 2
+
+    total_bytes = 0
+    chunk_count = 0
+    audio_data = bytearray()
+    had_error = False
+
+    print("开始双向流传输...\n")
+
+    # Text is sent and audio is received concurrently.
+    for audio_chunk in tts.stream_from_generator(simulate_llm_output()):
+        if audio_chunk.error:
+            print(f"  [错误] {audio_chunk.error}")
+            # Remember the failure so audio received earlier cannot make
+            # the test report success.
+            had_error = True
+            break
+
+        if audio_chunk.data:
+            chunk_count += 1
+            chunk_bytes = len(audio_chunk.data)
+            total_bytes += chunk_bytes
+            audio_data.extend(audio_chunk.data)
+            # Each replacement field stays on one line: fields broken across
+            # lines in a single-quoted f-string only parse on Python 3.12+.
+            print(
+                f"  [收到音频] Chunk {chunk_count:02d}: "
+                f"{chunk_bytes:5d} 字节 | 累计: {total_bytes:6d} 字节")
+
+        if audio_chunk.is_final:
+            print("\n传输完成!")
+            break
+
+    # Save whatever audio was received.
+    output_dir = Path(__file__).parent / 'output'
+    output_dir.mkdir(exist_ok=True)
+    wav_file = output_dir / 'bidirectional_test.wav'
+
+    import wave
+    with wave.open(str(wav_file), 'wb') as wav:
+        wav.setnchannels(1)
+        wav.setsampwidth(sample_width)
+        wav.setframerate(sample_rate)
+        wav.writeframes(audio_data)
+
+    print("\n结果统计:")
+    print(f"  音频块: {chunk_count} 个")
+    print(f"  总字节: {total_bytes} 字节")
+    print(f"  音频时长: {total_bytes / (sample_rate * sample_width):.2f} 秒")
+    print(f"  已保存: {wav_file}")
+
+    return chunk_count > 0 and not had_error
+
+
+if __name__ == '__main__':
+    # Run the test as a script and print a pass/fail verdict.
+    passed = test_bidirectional_stream()
+    verdict = '✓ 通过' if passed else '✗ 失败'
+    print(f"\n测试结果: {verdict}")