TTS 返回语音优化

2025-10-19 09:55:32 +08:00
parent 060ba472c7
commit 3cf8fbfe9e
2 changed files with 36 additions and 8 deletions
--- a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java
+++ b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java
@@ -15,10 +15,7 @@ import org.apache.commons.io.FileUtils;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;

-import javax.sound.sampled.AudioFormat;
-import javax.sound.sampled.AudioSystem;
-import javax.sound.sampled.DataLine;
-import javax.sound.sampled.TargetDataLine;
+import javax.sound.sampled.*;
 import javax.websocket.*;
 import javax.websocket.server.PathParam;
 import javax.websocket.server.ServerEndpoint;
@@ -31,6 +28,9 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
+import javax.sound.sampled.*;
+import java.io.*;
+import java.nio.ByteBuffer;

 /**
 * 语音面试 web处理器
@@ -202,9 +202,14 @@ public class ChatWebSocketHandler {
 //                webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
                log.info("3.1 开始发送数据音频流啦");
                // 将音频数据转换为 Base64 编码的字符串
-                String base64Audio = Base64.getEncoder().encodeToString(bytes);
+                //进行转换
+                // 转换音频格式
+                AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
+                byte[] outputAudioBytes = convertAudio(bytes, format);
+                String base64Audio = Base64.getEncoder().encodeToString(outputAudioBytes);
                String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
                webSocket.send(message);
+                log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
                // 3. 提交音频并请求转录
 //                webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
 //                webSocket.send("{\"type\": \"response.create\"}");
@@ -212,7 +217,7 @@ public class ChatWebSocketHandler {
        }catch (Exception e){
            e.printStackTrace();
        }
-        log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
+
    }

    // 连接关闭时调用
@@ -228,6 +233,28 @@ public class ChatWebSocketHandler {
        throwable.printStackTrace();
    }

+    public static byte[] convertAudio(byte[] inputAudioBytes, AudioFormat targetFormat) throws Exception {
+        // Wrap the raw bytes in an AudioInputStream. NOTE(review): the stream is tagged with targetFormat itself and the length argument is a byte count (the constructor takes sample frames) - confirm both are intended.
+        ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(inputAudioBytes);
+        AudioInputStream inputAudioStream = new AudioInputStream(byteArrayInputStream, targetFormat, inputAudioBytes.length);

+        // Request a stream in the target format. NOTE(review): source and target format are the same object here, so this presumably performs no real conversion - verify against the actual input encoding.
+        AudioInputStream outputAudioStream = AudioSystem.getAudioInputStream(targetFormat, inputAudioStream);

+        // Collect the output audio into an in-memory byte buffer
+        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+        byte[] buffer = new byte[1024];
+        int bytesRead;

+        // Copy chunks from the AudioInputStream into the ByteArrayOutputStream until read() reports end of stream (-1)
+        while ((bytesRead = outputAudioStream.read(buffer)) != -1) {
+            byteArrayOutputStream.write(buffer, 0, bytesRead);
+        }

+        // Return the collected bytes as a byte[]
+        return byteArrayOutputStream.toByteArray();
+    }
+
    /**
     * 将字节数组保存为WebM文件
     *
@@ -297,7 +324,7 @@ public class ChatWebSocketHandler {
    private void createWhisperRealtimeSocket(String clientId){
        try{
            OkHttpClient client = new OkHttpClient();
-            CountDownLatch latch = new CountDownLatch(1);
+//            CountDownLatch latch = new CountDownLatch(1);
            // 设置 WebSocket 请求
            Request request = new Request.Builder()
                    .url(API_URL)
--- a/vetti-admin/src/main/resources/application-druid.yml
+++ b/vetti-admin/src/main/resources/application-druid.yml
@@ -145,7 +145,8 @@ verification:
 # 文本转语音
 elevenLabs:
    baseUrl: https://api.elevenlabs.io/v1
-    apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812
+#    apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812
+    apiKey: sk_88f5a560e1bbde0e5b8b6b6eb1812163a98bfb98554acbec
    modelId: eleven_turbo_v2_5
 # 语音转文本
 whisper: