效率优化

2025-11-29 09:59:12 +08:00
parent 56b6e67180
commit 6ee8e976cf
4 changed files with 842 additions and 28 deletions
--- a/vetti-common/src/main/java/com/vetti/common/ai/elevenLabs/ElevenLabsStreamClient.java
+++ b/vetti-common/src/main/java/com/vetti/common/ai/elevenLabs/ElevenLabsStreamClient.java
@@ -19,10 +19,7 @@ import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;

 import javax.websocket.Session;
-import java.io.ByteArrayInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
+import java.io.*;
 import java.nio.ByteBuffer;
 import java.util.HashMap;
 import java.util.Map;
@@ -51,8 +48,8 @@ public class ElevenLabsStreamClient {
     * @param voiceId        语音ID (可从ElevenLabs网站获取)
     * @throws IOException 网络请求或文件操作异常
     */
-    private void textToSpeech(String text, String voiceId, CloseableHttpClient httpClient, Session session) throws IOException {
-        HttpPost httpPost = new HttpPost(BASE_URL + "/text-to-speech/" + voiceId+"/stream?output_format=mp3_24000_48&optimize_streaming_latency=2");
+    private void textToSpeech(String text, String voiceId, CloseableHttpClient httpClient, Session session,String outputFormat) throws IOException {
+        HttpPost httpPost = new HttpPost(BASE_URL + "/text-to-speech/" + voiceId+"/stream?output_format="+outputFormat);
        httpPost.setHeader("xi-api-key", apiKey);
        httpPost.setHeader("Content-Type", "application/json");

@@ -67,25 +64,48 @@ public class ElevenLabsStreamClient {
            HttpEntity responseEntity = response.getEntity();
            if (responseEntity != null) {
                try (InputStream inputStream = responseEntity.getContent();) {
-//                    byte[] allData = inputStream.readAllBytes();
-//                    InputStream stableStream = new ByteArrayInputStream(allData);
-//                    sendAudioStream(session,stableStream);
+                    //用来合并零散的碎片
+                    ByteArrayOutputStream smallChunkBuffer = new ByteArrayOutputStream();  //
                    byte[] buffer = new byte[4096];
                    int bytesRead;
+                    int n = 0;
                    while ((bytesRead = inputStream.read(buffer)) != -1) {
-                        ByteBuffer byteBuffer = ByteBuffer.wrap(buffer, 0, bytesRead);
-//                        log.info("字符流的长度大小:{}", bytesRead);
-                        if(bytesRead != 1 && bytesRead != 2){
-                            session.getAsyncRemote().sendBinary(byteBuffer);
+                        //Flush the merged audio once at least 3072 bytes (3 KB) have accumulated in the buffer
+                        if(smallChunkBuffer.size() >= 3072){
+                            log.info("语音流大于"+smallChunkBuffer.size()+"啦,发送完成!!!");
+                            byte[] merged = smallChunkBuffer.toByteArray();
+                            smallChunkBuffer.reset();
+                            session.getAsyncRemote().sendBinary(ByteBuffer.wrap(merged));
                            try {
-                                Thread.sleep(20);
+                                Thread.sleep(50);
                            }catch (Exception e){}
-//                            log.info("正常语音发送出去语音流啦!!!");
                        }
+                        //发送三次告诉前端要合成一次语音
+//                        if(n == 2){
+//                            Map<String,String> dataText = new HashMap<>();
+//                            dataText.put("type","voiceMiddleEnd");
+//                            dataText.put("content","");
+//                            session.getBasicRemote().sendText(JSONUtil.toJsonStr(dataText));
+//                            //重置一下
+//                            n = 0;
+//                        }
+                        // 零散的碎片 → 加入缓冲区，不立即发送
+                        smallChunkBuffer.write(buffer, 0, bytesRead);
+                        n++;
+                    }
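+                    //Stream fully read: send whatever is left in the buffer as the final chunk (a remainder of 2 bytes or fewer is skipped)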
+                    if(smallChunkBuffer.size() > 2){
+                        log.info("最后一次发送,语音流大于"+smallChunkBuffer.size()+"啦,发送完成!!!");
+                        byte[] merged = smallChunkBuffer.toByteArray();
+                        smallChunkBuffer.reset();
+                        session.getAsyncRemote().sendBinary(ByteBuffer.wrap(merged));
+                        try {
+                            Thread.sleep(50);
+                        }catch (Exception e){}
                    }
                    //返回结束点
                    try {
-                        Thread.sleep(100);
+                        Thread.sleep(50);
                    }catch (Exception e){}
                    Map<String,String> dataText = new HashMap<>();
                    dataText.put("type","voiceEnd");
@@ -120,13 +140,13 @@ public class ElevenLabsStreamClient {
     * @param inputText
     * @return
     */
-    public void handleTextToVoice(String inputText,Session session){
+    public void handleTextToVoice(String inputText,Session session,String outputFormat){
        CloseableHttpClient httpClient = HttpClients.createDefault();
        try {
            // 使用第一个可用语音进行文本转语音(澳洲本地女声)
 //            String firstVoiceId = "56bWURjYFHyYyVf490Dp";
            String firstVoiceId = "56bWURjYFHyYyVf490Dp";
-            textToSpeech(inputText, firstVoiceId,httpClient,session);
+            textToSpeech(inputText, firstVoiceId,httpClient,session,outputFormat);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {