语音AI分片以及流式处理返回结果

2025-10-16 14:27:20 +08:00
parent 1c816402df
commit a5777d7beb
6 changed files with 285 additions and 56 deletions
--- a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java
+++ b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java
@@ -1,7 +1,10 @@
 package com.vetti.socket;

+import cn.hutool.core.util.StrUtil;
+import cn.hutool.json.JSONUtil;
 import com.vetti.common.ai.elevenLabs.ElevenLabsClient;
-import com.vetti.common.ai.gpt.ChatGPTClient;
+import com.vetti.common.ai.gpt.OpenAiStreamClient;
+import com.vetti.common.ai.gpt.service.OpenAiStreamListenerService;
 import com.vetti.common.ai.whisper.WhisperClient;
 import com.vetti.common.config.RuoYiConfig;
 import com.vetti.common.utils.spring.SpringUtils;
@@ -15,9 +18,10 @@ import javax.websocket.server.ServerEndpoint;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;

 /**
 * 语音面试 web处理器
@@ -27,6 +31,11 @@ import java.nio.channels.FileChannel;
@Component
 public class ChatWebSocketHandler {

+    /**
+     * 缓存客户端流式解析的语音文本数据
+     */
+    private final Map<String,String> cacheClientTts = new ConcurrentHashMap<>();
+
    // 语音文件保存目录
    private static final String VOICE_STORAGE_DIR = "/voice_files/";

@@ -51,73 +60,109 @@ public class ChatWebSocketHandler {
    @OnOpen
    public void onOpen(Session session, @PathParam("clientId") String clientId) {
        log.info("WebSocket 链接已建立:{}", clientId);
-        //创建会话
-
+        cacheClientTts.put(clientId,new String());
    }

    // 接收文本消息
    @OnMessage
-    public void onTextMessage(Session session, String message) {
+    public void onTextMessage(Session session, String message,@PathParam("clientId") String clientId) {
        System.out.println("接收到文本消息: " + message);
-        // 可以在这里处理文本流数据
        try {
-            // 发送响应
-            session.getBasicRemote().sendText("已收到文本: " + message);
+            //处理文本结果
+            if(StrUtil.isNotEmpty(message)){
+                Map<String,String> mapResult = JSONUtil.toBean(JSONUtil.parseObj(message),Map.class);
+                String resultFlag = mapResult.get("msg");
+                if("done".equals(resultFlag)){
+                    //语音结束,开始进行回答解析
+                    String cacheResultText = cacheClientTts.get(clientId);
+                    log.info("1、开始进行AI回答时间:{}",System.currentTimeMillis()/1000);
+                    //把提问的文字发送给CPT(流式处理)
+                    OpenAiStreamClient aiStreamClient = SpringUtils.getBean(OpenAiStreamClient.class);
+                    aiStreamClient.streamChat(cacheResultText, new OpenAiStreamListenerService() {
+                        @Override
+                        public void onMessage(String content) {
+                            // 实时输出内容
+                            //开始进行语音输出-流式持续输出
+                            log.info("2、开始进行AI回答时间:{}",System.currentTimeMillis()/1000);
+                            //把结果文字转成语音文件
+                            //生成文件
+                            // 生成唯一文件名
+                            String resultFileName = clientId + "_" + System.currentTimeMillis() + ".opus";
+                            String resultPathUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_RESULT_DIR + resultFileName;
+                            ElevenLabsClient elevenLabsClient = SpringUtils.getBean(ElevenLabsClient.class);
+                            elevenLabsClient.handleTextToVoice(content, resultPathUrl);
+                            log.info("3、开始进行AI回答时间:{}",System.currentTimeMillis()/1000);
+                            //持续返回数据流给客户端
+                            try {
+                                //文件转换成文件流
+                                ByteBuffer outByteBuffer = convertFileToByteBuffer(resultPathUrl);
+                                //发送文件流数据
+                                session.getBasicRemote().sendBinary(outByteBuffer);
+                                // 发送响应确认
+                                log.info("4、开始进行AI回答时间:{}",System.currentTimeMillis()/1000);
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }

+                        @Override
+                        public void onComplete() {
+                            try {
+                                Map<String,String> resultEntity = new HashMap<>();
+                                resultEntity.put("msg","done");
+                                //发送通知告诉客户端已经回答结束了
+                                session.getBasicRemote().sendText(JSONUtil.toJsonStr(resultEntity));
+                            } catch (Exception e) {
+                                throw new RuntimeException(e);
+                            }
+                            log.info("5、结束进行AI回答时间:{}",System.currentTimeMillis()/1000);
+                        }
+
+                        @Override
+                        public void onError(Throwable throwable) {
+                            throwable.printStackTrace();
+                        }
+                    });
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
    // 接收二进制消息（流数据）
    @OnMessage
    public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
-        try {
-            log.info("1、运行时间:{}",System.currentTimeMillis()/1000);
+        log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
        log.info("客户端ID为:{}", clientId);
        // 处理二进制流数据
        byte[] bytes = new byte[byteBuffer.remaining()];
        //从缓冲区中读取数据并存储到指定的字节数组中
        byteBuffer.get(bytes);
-            log.info("2、运行时间:{}",System.currentTimeMillis()/1000);
+        log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
        // 生成唯一文件名
        String fileName = clientId + "_" + System.currentTimeMillis() + ".webm";
        String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
-//            String pathUrl = "/Users/wangxiangshun/Desktop/0.8733346782733291.webm";
        log.info("文件路径为:{}", pathUrl);
-            saveAsWebM(bytes, pathUrl);
-            log.info("3、运行时间:{}",System.currentTimeMillis()/1000);
+        log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
+        try{
+            //接收到数据流后直接就进行SST处理
            //拿到文件进行文字转换
            WhisperClient whisperClient = SpringUtils.getBean(WhisperClient.class);
-            String resultText = whisperClient.handleVoiceToText(pathUrl);
-            log.info("4、运行时间:{}",System.currentTimeMillis()/1000);
-            //把提问的文字发送给CPT
-            ChatGPTClient chatGPTClient = SpringUtils.getBean(ChatGPTClient.class);
-            String resultMsg = chatGPTClient.handleAiChat(resultText);
-            log.info("5、运行时间:{}",System.currentTimeMillis()/1000);
-            //把结果文字转成语音文件
-            //生成文件
-            // 生成唯一文件名
-            String resultFileName = clientId + "_" + System.currentTimeMillis() + ".opus";
-            String resultPathUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_RESULT_DIR + resultFileName;
-            ElevenLabsClient elevenLabsClient = SpringUtils.getBean(ElevenLabsClient.class);
-            elevenLabsClient.handleTextToVoice(resultMsg, resultPathUrl);
-            log.info("6、运行时间:{}",System.currentTimeMillis()/1000);
-            //把语音文件转换成流,发送给前端
-            System.out.println("接收到二进制数据，长度: " + bytes.length + " bytes");
-            try {
-                //文件转换成文件流
-                ByteBuffer outByteBuffer = convertFileToByteBuffer(resultPathUrl);
-                //发送文件流数据
-                session.getBasicRemote().sendBinary(outByteBuffer);
-                // 发送响应确认
-//                session.getBasicRemote().sendText("已收到二进制数据，长度: " + bytes.length);
-                log.info("7、运行时间:{}",System.currentTimeMillis()/1000);
-            } catch (IOException e) {
-                e.printStackTrace();
+            String resultText = whisperClient.transcribeAudio(bytes,fileName);
+            //进行客户端文本数据存储
+            String cacheString = cacheClientTts.get(clientId);
+            if(StrUtil.isNotEmpty(cacheString)){
+                cacheString = cacheString+resultText;
+            }else {
+                cacheString = resultText;
            }
+            cacheClientTts.put(clientId,cacheString);
+
        }catch (Exception e){
            e.printStackTrace();
        }
+        log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
    }

    // 连接关闭时调用
--- a/vetti-admin/target/classes/application-druid.yml
+++ b/vetti-admin/target/classes/application-druid.yml
@@ -146,7 +146,7 @@ verification:
 elevenLabs:
    baseUrl: https://api.elevenlabs.io/v1
    apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812
-    modelId: eleven_turbo_v2.5
+    modelId: eleven_turbo_v2_5
 # 语音转文本
 whisper:
    apiUrl: https://api.openai.com/v1/audio/transcriptions
--- a/vetti-common/src/main/java/com/vetti/common/ai/elevenLabs/ElevenLabsClient.java
+++ b/vetti-common/src/main/java/com/vetti/common/ai/elevenLabs/ElevenLabsClient.java
@@ -106,8 +106,7 @@ public class ElevenLabsClient {
            VoicesResponse voicesResponse = getVoices(httpClient);
            if (voicesResponse != null && voicesResponse.getVoices() != null
                    && voicesResponse.getVoices().length > 0) {
-                // 使用第一个可用语音进行文本转语音
-//                String firstVoiceId = voicesResponse.getVoices()[0].getVoice_id();
+                // 使用第一个可用语音进行文本转语音(澳洲本地女声)
                String firstVoiceId = "21m00Tcm4TlvDq8ikWAM";
                textToSpeech(inputText, firstVoiceId, outputFile,httpClient);
            }
--- a/vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java
+++ b/vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java
@@ -0,0 +1,121 @@
+package com.vetti.common.ai.gpt;
+
+import cn.hutool.json.JSONObject;
+import cn.hutool.json.JSONUtil;
+import com.vetti.common.ai.gpt.service.OpenAiStreamListenerService;
+import okhttp3.*;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * AI ChatGPT 对话
+ */
+@Component
+public class OpenAiStreamClient {
+
+    @Value("${chatGpt.apiKey}")
+    private String apiKey;
+    // API端点URL
+    @Value("${chatGpt.apiUrl}")
+    private String apiUrl;
+
+    @Value("${chatGpt.model}")
+    private String model;
+
+    @Value("${chatGpt.role}")
+    private String role;
+
+    /**
+     * 发送流式请求
+     *
+     * @param prompt   提示词
+     * @param listener 流式响应监听器
+     */
+    public void streamChat(String prompt, OpenAiStreamListenerService listener) {
+        OkHttpClient client = new OkHttpClient.Builder()
+                .connectTimeout(30, TimeUnit.SECONDS)
+                .readTimeout(60, TimeUnit.SECONDS)
+                .writeTimeout(30, TimeUnit.SECONDS)
+                .build();
+        // 构建请求参数
+        Map<String, Object> requestBody = new HashMap<>();
+        requestBody.put("model", model);
+        requestBody.put("stream", true);
+
+        // 构建消息
+        Map<String, String> message = new HashMap<>();
+        message.put("role", role);
+        message.put("content", prompt);
+        requestBody.put("messages", new Object[]{message});
+
+        // 创建请求
+        Request request = new Request.Builder()
+                .url(apiUrl)
+                .addHeader("Content-Type", "application/json")
+                .addHeader("Authorization", "Bearer " + apiKey)
+                .post(RequestBody.create(JSONUtil.toJsonStr(requestBody),
+                        MediaType.parse("application/json; charset=utf-8")
+                ))
+                .build();
+
+        // 发送异步请求
+        client.newCall(request).enqueue(new Callback() {
+            @Override
+            public void onFailure(Call call, IOException e) {
+                listener.onError(e);
+            }
+
+            @Override
+            public void onResponse(Call call, Response response) throws IOException {
+                if (!response.isSuccessful()) {
+                    listener.onError(new IOException("Unexpected code " + response));
+                    return;
+                }
+                // 处理流式响应
+                ResponseBody body = response.body();
+                if (body == null) {
+                    listener.onError(new IOException("Response body is null"));
+                    return;
+                }
+                try (var source = body.source()) {
+                    // 逐行读取响应
+                    while (!source.exhausted()) {
+                        String line = source.readUtf8Line();
+                        if (line == null || line.isEmpty()) continue;
+                        // 处理SSE格式（去除"data: "前缀）
+                        if (line.startsWith("data: ")) {
+                            String data = line.substring(6).trim();
+                            // 检查是否为结束标记
+                            if (data.equals("[DONE]")) {
+                                listener.onComplete();
+                                break;
+                            }
+                            // 解析JSON获取内容
+                            try {
+                                JSONObject json = JSONUtil.parseObj(data);
+                                String content = json.getJSONArray("choices")
+                                        .getJSONObject(0)
+                                        .getJSONObject("delta")
+                                        .getStr("content");
+
+                                if (content != null && !content.isEmpty()) {
+                                    listener.onMessage(content);
+                                }
+                            } catch (Exception e) {
+                                listener.onError(new IOException("Parse error: " + e.getMessage()));
+                            }
+                        }
+                    }
+                } finally {
+                    response.close();
+                }
+            }
+        });
+    }
+
+}
--- a/vetti-common/src/main/java/com/vetti/common/ai/gpt/service/OpenAiStreamListenerService.java
+++ b/vetti-common/src/main/java/com/vetti/common/ai/gpt/service/OpenAiStreamListenerService.java
@@ -0,0 +1,25 @@
+package com.vetti.common.ai.gpt.service;
+
+/**
+ * AI 流式响应监听器 服务层
+ */
+public interface OpenAiStreamListenerService {
+
+    /**
+     * 返回信息
+     * @param content
+     */
+    public void onMessage(String content);
+
+    /**
+     * 返回完成通知
+     */
+    public void onComplete();
+
+    /**
+     * 异常信息通知
+     * @param throwable
+     */
+    public void onError(Throwable throwable);
+
+}
--- a/vetti-common/src/main/java/com/vetti/common/ai/whisper/WhisperClient.java
+++ b/vetti-common/src/main/java/com/vetti/common/ai/whisper/WhisperClient.java
@@ -1,7 +1,6 @@
 package com.vetti.common.ai.whisper;

 import cn.hutool.core.util.StrUtil;
-import cn.hutool.json.JSONObject;
 import cn.hutool.json.JSONUtil;
 import okhttp3.*;
 import org.springframework.beans.factory.annotation.Value;
@@ -11,6 +10,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;

 /**
 * 语音转换文本
@@ -75,6 +75,45 @@ public class WhisperClient {
        }
    }

+    /**
+     * 将音频文件转换为文本
+     * @param bytes 语音流
+     * @param fileName 文件名
+     * @return
+     * @throws Exception
+     */
+    public String transcribeAudio(byte[] bytes, String fileName) throws Exception {
+        OkHttpClient client = new OkHttpClient.Builder()
+                .connectTimeout(60, TimeUnit.SECONDS)
+                .readTimeout(60, TimeUnit.SECONDS)
+                .writeTimeout(60, TimeUnit.SECONDS)
+                .build();
+
+        // 构建请求体，包含音频流
+        RequestBody requestBody = new MultipartBody.Builder()
+                .setType(MultipartBody.FORM)
+                .addFormDataPart("model", MODEL)
+                .addFormDataPart(
+                        "file",
+                        fileName,
+                        RequestBody.create(MediaType.parse("audio/opus"), bytes)
+                )
+                .addFormDataPart("response_format", "json")
+                .build();
+        Request request = new Request.Builder()
+                .url(API_URL)
+                .header("Authorization", "Bearer " + API_URL)
+                .post(requestBody)
+                .build();
+        try (Response response = client.newCall(request).execute()) {
+            if (!response.isSuccessful()) {
+                throw new IOException("Unexpected response: " + response);
+            }
+            return response.body().string();
+        }
+    }
+
+
    /**
     * 处理语音转换成文字
     *