From a5777d7beb6bbfa033d1a35fce8e0b66b662deb9 Mon Sep 17 00:00:00 2001 From: wangxiangshun Date: Thu, 16 Oct 2025 14:27:20 +0800 Subject: [PATCH] =?UTF-8?q?=E8=AF=AD=E9=9F=B3AI=E5=88=86=E7=89=87=E4=BB=A5?= =?UTF-8?q?=E5=8F=8A=E6=B5=81=E5=BC=8F=E5=A4=84=E7=90=86=E8=BF=94=E5=9B=9E?= =?UTF-8?q?=E7=BB=93=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../vetti/socket/ChatWebSocketHandler.java | 149 ++++++++++++------ .../target/classes/application-druid.yml | 2 +- .../ai/elevenLabs/ElevenLabsClient.java | 3 +- .../common/ai/gpt/OpenAiStreamClient.java | 121 ++++++++++++++ .../service/OpenAiStreamListenerService.java | 25 +++ .../common/ai/whisper/WhisperClient.java | 41 ++++- 6 files changed, 285 insertions(+), 56 deletions(-) create mode 100644 vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java create mode 100644 vetti-common/src/main/java/com/vetti/common/ai/gpt/service/OpenAiStreamListenerService.java diff --git a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java index f6a92c4..62bc0dc 100644 --- a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java +++ b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java @@ -1,7 +1,10 @@ package com.vetti.socket; +import cn.hutool.core.util.StrUtil; +import cn.hutool.json.JSONUtil; import com.vetti.common.ai.elevenLabs.ElevenLabsClient; -import com.vetti.common.ai.gpt.ChatGPTClient; +import com.vetti.common.ai.gpt.OpenAiStreamClient; +import com.vetti.common.ai.gpt.service.OpenAiStreamListenerService; import com.vetti.common.ai.whisper.WhisperClient; import com.vetti.common.config.RuoYiConfig; import com.vetti.common.utils.spring.SpringUtils; @@ -15,9 +18,10 @@ import javax.websocket.server.ServerEndpoint; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import java.io.RandomAccessFile; import 
java.nio.ByteBuffer; -import java.nio.channels.FileChannel; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; /** * 语音面试 web处理器 @@ -27,6 +31,11 @@ import java.nio.channels.FileChannel; @Component public class ChatWebSocketHandler { + /** + * 缓存客户端流式解析的语音文本数据 + */ + private final Map<String, String> cacheClientTts = new ConcurrentHashMap<>(); + // 语音文件保存目录 private static final String VOICE_STORAGE_DIR = "/voice_files/"; @@ -51,19 +60,72 @@ public class ChatWebSocketHandler { @OnOpen public void onOpen(Session session, @PathParam("clientId") String clientId) { log.info("WebSocket 链接已建立:{}", clientId); - //创建会话 - + cacheClientTts.put(clientId, ""); } // 接收文本消息 @OnMessage - public void onTextMessage(Session session, String message) { + public void onTextMessage(Session session, String message,@PathParam("clientId") String clientId) { System.out.println("接收到文本消息: " + message); - // 可以在这里处理文本流数据 try { - // 发送响应 - session.getBasicRemote().sendText("已收到文本: " + message); - } catch (IOException e) { + //处理文本结果 + if(StrUtil.isNotEmpty(message)){ + Map<String, String> mapResult = JSONUtil.toBean(JSONUtil.parseObj(message),Map.class); + String resultFlag = mapResult.get("msg"); + if("done".equals(resultFlag)){ + //语音结束,开始进行回答解析 + String cacheResultText = cacheClientTts.get(clientId); + log.info("1、开始进行AI回答时间:{}",System.currentTimeMillis()/1000); + //把提问的文字发送给GPT(流式处理) + OpenAiStreamClient aiStreamClient = SpringUtils.getBean(OpenAiStreamClient.class); + aiStreamClient.streamChat(cacheResultText, new OpenAiStreamListenerService() { + @Override + public void onMessage(String content) { + // 实时输出内容 + //开始进行语音输出-流式持续输出 + log.info("2、开始进行AI回答时间:{}",System.currentTimeMillis()/1000); + //把结果文字转成语音文件 + //生成文件 + // 生成唯一文件名 + String resultFileName = clientId + "_" + System.currentTimeMillis() + ".opus"; + String resultPathUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_RESULT_DIR + resultFileName; + ElevenLabsClient elevenLabsClient = 
SpringUtils.getBean(ElevenLabsClient.class); + elevenLabsClient.handleTextToVoice(content, resultPathUrl); + log.info("3、开始进行AI回答时间:{}",System.currentTimeMillis()/1000); + //持续返回数据流给客户端 + try { + //文件转换成文件流 + ByteBuffer outByteBuffer = convertFileToByteBuffer(resultPathUrl); + //发送文件流数据 + session.getBasicRemote().sendBinary(outByteBuffer); + // 发送响应确认 + log.info("4、开始进行AI回答时间:{}",System.currentTimeMillis()/1000); + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Override + public void onComplete() { + try { + Map resultEntity = new HashMap<>(); + resultEntity.put("msg","done"); + //发送通知告诉客户端已经回答结束了 + session.getBasicRemote().sendText(JSONUtil.toJsonStr(resultEntity)); + } catch (Exception e) { + throw new RuntimeException(e); + } + log.info("5、结束进行AI回答时间:{}",System.currentTimeMillis()/1000); + } + + @Override + public void onError(Throwable throwable) { + throwable.printStackTrace(); + } + }); + } + } + } catch (Exception e) { e.printStackTrace(); } } @@ -71,53 +133,36 @@ public class ChatWebSocketHandler { // 接收二进制消息(流数据) @OnMessage public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) { - try { - log.info("1、运行时间:{}",System.currentTimeMillis()/1000); - log.info("客户端ID为:{}", clientId); - // 处理二进制流数据 - byte[] bytes = new byte[byteBuffer.remaining()]; - //从缓冲区中读取数据并存储到指定的字节数组中 - byteBuffer.get(bytes); - log.info("2、运行时间:{}",System.currentTimeMillis()/1000); - // 生成唯一文件名 - String fileName = clientId + "_" + System.currentTimeMillis() + ".webm"; - String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName; -// String pathUrl = "/Users/wangxiangshun/Desktop/0.8733346782733291.webm"; - log.info("文件路径为:{}", pathUrl); - saveAsWebM(bytes, pathUrl); - log.info("3、运行时间:{}",System.currentTimeMillis()/1000); + log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000); + log.info("客户端ID为:{}", clientId); + // 处理二进制流数据 + byte[] bytes = new byte[byteBuffer.remaining()]; + //从缓冲区中读取数据并存储到指定的字节数组中 + 
byteBuffer.get(bytes); + log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000); + // 生成唯一文件名 + String fileName = clientId + "_" + System.currentTimeMillis() + ".webm"; + String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName; + log.info("文件路径为:{}", pathUrl); + log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000); + try{ + //接收到数据流后直接就进行STT处理 //拿到文件进行文字转换 WhisperClient whisperClient = SpringUtils.getBean(WhisperClient.class); - String resultText = whisperClient.handleVoiceToText(pathUrl); - log.info("4、运行时间:{}",System.currentTimeMillis()/1000); - //把提问的文字发送给CPT - ChatGPTClient chatGPTClient = SpringUtils.getBean(ChatGPTClient.class); - String resultMsg = chatGPTClient.handleAiChat(resultText); - log.info("5、运行时间:{}",System.currentTimeMillis()/1000); - //把结果文字转成语音文件 - //生成文件 - // 生成唯一文件名 - String resultFileName = clientId + "_" + System.currentTimeMillis() + ".opus"; - String resultPathUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_RESULT_DIR + resultFileName; - ElevenLabsClient elevenLabsClient = SpringUtils.getBean(ElevenLabsClient.class); - elevenLabsClient.handleTextToVoice(resultMsg, resultPathUrl); - log.info("6、运行时间:{}",System.currentTimeMillis()/1000); - //把语音文件转换成流,发送给前端 - System.out.println("接收到二进制数据,长度: " + bytes.length + " bytes"); - try { - //文件转换成文件流 - ByteBuffer outByteBuffer = convertFileToByteBuffer(resultPathUrl); - //发送文件流数据 - session.getBasicRemote().sendBinary(outByteBuffer); - // 发送响应确认 -// session.getBasicRemote().sendText("已收到二进制数据,长度: " + bytes.length); - log.info("7、运行时间:{}",System.currentTimeMillis()/1000); - } catch (IOException e) { - e.printStackTrace(); + String resultText = whisperClient.transcribeAudio(bytes,fileName); + //进行客户端文本数据存储 + String cacheString = cacheClientTts.get(clientId); + if(StrUtil.isNotEmpty(cacheString)){ + cacheString = cacheString+resultText; + }else { + cacheString = resultText; } - } catch (Exception e) { + cacheClientTts.put(clientId,cacheString); + +}catch (Exception e){ 
e.printStackTrace(); } + log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000); } // 连接关闭时调用 diff --git a/vetti-admin/target/classes/application-druid.yml b/vetti-admin/target/classes/application-druid.yml index 097def7..00422ab 100644 --- a/vetti-admin/target/classes/application-druid.yml +++ b/vetti-admin/target/classes/application-druid.yml @@ -146,7 +146,7 @@ verification: elevenLabs: baseUrl: https://api.elevenlabs.io/v1 apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812 - modelId: eleven_turbo_v2.5 + modelId: eleven_turbo_v2_5 # 语音转文本 whisper: apiUrl: https://api.openai.com/v1/audio/transcriptions diff --git a/vetti-common/src/main/java/com/vetti/common/ai/elevenLabs/ElevenLabsClient.java b/vetti-common/src/main/java/com/vetti/common/ai/elevenLabs/ElevenLabsClient.java index da3414f..464890a 100644 --- a/vetti-common/src/main/java/com/vetti/common/ai/elevenLabs/ElevenLabsClient.java +++ b/vetti-common/src/main/java/com/vetti/common/ai/elevenLabs/ElevenLabsClient.java @@ -106,8 +106,7 @@ public class ElevenLabsClient { VoicesResponse voicesResponse = getVoices(httpClient); if (voicesResponse != null && voicesResponse.getVoices() != null && voicesResponse.getVoices().length > 0) { - // 使用第一个可用语音进行文本转语音 -// String firstVoiceId = voicesResponse.getVoices()[0].getVoice_id(); + // 使用第一个可用语音进行文本转语音(澳洲本地女声) String firstVoiceId = "21m00Tcm4TlvDq8ikWAM"; textToSpeech(inputText, firstVoiceId, outputFile,httpClient); } diff --git a/vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java b/vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java new file mode 100644 index 0000000..12eb503 --- /dev/null +++ b/vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java @@ -0,0 +1,121 @@ +package com.vetti.common.ai.gpt; + +import cn.hutool.json.JSONObject; +import cn.hutool.json.JSONUtil; +import com.vetti.common.ai.gpt.service.OpenAiStreamListenerService; +import okhttp3.*; +import 
org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * AI ChatGPT 对话 + */ +@Component +public class OpenAiStreamClient { + + @Value("${chatGpt.apiKey}") + private String apiKey; + // API端点URL + @Value("${chatGpt.apiUrl}") + private String apiUrl; + + @Value("${chatGpt.model}") + private String model; + + @Value("${chatGpt.role}") + private String role; + + /** + * 发送流式请求 + * + * @param prompt 提示词 + * @param listener 流式响应监听器 + */ + public void streamChat(String prompt, OpenAiStreamListenerService listener) { + OkHttpClient client = new OkHttpClient.Builder() + .connectTimeout(30, TimeUnit.SECONDS) + .readTimeout(60, TimeUnit.SECONDS) + .writeTimeout(30, TimeUnit.SECONDS) + .build(); + // 构建请求参数 + Map requestBody = new HashMap<>(); + requestBody.put("model", model); + requestBody.put("stream", true); + + // 构建消息 + Map message = new HashMap<>(); + message.put("role", role); + message.put("content", prompt); + requestBody.put("messages", new Object[]{message}); + + // 创建请求 + Request request = new Request.Builder() + .url(apiUrl) + .addHeader("Content-Type", "application/json") + .addHeader("Authorization", "Bearer " + apiKey) + .post(RequestBody.create(JSONUtil.toJsonStr(requestBody), + MediaType.parse("application/json; charset=utf-8") + )) + .build(); + + // 发送异步请求 + client.newCall(request).enqueue(new Callback() { + @Override + public void onFailure(Call call, IOException e) { + listener.onError(e); + } + + @Override + public void onResponse(Call call, Response response) throws IOException { + if (!response.isSuccessful()) { + listener.onError(new IOException("Unexpected code " + response)); + return; + } + // 处理流式响应 + ResponseBody body = response.body(); + if (body == null) { + listener.onError(new IOException("Response body is null")); + return; + } + try (var source = body.source()) { + 
// 逐行读取响应 + while (!source.exhausted()) { + String line = source.readUtf8Line(); + if (line == null || line.isEmpty()) continue; + // 处理SSE格式(去除"data: "前缀) + if (line.startsWith("data: ")) { + String data = line.substring(6).trim(); + // 检查是否为结束标记 + if (data.equals("[DONE]")) { + listener.onComplete(); + break; + } + // 解析JSON获取内容 + try { + JSONObject json = JSONUtil.parseObj(data); + String content = json.getJSONArray("choices") + .getJSONObject(0) + .getJSONObject("delta") + .getStr("content"); + + if (content != null && !content.isEmpty()) { + listener.onMessage(content); + } + } catch (Exception e) { + listener.onError(new IOException("Parse error: " + e.getMessage())); + } + } + } + } finally { + response.close(); + } + } + }); + } + +} diff --git a/vetti-common/src/main/java/com/vetti/common/ai/gpt/service/OpenAiStreamListenerService.java b/vetti-common/src/main/java/com/vetti/common/ai/gpt/service/OpenAiStreamListenerService.java new file mode 100644 index 0000000..e80e797 --- /dev/null +++ b/vetti-common/src/main/java/com/vetti/common/ai/gpt/service/OpenAiStreamListenerService.java @@ -0,0 +1,25 @@ +package com.vetti.common.ai.gpt.service; + +/** + * AI 流式响应监听器 服务层 + */ +public interface OpenAiStreamListenerService { + + /** + * 返回信息 + * @param content + */ + public void onMessage(String content); + + /** + * 返回完成通知 + */ + public void onComplete(); + + /** + * 异常信息通知 + * @param throwable + */ + public void onError(Throwable throwable); + +} diff --git a/vetti-common/src/main/java/com/vetti/common/ai/whisper/WhisperClient.java b/vetti-common/src/main/java/com/vetti/common/ai/whisper/WhisperClient.java index 2c3411b..5c0cee7 100644 --- a/vetti-common/src/main/java/com/vetti/common/ai/whisper/WhisperClient.java +++ b/vetti-common/src/main/java/com/vetti/common/ai/whisper/WhisperClient.java @@ -1,7 +1,6 @@ package com.vetti.common.ai.whisper; import cn.hutool.core.util.StrUtil; -import cn.hutool.json.JSONObject; import cn.hutool.json.JSONUtil; import okhttp3.*; 
import org.springframework.beans.factory.annotation.Value; @@ -11,6 +10,7 @@ import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.TimeUnit; /** * 语音转换文本 */ @@ -75,6 +75,45 @@ public class WhisperClient { } } + /** + * 将音频文件转换为文本 + * @param bytes 语音流 + * @param fileName 文件名 + * @return raw JSON transcription response from the Whisper endpoint + * @throws Exception if the HTTP request fails or the response is not successful + */ + public String transcribeAudio(byte[] bytes, String fileName) throws Exception { + OkHttpClient client = new OkHttpClient.Builder() + .connectTimeout(60, TimeUnit.SECONDS) + .readTimeout(60, TimeUnit.SECONDS) + .writeTimeout(60, TimeUnit.SECONDS) + .build(); + + // 构建请求体,包含音频流 + RequestBody requestBody = new MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart("model", MODEL) + .addFormDataPart( + "file", + fileName, + RequestBody.create(MediaType.parse("audio/opus"), bytes) + ) + .addFormDataPart("response_format", "json") + .build(); + Request request = new Request.Builder() + .url(API_URL) + .header("Authorization", "Bearer " + API_KEY) // fix: was "Bearer " + API_URL, which sent the endpoint URL as the bearer token; NOTE(review): confirm the key constant's exact name in WhisperClient + .post(requestBody) + .build(); + try (Response response = client.newCall(request).execute()) { + if (!response.isSuccessful()) { + throw new IOException("Unexpected response: " + response); + } + return response.body().string(); + } + } + + /** * 处理语音转换成文字 *