From a839571b4ea935acd109c7dfd4bff043c88f130e Mon Sep 17 00:00:00 2001 From: wangxiangshun Date: Sun, 19 Oct 2025 18:35:54 +0800 Subject: [PATCH] =?UTF-8?q?TTS=20=E8=BF=94=E5=9B=9E=E8=AF=AD=E9=9F=B3?= =?UTF-8?q?=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../vetti/socket/ChatWebSocketHandler.java | 48 ++++++++++++++++++- .../web/controller/ai/AiCommonController.java | 2 +- .../target/classes/application-druid.yml | 3 +- .../common/ai/gpt/OpenAiStreamClient.java | 31 ++++++------ 4 files changed, 66 insertions(+), 18 deletions(-) diff --git a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java index 00fa5cd..1e294ac 100644 --- a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java +++ b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java @@ -144,8 +144,10 @@ public class ChatWebSocketHandler { log.info("3、开始进行AI回答时间:{}",System.currentTimeMillis()/1000); //持续返回数据流给客户端 try { + String resultOutPathUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_RESULT_DIR + "110_"+resultFileName; + handleVoice(resultPathUrl,resultOutPathUrl); //文件转换成文件流 - ByteBuffer outByteBuffer = convertFileToByteBuffer(resultPathUrl); + ByteBuffer outByteBuffer = convertFileToByteBuffer(resultOutPathUrl); //发送文件流数据 session.getBasicRemote().sendBinary(outByteBuffer); // 发送响应确认 @@ -404,6 +406,50 @@ public class ChatWebSocketHandler { } } + private void handleVoice(String inputPath,String outputPath){ + double trimMs = 270; // 要去掉的尾部时长(毫秒) + + try { + // 1. 解析音频格式和总长度 + AudioInputStream audioIn = AudioSystem.getAudioInputStream(new File(inputPath)); + AudioFormat format = audioIn.getFormat(); + long totalBytes = audioIn.getFrameLength() * format.getFrameSize(); // 总字节数 + + // 2. 计算300毫秒对应的字节数 + float sampleRate = format.getSampleRate(); // 采样率(Hz) + int frameSize = format.getFrameSize(); // 每帧字节数(位深/8 * 声道数) + double trimSeconds = trimMs / 1000.0; // 转换为秒 + long trimBytes = (long) (sampleRate * trimSeconds * frameSize); // 要去掉的字节数 + + // 3. 计算需要保留的字节数(避免负数) + long keepBytes = Math.max(0, totalBytes - trimBytes); + if (keepBytes == 0) { + System.out.println("音频长度小于300毫秒,无法截断"); + return; + } + + // 4. 读取并保留前半部分(去掉最后300毫秒) + try (InputStream in = new FileInputStream(inputPath); + OutputStream out = new FileOutputStream(outputPath)) { + + byte[] buffer = new byte[4096]; + long totalRead = 0; + int bytesRead; + + while (totalRead < keepBytes && (bytesRead = in.read(buffer)) != -1) { + long remaining = keepBytes - totalRead; + int writeBytes = (remaining < bytesRead) ? (int) remaining : bytesRead; + out.write(buffer, 0, writeBytes); + totalRead += writeBytes; + } + + System.out.println("处理完成,去掉了最后" + trimMs + "毫秒,保留了" + totalRead + "字节"); + } + + } catch (UnsupportedAudioFileException | IOException e) { + e.printStackTrace(); + } + } } diff --git a/vetti-admin/src/main/java/com/vetti/web/controller/ai/AiCommonController.java b/vetti-admin/src/main/java/com/vetti/web/controller/ai/AiCommonController.java index a0dac86..53ccdc8 100644 --- a/vetti-admin/src/main/java/com/vetti/web/controller/ai/AiCommonController.java +++ b/vetti-admin/src/main/java/com/vetti/web/controller/ai/AiCommonController.java @@ -38,7 +38,7 @@ public class AiCommonController extends BaseController @GetMapping("/handleTextToVice") public AjaxResult handleTextToVice() { - elevenLabsClient.handleTextToVoice("我只是测试的文本转换成语音","/Users/wangxiangshun/Desktop/临时文件/output1112.mp3"); + elevenLabsClient.handleTextToVoice("Hello ! I can","/Users/wangxiangshun/Desktop/临时文件/output1112.wav"); return success(); } diff --git a/vetti-admin/target/classes/application-druid.yml b/vetti-admin/target/classes/application-druid.yml index 00422ab..9de00ac 100644 --- a/vetti-admin/target/classes/application-druid.yml +++ b/vetti-admin/target/classes/application-druid.yml @@ -145,7 +145,8 @@ verification: # 文本转语音 elevenLabs: baseUrl: https://api.elevenlabs.io/v1 - apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812 +# apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812 + apiKey: sk_88f5a560e1bbde0e5b8b6b6eb1812163a98bfb98554acbec modelId: eleven_turbo_v2_5 # 语音转文本 whisper: diff --git a/vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java b/vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java index 82858ee..5c45d7b 100644 --- a/vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java +++ b/vetti-common/src/main/java/com/vetti/common/ai/gpt/OpenAiStreamClient.java @@ -115,21 +115,22 @@ public class OpenAiStreamClient { .getJSONObject("delta") .getStr("content"); - if (content != null && !content.isEmpty()) { - if(punctuationSet.contains(content)){ - //说明有标点啦,直接返回 - bufferStr.append(content); - listener.onMessage(bufferStr.toString()); - }else{ - //加入缓冲区 - if(StrUtil.isEmpty(bufferStr.toString())){ - bufferStr.append(content); - }else { - bufferStr.append(" ").append(content); - } - } - - } +// if (content != null && !content.isEmpty()) { +// if(punctuationSet.contains(content)){ +// //说明有标点啦,直接返回 +// bufferStr.append(content); +// listener.onMessage(bufferStr.toString()); +// }else{ +// //加入缓冲区 +// if(StrUtil.isEmpty(bufferStr.toString())){ +// bufferStr.append(content); +// }else { +// bufferStr.append(" ").append(content); +// } +// } +// +// } + listener.onMessage(content); } catch (Exception e) { listener.onError(new IOException("Parse error: " + e.getMessage())); }