效率优化

This commit is contained in:
2025-11-29 09:59:12 +08:00
parent 56b6e67180
commit 6ee8e976cf
4 changed files with 842 additions and 28 deletions

View File

@@ -19,10 +19,7 @@ import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import javax.websocket.Session;
import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.*;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
@@ -51,8 +48,8 @@ public class ElevenLabsStreamClient {
* @param voiceId 语音ID (可从ElevenLabs网站获取)
* @throws IOException 网络请求或文件操作异常
*/
private void textToSpeech(String text, String voiceId, CloseableHttpClient httpClient, Session session) throws IOException {
HttpPost httpPost = new HttpPost(BASE_URL + "/text-to-speech/" + voiceId+"/stream?output_format=mp3_24000_48&optimize_streaming_latency=2");
private void textToSpeech(String text, String voiceId, CloseableHttpClient httpClient, Session session,String outputFormat) throws IOException {
HttpPost httpPost = new HttpPost(BASE_URL + "/text-to-speech/" + voiceId+"/stream?output_format="+outputFormat);
httpPost.setHeader("xi-api-key", apiKey);
httpPost.setHeader("Content-Type", "application/json");
@@ -67,25 +64,48 @@ public class ElevenLabsStreamClient {
HttpEntity responseEntity = response.getEntity();
if (responseEntity != null) {
try (InputStream inputStream = responseEntity.getContent();) {
// byte[] allData = inputStream.readAllBytes();
// InputStream stableStream = new ByteArrayInputStream(allData);
// sendAudioStream(session,stableStream);
//用来合并零散的碎片
ByteArrayOutputStream smallChunkBuffer = new ByteArrayOutputStream(); //
byte[] buffer = new byte[4096];
int bytesRead;
int n = 0;
while ((bytesRead = inputStream.read(buffer)) != -1) {
ByteBuffer byteBuffer = ByteBuffer.wrap(buffer, 0, bytesRead);
// log.info("字符流的长度大小:{}", bytesRead);
if(bytesRead != 1 && bytesRead != 2){
session.getAsyncRemote().sendBinary(byteBuffer);
// Merge the audio stream into chunks of roughly 3 KB (3072 bytes) before sending
if(smallChunkBuffer.size() >= 3072){
log.info("语音流大于"+smallChunkBuffer.size()+"啦,发送完成!!!");
byte[] merged = smallChunkBuffer.toByteArray();
smallChunkBuffer.reset();
session.getAsyncRemote().sendBinary(ByteBuffer.wrap(merged));
try {
Thread.sleep(20);
Thread.sleep(50);
}catch (Exception e){}
// log.info("正常语音发送出去语音流啦!!!");
}
//发送三次告诉前端要合成一次语音
// if(n == 2){
// Map<String,String> dataText = new HashMap<>();
// dataText.put("type","voiceMiddleEnd");
// dataText.put("content","");
// session.getBasicRemote().sendText(JSONUtil.toJsonStr(dataText));
// //重置一下
// n = 0;
// }
// 零散的碎片 → 加入缓冲区,不立即发送
smallChunkBuffer.write(buffer, 0, bytesRead);
n++;
}
// The whole stream has been buffered; flush whatever remains as the final send
if(smallChunkBuffer.size() > 2){
log.info("最后一次发送,语音流大于"+smallChunkBuffer.size()+"啦,发送完成!!!");
byte[] merged = smallChunkBuffer.toByteArray();
smallChunkBuffer.reset();
session.getAsyncRemote().sendBinary(ByteBuffer.wrap(merged));
try {
Thread.sleep(50);
}catch (Exception e){}
}
//返回结束点
try {
Thread.sleep(100);
Thread.sleep(50);
}catch (Exception e){}
Map<String,String> dataText = new HashMap<>();
dataText.put("type","voiceEnd");
@@ -120,13 +140,13 @@ public class ElevenLabsStreamClient {
* @param inputText
* @return
*/
public void handleTextToVoice(String inputText,Session session){
public void handleTextToVoice(String inputText,Session session,String outputFormat){
CloseableHttpClient httpClient = HttpClients.createDefault();
try {
// 使用第一个可用语音进行文本转语音(澳洲本地女声)
// String firstVoiceId = "56bWURjYFHyYyVf490Dp";
String firstVoiceId = "56bWURjYFHyYyVf490Dp";
textToSpeech(inputText, firstVoiceId,httpClient,session);
textToSpeech(inputText, firstVoiceId,httpClient,session,outputFormat);
} catch (IOException e) {
e.printStackTrace();
} finally {