TTS 返回语音优化

This commit is contained in:
2025-10-19 09:55:32 +08:00
parent 060ba472c7
commit 3cf8fbfe9e
2 changed files with 36 additions and 8 deletions

View File

@@ -15,10 +15,7 @@ import org.apache.commons.io.FileUtils;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import javax.sound.sampled.AudioFormat; import javax.sound.sampled.*;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.TargetDataLine;
import javax.websocket.*; import javax.websocket.*;
import javax.websocket.server.PathParam; import javax.websocket.server.PathParam;
import javax.websocket.server.ServerEndpoint; import javax.websocket.server.ServerEndpoint;
@@ -31,6 +28,9 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import javax.sound.sampled.*;
import java.io.*;
import java.nio.ByteBuffer;
/** /**
* 语音面试 web处理器 * 语音面试 web处理器
@@ -202,9 +202,14 @@ public class ChatWebSocketHandler {
// webSocket.send("{\"type\": \"input_audio_buffer.start\"}"); // webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
log.info("3.1 开始发送数据音频流啦"); log.info("3.1 开始发送数据音频流啦");
// 将音频数据转换为 Base64 编码的字符串 // 将音频数据转换为 Base64 编码的字符串
String base64Audio = Base64.getEncoder().encodeToString(bytes); //进行转换
// 转换音频格式
AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
byte[] outputAudioBytes = convertAudio(bytes, format);
String base64Audio = Base64.getEncoder().encodeToString(outputAudioBytes);
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }"; String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
webSocket.send(message); webSocket.send(message);
log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
// 3. 提交音频并请求转录 // 3. 提交音频并请求转录
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}"); // webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
// webSocket.send("{\"type\": \"response.create\"}"); // webSocket.send("{\"type\": \"response.create\"}");
@@ -212,7 +217,7 @@ public class ChatWebSocketHandler {
}catch (Exception e){ }catch (Exception e){
e.printStackTrace(); e.printStackTrace();
} }
log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
} }
// 连接关闭时调用 // 连接关闭时调用
@@ -228,6 +233,28 @@ public class ChatWebSocketHandler {
throwable.printStackTrace(); throwable.printStackTrace();
} }
/**
 * Reads the given audio bytes through the Java Sound conversion API and returns the
 * resulting bytes in {@code targetFormat}.
 *
 * <p>NOTE(review): the input is wrapped as if it were ALREADY encoded in
 * {@code targetFormat}; no source format is detected or passed in. The conversion request
 * therefore has identical source and target formats and presumably acts as a pass-through
 * (only whole sample frames are returned). If callers hand in a container format such as
 * WebM, or PCM with a different sample rate, it is not actually transcoded here - confirm
 * against the caller and supply the real source format if resampling is intended.
 *
 * @param inputAudioBytes raw audio payload, interpreted as {@code targetFormat} frames
 * @param targetFormat    format requested for the output stream
 * @return the bytes read from the converted stream; empty when the input is empty
 * @throws IllegalArgumentException if either argument is {@code null}, or if Java Sound
 *                                  does not support the requested conversion
 * @throws Exception                if reading from the audio stream fails
 */
public static byte[] convertAudio(byte[] inputAudioBytes, AudioFormat targetFormat) throws Exception {
    if (inputAudioBytes == null) {
        throw new IllegalArgumentException("inputAudioBytes must not be null");
    }
    if (targetFormat == null) {
        throw new IllegalArgumentException("targetFormat must not be null");
    }

    // AudioInputStream expects its length in sample frames, not in bytes.
    // An unspecified or non-positive frame size is treated as one byte per frame.
    int frameSize = targetFormat.getFrameSize();
    if (frameSize <= 0) {
        frameSize = 1;
    }
    long frameCount = inputAudioBytes.length / frameSize;

    // try-with-resources closes both audio streams even when reading fails.
    try (AudioInputStream inputAudioStream = new AudioInputStream(
                 new ByteArrayInputStream(inputAudioBytes), targetFormat, frameCount);
         AudioInputStream outputAudioStream =
                 AudioSystem.getAudioInputStream(targetFormat, inputAudioStream)) {

        // Presize for the pass-through case; the buffer grows on demand otherwise.
        ByteArrayOutputStream converted = new ByteArrayOutputStream(inputAudioBytes.length);
        byte[] buffer = new byte[1024];
        int bytesRead;
        while ((bytesRead = outputAudioStream.read(buffer)) != -1) {
            converted.write(buffer, 0, bytesRead);
        }
        return converted.toByteArray();
    }
}
/** /**
* 将字节数组保存为WebM文件 * 将字节数组保存为WebM文件
* *
@@ -297,7 +324,7 @@ public class ChatWebSocketHandler {
private void createWhisperRealtimeSocket(String clientId){ private void createWhisperRealtimeSocket(String clientId){
try{ try{
OkHttpClient client = new OkHttpClient(); OkHttpClient client = new OkHttpClient();
CountDownLatch latch = new CountDownLatch(1); // CountDownLatch latch = new CountDownLatch(1);
// 设置 WebSocket 请求 // 设置 WebSocket 请求
Request request = new Request.Builder() Request request = new Request.Builder()
.url(API_URL) .url(API_URL)

View File

@@ -145,7 +145,8 @@ verification:
# 文本转语音 # 文本转语音
elevenLabs: elevenLabs:
baseUrl: https://api.elevenlabs.io/v1 baseUrl: https://api.elevenlabs.io/v1
apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812 # apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812
apiKey: sk_88f5a560e1bbde0e5b8b6b6eb1812163a98bfb98554acbec
modelId: eleven_turbo_v2_5 modelId: eleven_turbo_v2_5
# 语音转文本 # 语音转文本
whisper: whisper: