TTS 返回语音优化

This commit is contained in:
2025-10-19 09:55:32 +08:00
parent 060ba472c7
commit 3cf8fbfe9e
2 changed files with 36 additions and 8 deletions

View File

@@ -15,10 +15,7 @@ import org.apache.commons.io.FileUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.TargetDataLine;
import javax.sound.sampled.*;
import javax.websocket.*;
import javax.websocket.server.PathParam;
import javax.websocket.server.ServerEndpoint;
@@ -31,6 +28,9 @@ import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import javax.sound.sampled.*;
import java.io.*;
import java.nio.ByteBuffer;
/**
* 语音面试 web处理器
@@ -202,9 +202,14 @@ public class ChatWebSocketHandler {
// webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
log.info("3.1 开始发送数据音频流啦");
// 将音频数据转换为 Base64 编码的字符串
String base64Audio = Base64.getEncoder().encodeToString(bytes);
//进行转换
// 转换音频格式
AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
byte[] outputAudioBytes = convertAudio(bytes, format);
String base64Audio = Base64.getEncoder().encodeToString(outputAudioBytes);
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
webSocket.send(message);
log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
// 3. 提交音频并请求转录
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
// webSocket.send("{\"type\": \"response.create\"}");
@@ -212,7 +217,7 @@ public class ChatWebSocketHandler {
}catch (Exception e){
e.printStackTrace();
}
log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
}
// 连接关闭时调用
@@ -228,6 +233,28 @@ public class ChatWebSocketHandler {
throwable.printStackTrace();
}
/**
 * Reads raw audio bytes through the Java Sound API and returns the bytes
 * produced for {@code targetFormat}.
 *
 * <p>The input is wrapped in an {@link AudioInputStream} that is declared to
 * already be in {@code targetFormat}, and that stream is then passed to
 * {@link AudioSystem#getAudioInputStream(AudioFormat, AudioInputStream)}.
 * Only whole sample frames are returned; a trailing partial frame is dropped.
 *
 * <p>NOTE(review): because the source stream is declared with the same format
 * as the target, no real format conversion can take place here. If the caller
 * actually holds audio in a different format (or a container format), a
 * separate source format is needed - confirm against the caller.
 *
 * @param inputAudioBytes raw audio data, interpreted as {@code targetFormat}
 * @param targetFormat    format used both to interpret the input and as the
 *                        requested output format
 * @return the audio bytes read back from the converted stream
 * @throws NullPointerException if either argument is {@code null}
 * @throws Exception            if the audio system rejects the conversion or
 *                              reading the stream fails
 */
public static byte[] convertAudio(byte[] inputAudioBytes, AudioFormat targetFormat) throws Exception {
    if (inputAudioBytes == null) {
        throw new NullPointerException("inputAudioBytes must not be null");
    }
    if (targetFormat == null) {
        throw new NullPointerException("targetFormat must not be null");
    }

    // AudioInputStream expects its length in sample frames, not in bytes.
    int declaredFrameSize = targetFormat.getFrameSize();
    long frameCount = declaredFrameSize > 0
            ? inputAudioBytes.length / declaredFrameSize
            : AudioSystem.NOT_SPECIFIED;

    // Size the read buffer as a whole number of frames: AudioInputStream only
    // hands out complete frames, so a buffer smaller than one frame would make
    // read() return 0 forever.
    int frameSize = declaredFrameSize > 0 ? declaredFrameSize : 1;
    byte[] buffer = new byte[frameSize * Math.max(1, 4096 / frameSize)];

    try (AudioInputStream sourceStream = new AudioInputStream(
                 new ByteArrayInputStream(inputAudioBytes), targetFormat, frameCount);
         AudioInputStream convertedStream = AudioSystem.getAudioInputStream(targetFormat, sourceStream);
         ByteArrayOutputStream collected = new ByteArrayOutputStream(inputAudioBytes.length)) {
        int bytesRead;
        while ((bytesRead = convertedStream.read(buffer)) != -1) {
            collected.write(buffer, 0, bytesRead);
        }
        return collected.toByteArray();
    }
}
/**
* 将字节数组保存为WebM文件
*
@@ -297,7 +324,7 @@ public class ChatWebSocketHandler {
private void createWhisperRealtimeSocket(String clientId){
try{
OkHttpClient client = new OkHttpClient();
CountDownLatch latch = new CountDownLatch(1);
// CountDownLatch latch = new CountDownLatch(1);
// 设置 WebSocket 请求
Request request = new Request.Builder()
.url(API_URL)

View File

@@ -145,7 +145,8 @@ verification:
# 文本转语音
elevenLabs:
baseUrl: https://api.elevenlabs.io/v1
apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812
# apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812
apiKey: sk_88f5a560e1bbde0e5b8b6b6eb1812163a98bfb98554acbec
modelId: eleven_turbo_v2_5
# 语音转文本
whisper: