语音模式修改

This commit is contained in:
2025-10-31 00:05:51 +08:00
parent be216f8f7d
commit 7fd1720826
2 changed files with 51 additions and 46 deletions

View File

@@ -102,7 +102,7 @@ public class ChatWebSocketHandler {
log.info("WebSocket session 链接已建立:{}", session.getId()); log.info("WebSocket session 链接已建立:{}", session.getId());
cacheClientTts.put(clientId, new String()); cacheClientTts.put(clientId, new String());
//初始化STT流式语音转换文本的socket链接 //初始化STT流式语音转换文本的socket链接
createWhisperRealtimeSocket(session.getId()); // createWhisperRealtimeSocket(session.getId());
//是初次自我介绍后的问答环节 //是初次自我介绍后的问答环节
cacheReplyFlag.put(session.getId(),"YES"); cacheReplyFlag.put(session.getId(),"YES");
//初始化面试回答数据记录 //初始化面试回答数据记录
@@ -129,22 +129,27 @@ public class ChatWebSocketHandler {
public void onTextMessage(Session session, String message, @PathParam("clientId") String clientId) { public void onTextMessage(Session session, String message, @PathParam("clientId") String clientId) {
System.out.println("接收到文本消息: " + message); System.out.println("接收到文本消息: " + message);
try { try {
// {
// "type": "start | done | end",
// "content": "内容"
// }
//处理文本结果 //处理文本结果
if (StrUtil.isNotEmpty(message)) { if (StrUtil.isNotEmpty(message)) {
Map<String, String> mapResult = JSONUtil.toBean(JSONUtil.parseObj(message), Map.class); Map<String, String> mapResult = JSONUtil.toBean(JSONUtil.parseObj(message), Map.class);
String resultFlag = mapResult.get("msg"); String resultFlag = mapResult.get("type");
if ("done".equals(resultFlag)) { if ("done".equals(resultFlag)) {
//开始合并语音流 //开始合并语音流
//发送消息 //发送消息
WebSocket webSocket = cacheWebSocket.get(session.getId()); // WebSocket webSocket = cacheWebSocket.get(session.getId());
if (webSocket != null) { // if (webSocket != null) {
webSocket.send("{\"type\": \"input_audio_buffer.commit\"}"); // webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
webSocket.send("{\"type\": \"response.create\"}"); // webSocket.send("{\"type\": \"response.create\"}");
} // }
String startFlag = cacheReplyFlag.get(session.getId()); String startFlag = cacheReplyFlag.get(session.getId());
//语音结束,开始进行回答解析 //语音结束,开始进行回答解析
log.info("开始文本处理,客户端ID为:{}",clientId); log.info("开始文本处理,客户端ID为:{}",clientId);
String cacheResultText = cacheClientTts.get(session.getId()); // String cacheResultText = cacheClientTts.get(session.getId());
String cacheResultText = mapResult.get("content");
log.info("开始文本处理,面试者回答信息为:{}", cacheResultText); log.info("开始文本处理,面试者回答信息为:{}", cacheResultText);
if (StrUtil.isEmpty(cacheResultText)) { if (StrUtil.isEmpty(cacheResultText)) {
cacheResultText = "I first check the forklift's logbook for recent issues, inspect tires and brakes, verify the load capacity matches today's task, and confirm my licence is current—all per SWMS requirements."; cacheResultText = "I first check the forklift's logbook for recent issues, inspect tires and brakes, verify the load capacity matches today's task, and confirm my licence is current—all per SWMS requirements.";
@@ -314,43 +319,43 @@ public class ChatWebSocketHandler {
// 接收二进制消息(流数据) // 接收二进制消息(流数据)
@OnMessage @OnMessage
public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) { public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
log.info("客户端ID为:{}", clientId); // log.info("客户端ID为:{}", clientId);
// 处理二进制流数据 // // 处理二进制流数据
byte[] bytes = new byte[byteBuffer.remaining()]; // byte[] bytes = new byte[byteBuffer.remaining()];
//从缓冲区中读取数据并存储到指定的字节数组中 // //从缓冲区中读取数据并存储到指定的字节数组中
byteBuffer.get(bytes); // byteBuffer.get(bytes);
// 生成唯一文件名 // // 生成唯一文件名
String fileName = clientId + "_" + System.currentTimeMillis() + ".wav"; // String fileName = clientId + "_" + System.currentTimeMillis() + ".wav";
String pathUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_DIR + fileName; // String pathUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_DIR + fileName;
log.info("文件路径为:{}", pathUrl); // log.info("文件路径为:{}", pathUrl);
try { // try {
saveAsWebM(bytes, pathUrl); // saveAsWebM(bytes, pathUrl);
//接收到数据流后直接就进行STT处理 // //接收到数据流后直接就进行STT处理
//语音格式转换 // //语音格式转换
String fileOutName = clientId + "_" + System.currentTimeMillis() + ".pcm"; // String fileOutName = clientId + "_" + System.currentTimeMillis() + ".pcm";
String pathOutUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_DIR + fileOutName; // String pathOutUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_DIR + fileOutName;
handleAudioToPCM(pathUrl, pathOutUrl); // handleAudioToPCM(pathUrl, pathOutUrl);
//发送消息 // //发送消息
WebSocket webSocket = cacheWebSocket.get(session.getId()); // WebSocket webSocket = cacheWebSocket.get(session.getId());
log.info("获取的socket对象为:{}", webSocket); // log.info("获取的socket对象为:{}", webSocket);
if (webSocket != null) { // if (webSocket != null) {
// 1. 启动音频缓冲 //// 1. 启动音频缓冲
// webSocket.send("{\"type\": \"input_audio_buffer.start\"}"); //// webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
File outputFile = new File(pathOutUrl); // 输出PCM格式文件 // File outputFile = new File(pathOutUrl); // 输出PCM格式文件
ByteBuffer buffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile)); // ByteBuffer buffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile));
byte[] outBytes = new byte[buffer.remaining()]; // byte[] outBytes = new byte[buffer.remaining()];
//从缓冲区中读取数据并存储到指定的字节数组中 // //从缓冲区中读取数据并存储到指定的字节数组中
buffer.get(outBytes); // buffer.get(outBytes);
String base64Audio = Base64.getEncoder().encodeToString(outBytes); // String base64Audio = Base64.getEncoder().encodeToString(outBytes);
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }"; // String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
webSocket.send(message); // webSocket.send(message);
// 3. 提交音频并请求转录 // // 3. 提交音频并请求转录
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}"); //// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
// webSocket.send("{\"type\": \"response.create\"}"); //// webSocket.send("{\"type\": \"response.create\"}");
} // }
} catch (Exception e) { // } catch (Exception e) {
e.printStackTrace(); // e.printStackTrace();
} // }
} }

View File

@@ -167,7 +167,7 @@ whisper:
chatGpt: chatGpt:
apiKey: ${OPENAI_API_KEY} apiKey: ${OPENAI_API_KEY}
apiUrl: https://api.openai.com/v1/chat/completions apiUrl: https://api.openai.com/v1/chat/completions
model: ft:gpt-3.5-turbo-0125:vetti:construction-labourer-test:CTIvLD5n model: ft:gpt-3.5-turbo-0125:vetti:construction-labourer-test:CWKBNvE2
role: system role: system