语音AI分片以及流式处理返回结果
This commit is contained in:
@@ -1,7 +1,10 @@
|
|||||||
package com.vetti.socket;
|
package com.vetti.socket;
|
||||||
|
|
||||||
|
import cn.hutool.core.util.StrUtil;
|
||||||
|
import cn.hutool.json.JSONUtil;
|
||||||
import com.vetti.common.ai.elevenLabs.ElevenLabsClient;
|
import com.vetti.common.ai.elevenLabs.ElevenLabsClient;
|
||||||
import com.vetti.common.ai.gpt.ChatGPTClient;
|
import com.vetti.common.ai.gpt.OpenAiStreamClient;
|
||||||
|
import com.vetti.common.ai.gpt.service.OpenAiStreamListenerService;
|
||||||
import com.vetti.common.ai.whisper.WhisperClient;
|
import com.vetti.common.ai.whisper.WhisperClient;
|
||||||
import com.vetti.common.config.RuoYiConfig;
|
import com.vetti.common.config.RuoYiConfig;
|
||||||
import com.vetti.common.utils.spring.SpringUtils;
|
import com.vetti.common.utils.spring.SpringUtils;
|
||||||
@@ -15,9 +18,10 @@ import javax.websocket.server.ServerEndpoint;
|
|||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.RandomAccessFile;
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 语音面试 web处理器
|
* 语音面试 web处理器
|
||||||
@@ -27,6 +31,11 @@ import java.nio.channels.FileChannel;
|
|||||||
@Component
|
@Component
|
||||||
public class ChatWebSocketHandler {
|
public class ChatWebSocketHandler {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 缓存客户端流式解析的语音文本数据
|
||||||
|
*/
|
||||||
|
private final Map<String,String> cacheClientTts = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
// 语音文件保存目录
|
// 语音文件保存目录
|
||||||
private static final String VOICE_STORAGE_DIR = "/voice_files/";
|
private static final String VOICE_STORAGE_DIR = "/voice_files/";
|
||||||
|
|
||||||
@@ -51,19 +60,72 @@ public class ChatWebSocketHandler {
|
|||||||
@OnOpen
|
@OnOpen
|
||||||
public void onOpen(Session session, @PathParam("clientId") String clientId) {
|
public void onOpen(Session session, @PathParam("clientId") String clientId) {
|
||||||
log.info("WebSocket 链接已建立:{}", clientId);
|
log.info("WebSocket 链接已建立:{}", clientId);
|
||||||
//创建会话
|
cacheClientTts.put(clientId,new String());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 接收文本消息
|
// 接收文本消息
|
||||||
@OnMessage
|
@OnMessage
|
||||||
public void onTextMessage(Session session, String message) {
|
public void onTextMessage(Session session, String message,@PathParam("clientId") String clientId) {
|
||||||
System.out.println("接收到文本消息: " + message);
|
System.out.println("接收到文本消息: " + message);
|
||||||
// 可以在这里处理文本流数据
|
|
||||||
try {
|
try {
|
||||||
// 发送响应
|
//处理文本结果
|
||||||
session.getBasicRemote().sendText("已收到文本: " + message);
|
if(StrUtil.isNotEmpty(message)){
|
||||||
} catch (IOException e) {
|
Map<String,String> mapResult = JSONUtil.toBean(JSONUtil.parseObj(message),Map.class);
|
||||||
|
String resultFlag = mapResult.get("msg");
|
||||||
|
if("done".equals(resultFlag)){
|
||||||
|
//语音结束,开始进行回答解析
|
||||||
|
String cacheResultText = cacheClientTts.get(clientId);
|
||||||
|
log.info("1、开始进行AI回答时间:{}",System.currentTimeMillis()/1000);
|
||||||
|
//把提问的文字发送给CPT(流式处理)
|
||||||
|
OpenAiStreamClient aiStreamClient = SpringUtils.getBean(OpenAiStreamClient.class);
|
||||||
|
aiStreamClient.streamChat(cacheResultText, new OpenAiStreamListenerService() {
|
||||||
|
@Override
|
||||||
|
public void onMessage(String content) {
|
||||||
|
// 实时输出内容
|
||||||
|
//开始进行语音输出-流式持续输出
|
||||||
|
log.info("2、开始进行AI回答时间:{}",System.currentTimeMillis()/1000);
|
||||||
|
//把结果文字转成语音文件
|
||||||
|
//生成文件
|
||||||
|
// 生成唯一文件名
|
||||||
|
String resultFileName = clientId + "_" + System.currentTimeMillis() + ".opus";
|
||||||
|
String resultPathUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_RESULT_DIR + resultFileName;
|
||||||
|
ElevenLabsClient elevenLabsClient = SpringUtils.getBean(ElevenLabsClient.class);
|
||||||
|
elevenLabsClient.handleTextToVoice(content, resultPathUrl);
|
||||||
|
log.info("3、开始进行AI回答时间:{}",System.currentTimeMillis()/1000);
|
||||||
|
//持续返回数据流给客户端
|
||||||
|
try {
|
||||||
|
//文件转换成文件流
|
||||||
|
ByteBuffer outByteBuffer = convertFileToByteBuffer(resultPathUrl);
|
||||||
|
//发送文件流数据
|
||||||
|
session.getBasicRemote().sendBinary(outByteBuffer);
|
||||||
|
// 发送响应确认
|
||||||
|
log.info("4、开始进行AI回答时间:{}",System.currentTimeMillis()/1000);
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onComplete() {
|
||||||
|
try {
|
||||||
|
Map<String,String> resultEntity = new HashMap<>();
|
||||||
|
resultEntity.put("msg","done");
|
||||||
|
//发送通知告诉客户端已经回答结束了
|
||||||
|
session.getBasicRemote().sendText(JSONUtil.toJsonStr(resultEntity));
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
log.info("5、结束进行AI回答时间:{}",System.currentTimeMillis()/1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onError(Throwable throwable) {
|
||||||
|
throwable.printStackTrace();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -71,53 +133,36 @@ public class ChatWebSocketHandler {
|
|||||||
// 接收二进制消息(流数据)
|
// 接收二进制消息(流数据)
|
||||||
@OnMessage
|
@OnMessage
|
||||||
public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
|
public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
|
||||||
try {
|
log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||||
log.info("1、运行时间:{}",System.currentTimeMillis()/1000);
|
log.info("客户端ID为:{}", clientId);
|
||||||
log.info("客户端ID为:{}", clientId);
|
// 处理二进制流数据
|
||||||
// 处理二进制流数据
|
byte[] bytes = new byte[byteBuffer.remaining()];
|
||||||
byte[] bytes = new byte[byteBuffer.remaining()];
|
//从缓冲区中读取数据并存储到指定的字节数组中
|
||||||
//从缓冲区中读取数据并存储到指定的字节数组中
|
byteBuffer.get(bytes);
|
||||||
byteBuffer.get(bytes);
|
log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||||
log.info("2、运行时间:{}",System.currentTimeMillis()/1000);
|
// 生成唯一文件名
|
||||||
// 生成唯一文件名
|
String fileName = clientId + "_" + System.currentTimeMillis() + ".webm";
|
||||||
String fileName = clientId + "_" + System.currentTimeMillis() + ".webm";
|
String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
|
||||||
String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
|
log.info("文件路径为:{}", pathUrl);
|
||||||
// String pathUrl = "/Users/wangxiangshun/Desktop/0.8733346782733291.webm";
|
log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||||
log.info("文件路径为:{}", pathUrl);
|
try{
|
||||||
saveAsWebM(bytes, pathUrl);
|
//接收到数据流后直接就进行SST处理
|
||||||
log.info("3、运行时间:{}",System.currentTimeMillis()/1000);
|
|
||||||
//拿到文件进行文字转换
|
//拿到文件进行文字转换
|
||||||
WhisperClient whisperClient = SpringUtils.getBean(WhisperClient.class);
|
WhisperClient whisperClient = SpringUtils.getBean(WhisperClient.class);
|
||||||
String resultText = whisperClient.handleVoiceToText(pathUrl);
|
String resultText = whisperClient.transcribeAudio(bytes,fileName);
|
||||||
log.info("4、运行时间:{}",System.currentTimeMillis()/1000);
|
//进行客户端文本数据存储
|
||||||
//把提问的文字发送给CPT
|
String cacheString = cacheClientTts.get(clientId);
|
||||||
ChatGPTClient chatGPTClient = SpringUtils.getBean(ChatGPTClient.class);
|
if(StrUtil.isNotEmpty(cacheString)){
|
||||||
String resultMsg = chatGPTClient.handleAiChat(resultText);
|
cacheString = cacheString+resultText;
|
||||||
log.info("5、运行时间:{}",System.currentTimeMillis()/1000);
|
}else {
|
||||||
//把结果文字转成语音文件
|
cacheString = resultText;
|
||||||
//生成文件
|
|
||||||
// 生成唯一文件名
|
|
||||||
String resultFileName = clientId + "_" + System.currentTimeMillis() + ".opus";
|
|
||||||
String resultPathUrl = RuoYiConfig.getProfile() + VOICE_STORAGE_RESULT_DIR + resultFileName;
|
|
||||||
ElevenLabsClient elevenLabsClient = SpringUtils.getBean(ElevenLabsClient.class);
|
|
||||||
elevenLabsClient.handleTextToVoice(resultMsg, resultPathUrl);
|
|
||||||
log.info("6、运行时间:{}",System.currentTimeMillis()/1000);
|
|
||||||
//把语音文件转换成流,发送给前端
|
|
||||||
System.out.println("接收到二进制数据,长度: " + bytes.length + " bytes");
|
|
||||||
try {
|
|
||||||
//文件转换成文件流
|
|
||||||
ByteBuffer outByteBuffer = convertFileToByteBuffer(resultPathUrl);
|
|
||||||
//发送文件流数据
|
|
||||||
session.getBasicRemote().sendBinary(outByteBuffer);
|
|
||||||
// 发送响应确认
|
|
||||||
// session.getBasicRemote().sendText("已收到二进制数据,长度: " + bytes.length);
|
|
||||||
log.info("7、运行时间:{}",System.currentTimeMillis()/1000);
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
cacheClientTts.put(clientId,cacheString);
|
||||||
|
|
||||||
|
}catch (Exception e){
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 连接关闭时调用
|
// 连接关闭时调用
|
||||||
|
|||||||
@@ -146,7 +146,7 @@ verification:
|
|||||||
elevenLabs:
|
elevenLabs:
|
||||||
baseUrl: https://api.elevenlabs.io/v1
|
baseUrl: https://api.elevenlabs.io/v1
|
||||||
apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812
|
apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812
|
||||||
modelId: eleven_turbo_v2.5
|
modelId: eleven_turbo_v2_5
|
||||||
# 语音转文本
|
# 语音转文本
|
||||||
whisper:
|
whisper:
|
||||||
apiUrl: https://api.openai.com/v1/audio/transcriptions
|
apiUrl: https://api.openai.com/v1/audio/transcriptions
|
||||||
|
|||||||
@@ -106,8 +106,7 @@ public class ElevenLabsClient {
|
|||||||
VoicesResponse voicesResponse = getVoices(httpClient);
|
VoicesResponse voicesResponse = getVoices(httpClient);
|
||||||
if (voicesResponse != null && voicesResponse.getVoices() != null
|
if (voicesResponse != null && voicesResponse.getVoices() != null
|
||||||
&& voicesResponse.getVoices().length > 0) {
|
&& voicesResponse.getVoices().length > 0) {
|
||||||
// 使用第一个可用语音进行文本转语音
|
// 使用第一个可用语音进行文本转语音(澳洲本地女声)
|
||||||
// String firstVoiceId = voicesResponse.getVoices()[0].getVoice_id();
|
|
||||||
String firstVoiceId = "21m00Tcm4TlvDq8ikWAM";
|
String firstVoiceId = "21m00Tcm4TlvDq8ikWAM";
|
||||||
textToSpeech(inputText, firstVoiceId, outputFile,httpClient);
|
textToSpeech(inputText, firstVoiceId, outputFile,httpClient);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,121 @@
|
|||||||
|
package com.vetti.common.ai.gpt;
|
||||||
|
|
||||||
|
import cn.hutool.json.JSONObject;
|
||||||
|
import cn.hutool.json.JSONUtil;
|
||||||
|
import com.vetti.common.ai.gpt.service.OpenAiStreamListenerService;
|
||||||
|
import okhttp3.*;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AI ChatGPT 对话
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class OpenAiStreamClient {
|
||||||
|
|
||||||
|
@Value("${chatGpt.apiKey}")
|
||||||
|
private String apiKey;
|
||||||
|
// API端点URL
|
||||||
|
@Value("${chatGpt.apiUrl}")
|
||||||
|
private String apiUrl;
|
||||||
|
|
||||||
|
@Value("${chatGpt.model}")
|
||||||
|
private String model;
|
||||||
|
|
||||||
|
@Value("${chatGpt.role}")
|
||||||
|
private String role;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 发送流式请求
|
||||||
|
*
|
||||||
|
* @param prompt 提示词
|
||||||
|
* @param listener 流式响应监听器
|
||||||
|
*/
|
||||||
|
public void streamChat(String prompt, OpenAiStreamListenerService listener) {
|
||||||
|
OkHttpClient client = new OkHttpClient.Builder()
|
||||||
|
.connectTimeout(30, TimeUnit.SECONDS)
|
||||||
|
.readTimeout(60, TimeUnit.SECONDS)
|
||||||
|
.writeTimeout(30, TimeUnit.SECONDS)
|
||||||
|
.build();
|
||||||
|
// 构建请求参数
|
||||||
|
Map<String, Object> requestBody = new HashMap<>();
|
||||||
|
requestBody.put("model", model);
|
||||||
|
requestBody.put("stream", true);
|
||||||
|
|
||||||
|
// 构建消息
|
||||||
|
Map<String, String> message = new HashMap<>();
|
||||||
|
message.put("role", role);
|
||||||
|
message.put("content", prompt);
|
||||||
|
requestBody.put("messages", new Object[]{message});
|
||||||
|
|
||||||
|
// 创建请求
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(apiUrl)
|
||||||
|
.addHeader("Content-Type", "application/json")
|
||||||
|
.addHeader("Authorization", "Bearer " + apiKey)
|
||||||
|
.post(RequestBody.create(JSONUtil.toJsonStr(requestBody),
|
||||||
|
MediaType.parse("application/json; charset=utf-8")
|
||||||
|
))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// 发送异步请求
|
||||||
|
client.newCall(request).enqueue(new Callback() {
|
||||||
|
@Override
|
||||||
|
public void onFailure(Call call, IOException e) {
|
||||||
|
listener.onError(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onResponse(Call call, Response response) throws IOException {
|
||||||
|
if (!response.isSuccessful()) {
|
||||||
|
listener.onError(new IOException("Unexpected code " + response));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// 处理流式响应
|
||||||
|
ResponseBody body = response.body();
|
||||||
|
if (body == null) {
|
||||||
|
listener.onError(new IOException("Response body is null"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try (var source = body.source()) {
|
||||||
|
// 逐行读取响应
|
||||||
|
while (!source.exhausted()) {
|
||||||
|
String line = source.readUtf8Line();
|
||||||
|
if (line == null || line.isEmpty()) continue;
|
||||||
|
// 处理SSE格式(去除"data: "前缀)
|
||||||
|
if (line.startsWith("data: ")) {
|
||||||
|
String data = line.substring(6).trim();
|
||||||
|
// 检查是否为结束标记
|
||||||
|
if (data.equals("[DONE]")) {
|
||||||
|
listener.onComplete();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// 解析JSON获取内容
|
||||||
|
try {
|
||||||
|
JSONObject json = JSONUtil.parseObj(data);
|
||||||
|
String content = json.getJSONArray("choices")
|
||||||
|
.getJSONObject(0)
|
||||||
|
.getJSONObject("delta")
|
||||||
|
.getStr("content");
|
||||||
|
|
||||||
|
if (content != null && !content.isEmpty()) {
|
||||||
|
listener.onMessage(content);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
listener.onError(new IOException("Parse error: " + e.getMessage()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
response.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
package com.vetti.common.ai.gpt.service;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AI 流式响应监听器 服务层
|
||||||
|
*/
|
||||||
|
public interface OpenAiStreamListenerService {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回信息
|
||||||
|
* @param content
|
||||||
|
*/
|
||||||
|
public void onMessage(String content);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 返回完成通知
|
||||||
|
*/
|
||||||
|
public void onComplete();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 异常信息通知
|
||||||
|
* @param throwable
|
||||||
|
*/
|
||||||
|
public void onError(Throwable throwable);
|
||||||
|
|
||||||
|
}
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
package com.vetti.common.ai.whisper;
|
package com.vetti.common.ai.whisper;
|
||||||
|
|
||||||
import cn.hutool.core.util.StrUtil;
|
import cn.hutool.core.util.StrUtil;
|
||||||
import cn.hutool.json.JSONObject;
|
|
||||||
import cn.hutool.json.JSONUtil;
|
import cn.hutool.json.JSONUtil;
|
||||||
import okhttp3.*;
|
import okhttp3.*;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
@@ -11,6 +10,7 @@ import java.io.File;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 语音转换文本
|
* 语音转换文本
|
||||||
@@ -75,6 +75,45 @@ public class WhisperClient {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 将音频文件转换为文本
|
||||||
|
* @param bytes 语音流
|
||||||
|
* @param fileName 文件名
|
||||||
|
* @return
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public String transcribeAudio(byte[] bytes, String fileName) throws Exception {
|
||||||
|
OkHttpClient client = new OkHttpClient.Builder()
|
||||||
|
.connectTimeout(60, TimeUnit.SECONDS)
|
||||||
|
.readTimeout(60, TimeUnit.SECONDS)
|
||||||
|
.writeTimeout(60, TimeUnit.SECONDS)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// 构建请求体,包含音频流
|
||||||
|
RequestBody requestBody = new MultipartBody.Builder()
|
||||||
|
.setType(MultipartBody.FORM)
|
||||||
|
.addFormDataPart("model", MODEL)
|
||||||
|
.addFormDataPart(
|
||||||
|
"file",
|
||||||
|
fileName,
|
||||||
|
RequestBody.create(MediaType.parse("audio/opus"), bytes)
|
||||||
|
)
|
||||||
|
.addFormDataPart("response_format", "json")
|
||||||
|
.build();
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(API_URL)
|
||||||
|
.header("Authorization", "Bearer " + API_URL)
|
||||||
|
.post(requestBody)
|
||||||
|
.build();
|
||||||
|
try (Response response = client.newCall(request).execute()) {
|
||||||
|
if (!response.isSuccessful()) {
|
||||||
|
throw new IOException("Unexpected response: " + response);
|
||||||
|
}
|
||||||
|
return response.body().string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 处理语音转换成文字
|
* 处理语音转换成文字
|
||||||
*
|
*
|
||||||
|
|||||||
Reference in New Issue
Block a user