语音AI分片以及流式处理返回结果

This commit is contained in:
2025-10-16 14:27:20 +08:00
parent 1c816402df
commit a5777d7beb
6 changed files with 285 additions and 56 deletions

View File

@@ -106,8 +106,7 @@ public class ElevenLabsClient {
VoicesResponse voicesResponse = getVoices(httpClient);
if (voicesResponse != null && voicesResponse.getVoices() != null
&& voicesResponse.getVoices().length > 0) {
// 使用第一个可用语音进行文本转语音
// String firstVoiceId = voicesResponse.getVoices()[0].getVoice_id();
// 使用第一个可用语音进行文本转语音(澳洲本地女声)
String firstVoiceId = "21m00Tcm4TlvDq8ikWAM";
textToSpeech(inputText, firstVoiceId, outputFile,httpClient);
}

View File

@@ -0,0 +1,121 @@
package com.vetti.common.ai.gpt;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import com.vetti.common.ai.gpt.service.OpenAiStreamListenerService;
import okhttp3.*;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * Streaming chat client for an OpenAI-style chat-completions endpoint.
 *
 * <p>Sends a single-message prompt with {@code stream=true} and forwards every content delta of
 * the server-sent-event (SSE) response to an {@link OpenAiStreamListenerService}.
 */
@Component
public class OpenAiStreamClient {

    /** Field name that prefixes every SSE payload line (the space after the colon is optional). */
    private static final String SSE_DATA_PREFIX = "data:";

    /** Sentinel payload that marks the end of the stream. */
    private static final String SSE_DONE_MARKER = "[DONE]";

    /**
     * HTTP client shared by all requests of this component. Each OkHttpClient owns its own
     * connection pool and dispatcher threads, so building one per call wastes those resources.
     */
    private final OkHttpClient client = new OkHttpClient.Builder()
            .connectTimeout(30, TimeUnit.SECONDS)
            .readTimeout(60, TimeUnit.SECONDS)
            .writeTimeout(30, TimeUnit.SECONDS)
            .build();

    @Value("${chatGpt.apiKey}")
    private String apiKey;

    // API endpoint URL
    @Value("${chatGpt.apiUrl}")
    private String apiUrl;

    @Value("${chatGpt.model}")
    private String model;

    @Value("${chatGpt.role}")
    private String role;

    /**
     * Sends a streaming chat request and returns immediately; results are delivered to
     * {@code listener} from the HTTP client's callback thread.
     *
     * <p>{@code onMessage} is invoked for every non-empty content delta. {@code onComplete} is
     * invoked once when the {@code [DONE]} marker is received or the stream ends cleanly.
     * {@code onError} is invoked for transport failures, non-2xx responses, a missing body,
     * read errors, and chunks that cannot be processed (processing continues after the latter).
     *
     * @param prompt   prompt text sent as the content of the single message
     * @param listener receiver of streamed content, completion and error notifications
     */
    public void streamChat(String prompt, OpenAiStreamListenerService listener) {
        Request request = buildRequest(prompt);
        // Asynchronous call: the callback runs on an OkHttp dispatcher thread.
        client.newCall(request).enqueue(new Callback() {
            @Override
            public void onFailure(Call call, IOException e) {
                listener.onError(e);
            }

            @Override
            public void onResponse(Call call, Response response) {
                // try-with-resources closes the response on every path, including early returns.
                try (Response res = response) {
                    if (!res.isSuccessful()) {
                        listener.onError(new IOException("Unexpected code " + res));
                        return;
                    }
                    ResponseBody body = res.body();
                    if (body == null) {
                        listener.onError(new IOException("Response body is null"));
                        return;
                    }
                    forwardStream(body, listener);
                } catch (IOException e) {
                    // A read failure mid-stream must reach the listener instead of escaping
                    // the callback, where it would only be logged by the HTTP client.
                    listener.onError(e);
                }
            }
        });
    }

    /** Builds the POST request carrying the model, the stream flag and the single prompt message. */
    private Request buildRequest(String prompt) {
        Map<String, String> message = new HashMap<>();
        message.put("role", role);
        message.put("content", prompt);

        Map<String, Object> requestBody = new HashMap<>();
        requestBody.put("model", model);
        requestBody.put("stream", true);
        requestBody.put("messages", new Object[]{message});

        return new Request.Builder()
                .url(apiUrl)
                .addHeader("Content-Type", "application/json")
                .addHeader("Authorization", "Bearer " + apiKey)
                .post(RequestBody.create(JSONUtil.toJsonStr(requestBody),
                        MediaType.parse("application/json; charset=utf-8")))
                .build();
    }

    /** Reads the SSE body line by line, dispatches each data payload, then signals completion. */
    private void forwardStream(ResponseBody body, OpenAiStreamListenerService listener)
            throws IOException {
        var source = body.source();
        while (!source.exhausted()) {
            String line = source.readUtf8Line();
            // Blank separator lines, comments and non-data fields carry no payload.
            if (line == null || !line.startsWith(SSE_DATA_PREFIX)) {
                continue;
            }
            String data = line.substring(SSE_DATA_PREFIX.length()).trim();
            if (data.isEmpty()) {
                continue;
            }
            if (SSE_DONE_MARKER.equals(data)) {
                break;
            }
            dispatchChunk(data, listener);
        }
        // Reached on [DONE] and on a clean end of stream, so the listener is always released.
        listener.onComplete();
    }

    /** Parses one JSON chunk and forwards its {@code choices[0].delta.content}, if present. */
    private void dispatchChunk(String data, OpenAiStreamListenerService listener) {
        try {
            JSONObject json = JSONUtil.parseObj(data);
            var choices = json.getJSONArray("choices");
            // Some chunks legitimately carry no choices; they are not errors.
            if (choices == null || choices.isEmpty()) {
                return;
            }
            JSONObject delta = choices.getJSONObject(0).getJSONObject("delta");
            if (delta == null) {
                return;
            }
            String content = delta.getStr("content");
            if (content != null && !content.isEmpty()) {
                listener.onMessage(content);
            }
        } catch (Exception e) {
            // Best effort: report the bad chunk (keeping the cause) and let the stream continue.
            listener.onError(new IOException("Parse error: " + e.getMessage(), e));
        }
    }
}

View File

@@ -0,0 +1,25 @@
package com.vetti.common.ai.gpt.service;
/**
* AI 流式响应监听器 服务层
*/
public interface OpenAiStreamListenerService {
/**
* 返回信息
* @param content
*/
public void onMessage(String content);
/**
* 返回完成通知
*/
public void onComplete();
/**
* 异常信息通知
* @param throwable
*/
public void onError(Throwable throwable);
}

View File

@@ -1,7 +1,6 @@
package com.vetti.common.ai.whisper;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import okhttp3.*;
import org.springframework.beans.factory.annotation.Value;
@@ -11,6 +10,7 @@ import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* 语音转换文本
@@ -75,6 +75,45 @@ public class WhisperClient {
}
}
/**
 * HTTP client shared by every in-memory transcription request. Each OkHttpClient owns its own
 * connection pool and dispatcher threads, so it is built once instead of once per call.
 */
private static final OkHttpClient TRANSCRIBE_BYTES_HTTP_CLIENT = new OkHttpClient.Builder()
        .connectTimeout(60, TimeUnit.SECONDS)
        .readTimeout(60, TimeUnit.SECONDS)
        .writeTimeout(60, TimeUnit.SECONDS)
        .build();

/**
 * Transcribes in-memory audio by uploading it as a multipart form to the transcription endpoint.
 *
 * @param bytes    raw audio content; must not be null or empty
 * @param fileName file name attached to the uploaded part
 * @return the raw response body as returned by the endpoint ({@code response_format=json})
 * @throws IllegalArgumentException if {@code bytes} is null or empty
 * @throws IOException              if the call fails, the status is not 2xx, or the body is missing
 * @throws Exception                declared for compatibility with existing callers
 */
public String transcribeAudio(byte[] bytes, String fileName) throws Exception {
    if (bytes == null || bytes.length == 0) {
        // Fail fast instead of sending a request that cannot contain any audio.
        throw new IllegalArgumentException("Audio bytes must not be null or empty");
    }

    // Multipart body: model name, the audio part, and the requested response format.
    RequestBody requestBody = new MultipartBody.Builder()
            .setType(MultipartBody.FORM)
            .addFormDataPart("model", MODEL)
            .addFormDataPart(
                    "file",
                    fileName,
                    RequestBody.create(MediaType.parse("audio/opus"), bytes)
            )
            .addFormDataPart("response_format", "json")
            .build();

    // NOTE(review): the bearer token below is built from API_URL (the endpoint URL), which looks
    // like a copy-paste slip -- it should almost certainly be this class's API key. That field
    // is not visible from this method, so the value is left unchanged; confirm and fix.
    Request request = new Request.Builder()
            .url(API_URL)
            .header("Authorization", "Bearer " + API_URL)
            .post(requestBody)
            .build();

    try (Response response = TRANSCRIBE_BYTES_HTTP_CLIENT.newCall(request).execute()) {
        if (!response.isSuccessful()) {
            throw new IOException("Unexpected response: " + response);
        }
        ResponseBody body = response.body();
        if (body == null) {
            throw new IOException("Response body is null");
        }
        return body.string();
    }
}
/**
* 处理语音转换成文字
*