STT流式输入业务逻辑处理
This commit is contained in:
@@ -0,0 +1,165 @@
|
||||
package com.vetti.common.ai.whisper;
|
||||
|
||||
import cn.hutool.json.JSONObject;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.java_websocket.client.WebSocketClient;
|
||||
import org.java_websocket.handshake.ServerHandshake;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Base64;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
@Slf4j
|
||||
public class OpenAIRealtimeClient extends WebSocketClient {
|
||||
|
||||
// 构造方法:初始化连接地址和请求头(携带认证信息)
|
||||
public OpenAIRealtimeClient(String apiKey) throws URISyntaxException {
|
||||
super(
|
||||
new URI("wss://api.openai.com/v1/realtime?intent=transcription"),
|
||||
buildHeaders(apiKey) // 构建请求头
|
||||
);
|
||||
}
|
||||
|
||||
// 构建请求头(携带认证和内容类型)
|
||||
private static Map<String, String> buildHeaders(String apiKey) {
|
||||
Map<String, String> headers = new HashMap<>();
|
||||
headers.put("Authorization", "Bearer " + apiKey); // 核心认证头
|
||||
headers.put("Content-Type", "application/json"); // 根据接口要求调整
|
||||
headers.put("OpenAI-Beta", "realtime=v1"); // 若接口要求 beta 版本标识
|
||||
return headers;
|
||||
}
|
||||
|
||||
// 连接成功回调
|
||||
@Override
|
||||
public void onOpen(ServerHandshake handshakedata) {
|
||||
System.out.println("WebSocket 连接已打开,状态码:" + handshakedata.getHttpStatus());
|
||||
// 连接成功后可发送初始化消息(如配置转录参数)
|
||||
sendInitMessage();
|
||||
}
|
||||
|
||||
// 接收服务器消息回调(处理转录结果)
|
||||
@Override
|
||||
public void onMessage(String message) {
|
||||
System.out.println("收到转录文本:" + message);
|
||||
// 解析 JSON 格式的转录结果(可使用 Jackson/Gson 等库)
|
||||
}
|
||||
|
||||
// 接收二进制消息(若服务器返回二进制数据,如音频片段确认)
|
||||
@Override
|
||||
public void onMessage(ByteBuffer bytes) {
|
||||
System.out.println("收到二进制数据,长度:" + bytes.remaining());
|
||||
// 处理二进制消息(如需)
|
||||
}
|
||||
|
||||
// 连接关闭回调
|
||||
@Override
|
||||
public void onClose(int code, String reason, boolean remote) {
|
||||
System.out.println("连接关闭,状态码:" + code + ",原因:" + reason);
|
||||
}
|
||||
|
||||
// 连接错误回调
|
||||
@Override
|
||||
public void onError(Exception ex) {
|
||||
System.err.println("连接错误:" + ex.getMessage());
|
||||
ex.printStackTrace();
|
||||
}
|
||||
|
||||
// 发送初始化消息(根据 OpenAI 接口要求配置转录参数)
|
||||
private void sendInitMessage() {
|
||||
JSONObject config = new JSONObject();
|
||||
JSONObject sessionConfig = new JSONObject();
|
||||
JSONObject transcription = new JSONObject();
|
||||
JSONObject turnDetection = new JSONObject();
|
||||
|
||||
// 配置转录参数
|
||||
transcription.put("model", "gpt-4o-mini-transcribe");
|
||||
transcription.put("language", "zh"); // 中文
|
||||
|
||||
// 配置断句检测
|
||||
turnDetection.put("type", "server_vad");
|
||||
turnDetection.put("prefix_padding_ms", 300);
|
||||
turnDetection.put("silence_duration_ms", 10);
|
||||
|
||||
// 组装完整配置
|
||||
sessionConfig.put("input_audio_transcription", transcription);
|
||||
sessionConfig.put("turn_detection", turnDetection);
|
||||
config.put("type", "transcription_session.update");
|
||||
config.put("session", sessionConfig);
|
||||
|
||||
this.send(config.toString());
|
||||
System.out.println("已发送初始化配置");
|
||||
}
|
||||
|
||||
// 发送音频数据(核心:将麦克风/文件的音频流发送到服务器)
|
||||
public void sendAudioData(byte[] audioBytes) {
|
||||
if (this.isOpen()) {
|
||||
// 按接口要求封装音频数据(通常为 JSON 包裹二进制,或直接发送二进制)
|
||||
// OpenAI要求语音数据以Base64编码发送
|
||||
String base64Chunk = Base64.getEncoder().encodeToString(audioBytes);
|
||||
String audioJson = "{\n" +
|
||||
" \"type\": \"input_audio_buffer.append\",\n" +
|
||||
" \"audio\": \""+base64Chunk+"\"\n" +
|
||||
"}";
|
||||
this.send(audioJson);
|
||||
// this.send(audioBytes);
|
||||
System.out.println("已发送音频数据,长度:" + audioBytes.length);
|
||||
} else {
|
||||
System.err.println("连接未打开,无法发送音频");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void commitData() {
|
||||
String base64Chunk = Base64.getEncoder().encodeToString(new byte[0]);
|
||||
String audioJson = "{\n" +
|
||||
" \"type\": \"input_audio_buffer.append\",\n" +
|
||||
" \"audio\": \""+base64Chunk+"\"\n" +
|
||||
"}";
|
||||
this.send(audioJson);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
String apiKey = "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"; // 替换为你的 OpenAI API Key
|
||||
|
||||
try {
|
||||
// 创建客户端
|
||||
OpenAIRealtimeClient client = new OpenAIRealtimeClient(apiKey);
|
||||
// 连接服务器
|
||||
client.connectBlocking(); // 阻塞式连接(也可使用非阻塞 connect())
|
||||
|
||||
// 模拟发送音频数据(实际应从麦克风或文件读取)
|
||||
// 注意:音频格式需符合接口要求(通常为 PCM 16kHz 单声道等)
|
||||
// 读取本地PCM文件(16kHz单声道16位)并分片发送
|
||||
try (java.io.FileInputStream fis = new java.io.FileInputStream("/Users/wangxiangshun/Desktop/临时文件/output1112.mp3")) {
|
||||
byte[] buffer = new byte[6400]; // 200ms的PCM数据(16000Hz*16位*1声道=32000字节/秒 → 6400字节/200ms)
|
||||
int len;
|
||||
while ((len = fis.read(buffer)) != -1) {
|
||||
byte[] chunk = new byte[len];
|
||||
System.arraycopy(buffer, 0, chunk, 0, len);
|
||||
client.sendAudioData(chunk);
|
||||
Thread.sleep(200); // 模拟实时流(每200ms发送一次)
|
||||
}
|
||||
}
|
||||
|
||||
//发送一个空的二进制流
|
||||
// 4. 发送结束标记(空二进制消息)
|
||||
client.sendAudioData(new byte[600]);
|
||||
|
||||
// 等待转录完成(实际场景需根据业务逻辑控制)
|
||||
Thread.sleep(20000);
|
||||
|
||||
// 关闭连接
|
||||
client.close();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
// 模拟读取音频数据(实际需用音频库采集,如 Java Sound API 或 VLCJ)
|
||||
private static byte[] readAudioFromSource() {
|
||||
// 示例:返回空字节数组(实际应填充真实音频数据)
|
||||
return new byte[1024];
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
package com.vetti.common.ai.whisper;
|
||||
|
||||
import cn.hutool.json.JSONObject;
|
||||
import okhttp3.*;
|
||||
|
||||
import javax.sound.sampled.AudioFormat;
|
||||
import javax.sound.sampled.AudioSystem;
|
||||
import javax.sound.sampled.DataLine;
|
||||
import javax.sound.sampled.TargetDataLine;
|
||||
import java.util.Base64;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
|
||||
public class RealtimeTranscriptionMicrophone {
|
||||
|
||||
private static final String API_KEY = "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";
|
||||
private static final String URL = "wss://api.openai.com/v1/realtime?intent=transcription";
|
||||
private static final int SAMPLE_RATE = 16000; // 16kHz
|
||||
private static final int BUFFER_SIZE = 4096; // 4 KB 每次读取
|
||||
private static final int BITS_PER_SAMPLE = 16; // 每样本 16 位
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
OkHttpClient client = new OkHttpClient();
|
||||
CountDownLatch latch = new CountDownLatch(1);
|
||||
|
||||
// 设置 WebSocket 请求
|
||||
Request request = new Request.Builder()
|
||||
.url(URL)
|
||||
.addHeader("Authorization", "Bearer " + API_KEY)
|
||||
.addHeader("OpenAI-Beta", "realtime=v1")
|
||||
.build();
|
||||
|
||||
WebSocket ws = client.newWebSocket(request, new WebSocketListener() {
|
||||
@Override
|
||||
public void onOpen(WebSocket webSocket, Response response) {
|
||||
System.out.println("✅ WebSocket 连接成功");
|
||||
|
||||
//发送配置
|
||||
JSONObject config = new JSONObject();
|
||||
JSONObject sessionConfig = new JSONObject();
|
||||
JSONObject transcription = new JSONObject();
|
||||
JSONObject turnDetection = new JSONObject();
|
||||
|
||||
// 配置转录参数
|
||||
transcription.put("model", "gpt-4o-mini-transcribe");
|
||||
transcription.put("language", "zh"); // 中文
|
||||
// 配置断句检测
|
||||
turnDetection.put("type", "server_vad");
|
||||
turnDetection.put("prefix_padding_ms", 300);
|
||||
turnDetection.put("silence_duration_ms", 10);
|
||||
// 组装完整配置
|
||||
sessionConfig.put("input_audio_transcription", transcription);
|
||||
sessionConfig.put("turn_detection", turnDetection);
|
||||
config.put("type", "transcription_session.update");
|
||||
config.put("session", sessionConfig);
|
||||
|
||||
webSocket.send(config.toString());
|
||||
|
||||
// 1. 启动音频缓冲
|
||||
webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
|
||||
|
||||
// 2. 开始录音并实时发送
|
||||
new Thread(() -> {
|
||||
try {
|
||||
// 设置麦克风输入流
|
||||
AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
|
||||
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
|
||||
TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
|
||||
line.open(format);
|
||||
line.start();
|
||||
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int bytesRead;
|
||||
while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) {
|
||||
// 将音频数据转换为 Base64 编码的字符串
|
||||
byte[] audioData = new byte[bytesRead];
|
||||
System.arraycopy(buffer, 0, audioData, 0, bytesRead);
|
||||
String base64Audio = Base64.getEncoder().encodeToString(audioData);
|
||||
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||
webSocket.send(message);
|
||||
}
|
||||
|
||||
// 3. 提交音频并请求转录
|
||||
webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
|
||||
webSocket.send("{\"type\": \"response.create\"}");
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}).start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onMessage(WebSocket webSocket, String text) {
|
||||
System.out.println("📩 收到转录结果: " + text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(WebSocket webSocket, Throwable t, Response response) {
|
||||
System.err.println("❌ 连接失败: " + t.getMessage());
|
||||
latch.countDown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onClosing(WebSocket webSocket, int code, String reason) {
|
||||
System.out.println("⚠️ 连接即将关闭: " + reason);
|
||||
webSocket.close(1000, null);
|
||||
latch.countDown();
|
||||
}
|
||||
});
|
||||
|
||||
// 等待 WebSocket 关闭
|
||||
latch.await();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user