STT 数据流处理
This commit is contained in:
@@ -14,6 +14,7 @@ import okhttp3.*;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import javax.sound.sampled.AudioFormat;
|
||||
import javax.sound.sampled.AudioInputStream;
|
||||
import javax.sound.sampled.AudioSystem;
|
||||
import javax.websocket.*;
|
||||
@@ -21,10 +22,7 @@ import javax.websocket.server.PathParam;
|
||||
import javax.websocket.server.ServerEndpoint;
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
@@ -200,51 +198,6 @@ public class ChatWebSocketHandler {
|
||||
}
|
||||
}
|
||||
|
||||
// // 接收二进制消息(流数据)
|
||||
// @OnMessage
|
||||
// public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
|
||||
// log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||
// log.info("客户端ID为:{}", clientId);
|
||||
// // 处理二进制流数据
|
||||
// byte[] bytes = new byte[byteBuffer.remaining()];
|
||||
// //从缓冲区中读取数据并存储到指定的字节数组中
|
||||
// byteBuffer.get(bytes);
|
||||
// log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||
// // 生成唯一文件名
|
||||
// String fileName = clientId + "_" + System.currentTimeMillis() + ".wav";
|
||||
// String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
|
||||
// log.info("文件路径为:{}", pathUrl);
|
||||
// log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||
// try{
|
||||
// log.info("文件流的大小为:{}",bytes.length);
|
||||
// saveAsWebM(bytes,pathUrl);
|
||||
// //接收到数据流后直接就进行SST处理
|
||||
// //发送消息
|
||||
// WebSocket webSocket = cacheWebSocket.get(clientId);
|
||||
// log.info("获取的socket对象为:{}",webSocket);
|
||||
// if(webSocket != null){
|
||||
//// 1. 启动音频缓冲
|
||||
//// webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
|
||||
// log.info("3.1 开始发送数据音频流啦");
|
||||
// // 将音频数据转换为 Base64 编码的字符串
|
||||
// //进行转换
|
||||
// // 转换音频格式
|
||||
// AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
|
||||
// byte[] outputAudioBytes = convertAudio(bytes, format);
|
||||
// String base64Audio = Base64.getEncoder().encodeToString(outputAudioBytes);
|
||||
// String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||
// webSocket.send(message);
|
||||
// log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||
// // 3. 提交音频并请求转录
|
||||
//// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
|
||||
//// webSocket.send("{\"type\": \"response.create\"}");
|
||||
// }
|
||||
// }catch (Exception e){
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
//
|
||||
// }
|
||||
|
||||
// 接收二进制消息(流数据)
|
||||
@OnMessage
|
||||
public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
|
||||
@@ -254,17 +207,66 @@ public class ChatWebSocketHandler {
|
||||
byte[] bytes = new byte[byteBuffer.remaining()];
|
||||
//从缓冲区中读取数据并存储到指定的字节数组中
|
||||
byteBuffer.get(bytes);
|
||||
|
||||
// 1. 获取当前会话的缓存
|
||||
List<byte[]> fragments = fragmentCache.get(clientId);
|
||||
if (fragments == null) {
|
||||
fragments = new ArrayList<>();
|
||||
fragmentCache.put(clientId, fragments);
|
||||
log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||
// 生成唯一文件名
|
||||
String fileName = clientId + "_" + System.currentTimeMillis() + ".wav";
|
||||
String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
|
||||
log.info("文件路径为:{}", pathUrl);
|
||||
log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||
try{
|
||||
log.info("文件流的大小为:{}",bytes.length);
|
||||
saveAsWebM(bytes,pathUrl);
|
||||
//接收到数据流后直接就进行SST处理
|
||||
//语音格式转换
|
||||
String fileOutName = clientId + "_" + System.currentTimeMillis() + ".pcm";
|
||||
String pathOutUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileOutName;
|
||||
handleAudioToPCM(pathUrl,pathOutUrl);
|
||||
//发送消息
|
||||
WebSocket webSocket = cacheWebSocket.get(clientId);
|
||||
log.info("获取的socket对象为:{}",webSocket);
|
||||
if(webSocket != null){
|
||||
// 1. 启动音频缓冲
|
||||
// webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
|
||||
log.info("3.1 开始发送数据音频流啦");
|
||||
File outputFile = new File(pathOutUrl); // 输出PCM格式文件
|
||||
ByteBuffer buffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile));
|
||||
byte[] outBytes = new byte[buffer.remaining()];
|
||||
//从缓冲区中读取数据并存储到指定的字节数组中
|
||||
buffer.get(outBytes);
|
||||
String base64Audio = Base64.getEncoder().encodeToString(outBytes);
|
||||
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||
webSocket.send(message);
|
||||
log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||
// 3. 提交音频并请求转录
|
||||
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
|
||||
// webSocket.send("{\"type\": \"response.create\"}");
|
||||
}
|
||||
}catch (Exception e){
|
||||
e.printStackTrace();
|
||||
}
|
||||
fragments.add(bytes);
|
||||
fragmentCache.put(clientId, fragments);
|
||||
|
||||
}
|
||||
|
||||
// // 接收二进制消息(流数据)
|
||||
// @OnMessage
|
||||
// public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
|
||||
// log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||
// log.info("客户端ID为:{}", clientId);
|
||||
// // 处理二进制流数据
|
||||
// byte[] bytes = new byte[byteBuffer.remaining()];
|
||||
// //从缓冲区中读取数据并存储到指定的字节数组中
|
||||
// byteBuffer.get(bytes);
|
||||
//
|
||||
// // 1. 获取当前会话的缓存
|
||||
// List<byte[]> fragments = fragmentCache.get(clientId);
|
||||
// if (fragments == null) {
|
||||
// fragments = new ArrayList<>();
|
||||
// fragmentCache.put(clientId, fragments);
|
||||
// }
|
||||
// fragments.add(bytes);
|
||||
// fragmentCache.put(clientId, fragments);
|
||||
// }
|
||||
|
||||
// 连接关闭时调用
|
||||
@OnClose
|
||||
public void onClose(Session session, CloseReason reason) {
|
||||
@@ -278,28 +280,6 @@ public class ChatWebSocketHandler {
|
||||
throwable.printStackTrace();
|
||||
}
|
||||
|
||||
// public static byte[] convertAudio(byte[] inputAudioBytes, AudioFormat targetFormat) throws Exception {
|
||||
// // 将 byte[] 转换为 AudioInputStream
|
||||
// ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(inputAudioBytes);
|
||||
// AudioInputStream inputAudioStream = new AudioInputStream(byteArrayInputStream, targetFormat, inputAudioBytes.length);
|
||||
//
|
||||
// // 创建目标格式的 AudioInputStream
|
||||
// AudioInputStream outputAudioStream = AudioSystem.getAudioInputStream(targetFormat, inputAudioStream);
|
||||
//
|
||||
// // 获取输出音频的 byte[]
|
||||
// ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
||||
// byte[] buffer = new byte[1024];
|
||||
// int bytesRead;
|
||||
//
|
||||
// // 从 AudioInputStream 读取数据并写入 ByteArrayOutputStream
|
||||
// while ((bytesRead = outputAudioStream.read(buffer)) != -1) {
|
||||
// byteArrayOutputStream.write(buffer, 0, bytesRead);
|
||||
// }
|
||||
//
|
||||
// // 返回转换后的 byte[]
|
||||
// return byteArrayOutputStream.toByteArray();
|
||||
// }
|
||||
|
||||
/**
|
||||
* 将字节数组保存为WebM文件
|
||||
*
|
||||
@@ -466,6 +446,49 @@ public class ChatWebSocketHandler {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 语音流文件格式转换
|
||||
* @param pathUrl
|
||||
* @param outPathUrl
|
||||
*/
|
||||
private void handleAudioToPCM(String pathUrl,String outPathUrl){
|
||||
File inputFile = new File(pathUrl); // 输入音频文件
|
||||
File outputFile = new File(outPathUrl); // 输出PCM格式文件
|
||||
try {
|
||||
// 读取音频文件
|
||||
AudioInputStream inputAudioStream = AudioSystem.getAudioInputStream(inputFile);
|
||||
// 获取音频文件的格式信息
|
||||
AudioFormat sourceFormat = inputAudioStream.getFormat();
|
||||
System.out.println("Input Audio Format: " + sourceFormat);
|
||||
// 设置目标PCM格式 (可以是16-bit, 8kHz, Mono, Linear PCM)
|
||||
AudioFormat pcmFormat = new AudioFormat(
|
||||
AudioFormat.Encoding.PCM_SIGNED,
|
||||
sourceFormat.getSampleRate(),
|
||||
16, // 16-bit samples
|
||||
1, // 单声道
|
||||
2, // 每个样本2字节(16位)
|
||||
sourceFormat.getSampleRate(),
|
||||
false // 大端模式
|
||||
);
|
||||
// 获取PCM格式的音频流
|
||||
AudioInputStream pcmAudioStream = AudioSystem.getAudioInputStream(pcmFormat, inputAudioStream);
|
||||
// 创建输出文件流
|
||||
FileOutputStream fos = new FileOutputStream(outputFile);
|
||||
byte[] buffer = new byte[1024];
|
||||
int bytesRead;
|
||||
// 将PCM音频数据写入输出文件
|
||||
while ((bytesRead = pcmAudioStream.read(buffer)) != -1) {
|
||||
fos.write(buffer, 0, bytesRead);
|
||||
}
|
||||
// 关闭流
|
||||
pcmAudioStream.close();
|
||||
fos.close();
|
||||
System.out.println("Audio has been converted to PCM format and saved at: " + outputFile.getAbsolutePath());
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
package com.vetti.common.ai.whisper;
|
||||
|
||||
import javax.sound.sampled.*;
|
||||
import java.io.*;
|
||||
|
||||
/**
 * Small utility that converts an audio file into raw 16-bit signed, mono,
 * little-endian PCM (no container header), keeping the source sample rate.
 */
public class AudioToPCM {

    /** Input used by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_INPUT = "/Users/wangxiangshun/Desktop/临时文件/110/buffer.wav";

    /** Output used by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_OUTPUT = "/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio.pcm";

    /**
     * Command-line entry point.
     *
     * <p>With two arguments, converts {@code args[0]} into {@code args[1]}.
     * Otherwise falls back to the built-in default paths. Failures are printed
     * and do not propagate.
     *
     * @param args optional {@code [inputFile, outputFile]}
     */
    public static void main(String[] args) {
        boolean hasPaths = args != null && args.length >= 2;
        File inputFile = new File(hasPaths ? args[0] : DEFAULT_INPUT);    // input audio file
        File outputFile = new File(hasPaths ? args[1] : DEFAULT_OUTPUT);  // output raw PCM file

        try {
            convert(inputFile, outputFile);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts {@code inputFile} to raw PCM and writes it to {@code outputFile}.
     *
     * <p>All streams are closed on both the success and the failure path.
     *
     * @param inputFile  audio file to read
     * @param outputFile file that receives the raw PCM samples
     * @throws IOException                   if reading or writing fails
     * @throws UnsupportedAudioFileException if the input is not a recognised audio file
     * @throws IllegalArgumentException      if the conversion to the target format is not supported
     */
    public static void convert(File inputFile, File outputFile)
            throws IOException, UnsupportedAudioFileException {
        try (AudioInputStream inputAudioStream = AudioSystem.getAudioInputStream(inputFile)) {
            // Inspect the format of the input so the sample rate can be preserved.
            AudioFormat sourceFormat = inputAudioStream.getFormat();
            System.out.println("Input Audio Format: " + sourceFormat);

            // Target: signed linear PCM, 16-bit, mono, same sample rate as the source.
            AudioFormat pcmFormat = new AudioFormat(
                    AudioFormat.Encoding.PCM_SIGNED,
                    sourceFormat.getSampleRate(),
                    16,                              // bits per sample
                    1,                               // mono
                    2,                               // frame size in bytes (1 channel x 16 bit)
                    sourceFormat.getSampleRate(),    // frame rate equals sample rate for PCM
                    false                            // bigEndian = false, i.e. little-endian
            );

            // The output file is only created once the conversion stream was obtained.
            try (AudioInputStream pcmAudioStream = AudioSystem.getAudioInputStream(pcmFormat, inputAudioStream);
                 FileOutputStream fos = new FileOutputStream(outputFile)) {
                byte[] buffer = new byte[1024];
                int bytesRead;
                // Copy the converted PCM samples to the output file.
                while ((bytesRead = pcmAudioStream.read(buffer)) != -1) {
                    fos.write(buffer, 0, bytesRead);
                }
            }

            System.out.println("Audio has been converted to PCM format and saved at: " + outputFile.getAbsolutePath());
        }
    }
}
|
||||
|
||||
@@ -2,11 +2,14 @@ package com.vetti.common.ai.whisper;
|
||||
|
||||
import cn.hutool.json.JSONObject;
|
||||
import okhttp3.*;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
import javax.sound.sampled.AudioFormat;
|
||||
import javax.sound.sampled.AudioSystem;
|
||||
import javax.sound.sampled.DataLine;
|
||||
import javax.sound.sampled.TargetDataLine;
|
||||
import java.io.File;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Base64;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
|
||||
@@ -63,23 +66,30 @@ public class RealtimeTranscriptionMicrophone {
|
||||
new Thread(() -> {
|
||||
try {
|
||||
// 设置麦克风输入流
|
||||
AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
|
||||
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
|
||||
TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
|
||||
line.open(format);
|
||||
line.start();
|
||||
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int bytesRead;
|
||||
while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) {
|
||||
// 将音频数据转换为 Base64 编码的字符串
|
||||
byte[] audioData = new byte[bytesRead];
|
||||
System.arraycopy(buffer, 0, audioData, 0, bytesRead);
|
||||
String base64Audio = Base64.getEncoder().encodeToString(audioData);
|
||||
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||
webSocket.send(message);
|
||||
}
|
||||
|
||||
// AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
|
||||
// DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
|
||||
// TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
|
||||
// line.open(format);
|
||||
// line.start();
|
||||
//
|
||||
// byte[] buffer = new byte[BUFFER_SIZE];
|
||||
// int bytesRead;
|
||||
// while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) {
|
||||
// // 将音频数据转换为 Base64 编码的字符串
|
||||
// byte[] audioData = new byte[bytesRead];
|
||||
// System.arraycopy(buffer, 0, audioData, 0, bytesRead);
|
||||
// String base64Audio = Base64.getEncoder().encodeToString(audioData);
|
||||
// String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||
// webSocket.send(message);
|
||||
// }
|
||||
File outputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio.pcm"); // 输出PCM格式文件
|
||||
ByteBuffer byteBuffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile));
|
||||
byte[] bytes = new byte[byteBuffer.remaining()];
|
||||
//从缓冲区中读取数据并存储到指定的字节数组中
|
||||
byteBuffer.get(bytes);
|
||||
String base64Audio = Base64.getEncoder().encodeToString(bytes);
|
||||
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||
webSocket.send(message);
|
||||
// 3. 提交音频并请求转录
|
||||
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
|
||||
// webSocket.send("{\"type\": \"response.create\"}");
|
||||
|
||||
Reference in New Issue
Block a user