STT 数据流处理
This commit is contained in:
@@ -14,6 +14,7 @@ import okhttp3.*;
|
|||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import javax.sound.sampled.AudioFormat;
|
||||||
import javax.sound.sampled.AudioInputStream;
|
import javax.sound.sampled.AudioInputStream;
|
||||||
import javax.sound.sampled.AudioSystem;
|
import javax.sound.sampled.AudioSystem;
|
||||||
import javax.websocket.*;
|
import javax.websocket.*;
|
||||||
@@ -21,10 +22,7 @@ import javax.websocket.server.PathParam;
|
|||||||
import javax.websocket.server.ServerEndpoint;
|
import javax.websocket.server.ServerEndpoint;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -200,51 +198,6 @@ public class ChatWebSocketHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// // 接收二进制消息(流数据)
|
|
||||||
// @OnMessage
|
|
||||||
// public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
|
|
||||||
// log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
|
||||||
// log.info("客户端ID为:{}", clientId);
|
|
||||||
// // 处理二进制流数据
|
|
||||||
// byte[] bytes = new byte[byteBuffer.remaining()];
|
|
||||||
// //从缓冲区中读取数据并存储到指定的字节数组中
|
|
||||||
// byteBuffer.get(bytes);
|
|
||||||
// log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
|
||||||
// // 生成唯一文件名
|
|
||||||
// String fileName = clientId + "_" + System.currentTimeMillis() + ".wav";
|
|
||||||
// String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
|
|
||||||
// log.info("文件路径为:{}", pathUrl);
|
|
||||||
// log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
|
||||||
// try{
|
|
||||||
// log.info("文件流的大小为:{}",bytes.length);
|
|
||||||
// saveAsWebM(bytes,pathUrl);
|
|
||||||
// //接收到数据流后直接就进行SST处理
|
|
||||||
// //发送消息
|
|
||||||
// WebSocket webSocket = cacheWebSocket.get(clientId);
|
|
||||||
// log.info("获取的socket对象为:{}",webSocket);
|
|
||||||
// if(webSocket != null){
|
|
||||||
//// 1. 启动音频缓冲
|
|
||||||
//// webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
|
|
||||||
// log.info("3.1 开始发送数据音频流啦");
|
|
||||||
// // 将音频数据转换为 Base64 编码的字符串
|
|
||||||
// //进行转换
|
|
||||||
// // 转换音频格式
|
|
||||||
// AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
|
|
||||||
// byte[] outputAudioBytes = convertAudio(bytes, format);
|
|
||||||
// String base64Audio = Base64.getEncoder().encodeToString(outputAudioBytes);
|
|
||||||
// String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
|
||||||
// webSocket.send(message);
|
|
||||||
// log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
|
||||||
// // 3. 提交音频并请求转录
|
|
||||||
//// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
|
|
||||||
//// webSocket.send("{\"type\": \"response.create\"}");
|
|
||||||
// }
|
|
||||||
// }catch (Exception e){
|
|
||||||
// e.printStackTrace();
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
|
|
||||||
// 接收二进制消息(流数据)
|
// 接收二进制消息(流数据)
|
||||||
@OnMessage
|
@OnMessage
|
||||||
public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
|
public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
|
||||||
@@ -254,17 +207,66 @@ public class ChatWebSocketHandler {
|
|||||||
byte[] bytes = new byte[byteBuffer.remaining()];
|
byte[] bytes = new byte[byteBuffer.remaining()];
|
||||||
//从缓冲区中读取数据并存储到指定的字节数组中
|
//从缓冲区中读取数据并存储到指定的字节数组中
|
||||||
byteBuffer.get(bytes);
|
byteBuffer.get(bytes);
|
||||||
|
log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||||
// 1. 获取当前会话的缓存
|
// 生成唯一文件名
|
||||||
List<byte[]> fragments = fragmentCache.get(clientId);
|
String fileName = clientId + "_" + System.currentTimeMillis() + ".wav";
|
||||||
if (fragments == null) {
|
String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
|
||||||
fragments = new ArrayList<>();
|
log.info("文件路径为:{}", pathUrl);
|
||||||
fragmentCache.put(clientId, fragments);
|
log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||||
|
try{
|
||||||
|
log.info("文件流的大小为:{}",bytes.length);
|
||||||
|
saveAsWebM(bytes,pathUrl);
|
||||||
|
//接收到数据流后直接就进行STT处理
|
||||||
|
//语音格式转换
|
||||||
|
String fileOutName = clientId + "_" + System.currentTimeMillis() + ".pcm";
|
||||||
|
String pathOutUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileOutName;
|
||||||
|
handleAudioToPCM(pathUrl,pathOutUrl);
|
||||||
|
//发送消息
|
||||||
|
WebSocket webSocket = cacheWebSocket.get(clientId);
|
||||||
|
log.info("获取的socket对象为:{}",webSocket);
|
||||||
|
if(webSocket != null){
|
||||||
|
// 1. 启动音频缓冲
|
||||||
|
// webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
|
||||||
|
log.info("3.1 开始发送数据音频流啦");
|
||||||
|
File outputFile = new File(pathOutUrl); // 输出PCM格式文件
|
||||||
|
ByteBuffer buffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile));
|
||||||
|
byte[] outBytes = new byte[buffer.remaining()];
|
||||||
|
//从缓冲区中读取数据并存储到指定的字节数组中
|
||||||
|
buffer.get(outBytes);
|
||||||
|
String base64Audio = Base64.getEncoder().encodeToString(outBytes);
|
||||||
|
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||||
|
webSocket.send(message);
|
||||||
|
log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||||
|
// 3. 提交音频并请求转录
|
||||||
|
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
|
||||||
|
// webSocket.send("{\"type\": \"response.create\"}");
|
||||||
|
}
|
||||||
|
}catch (Exception e){
|
||||||
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
fragments.add(bytes);
|
|
||||||
fragmentCache.put(clientId, fragments);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// // 接收二进制消息(流数据)
|
||||||
|
// @OnMessage
|
||||||
|
// public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
|
||||||
|
// log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
|
||||||
|
// log.info("客户端ID为:{}", clientId);
|
||||||
|
// // 处理二进制流数据
|
||||||
|
// byte[] bytes = new byte[byteBuffer.remaining()];
|
||||||
|
// //从缓冲区中读取数据并存储到指定的字节数组中
|
||||||
|
// byteBuffer.get(bytes);
|
||||||
|
//
|
||||||
|
// // 1. 获取当前会话的缓存
|
||||||
|
// List<byte[]> fragments = fragmentCache.get(clientId);
|
||||||
|
// if (fragments == null) {
|
||||||
|
// fragments = new ArrayList<>();
|
||||||
|
// fragmentCache.put(clientId, fragments);
|
||||||
|
// }
|
||||||
|
// fragments.add(bytes);
|
||||||
|
// fragmentCache.put(clientId, fragments);
|
||||||
|
// }
|
||||||
|
|
||||||
// 连接关闭时调用
|
// 连接关闭时调用
|
||||||
@OnClose
|
@OnClose
|
||||||
public void onClose(Session session, CloseReason reason) {
|
public void onClose(Session session, CloseReason reason) {
|
||||||
@@ -278,28 +280,6 @@ public class ChatWebSocketHandler {
|
|||||||
throwable.printStackTrace();
|
throwable.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
||||||
// public static byte[] convertAudio(byte[] inputAudioBytes, AudioFormat targetFormat) throws Exception {
|
|
||||||
// // 将 byte[] 转换为 AudioInputStream
|
|
||||||
// ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(inputAudioBytes);
|
|
||||||
// AudioInputStream inputAudioStream = new AudioInputStream(byteArrayInputStream, targetFormat, inputAudioBytes.length);
|
|
||||||
//
|
|
||||||
// // 创建目标格式的 AudioInputStream
|
|
||||||
// AudioInputStream outputAudioStream = AudioSystem.getAudioInputStream(targetFormat, inputAudioStream);
|
|
||||||
//
|
|
||||||
// // 获取输出音频的 byte[]
|
|
||||||
// ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
|
||||||
// byte[] buffer = new byte[1024];
|
|
||||||
// int bytesRead;
|
|
||||||
//
|
|
||||||
// // 从 AudioInputStream 读取数据并写入 ByteArrayOutputStream
|
|
||||||
// while ((bytesRead = outputAudioStream.read(buffer)) != -1) {
|
|
||||||
// byteArrayOutputStream.write(buffer, 0, bytesRead);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // 返回转换后的 byte[]
|
|
||||||
// return byteArrayOutputStream.toByteArray();
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 将字节数组保存为WebM文件
|
* 将字节数组保存为WebM文件
|
||||||
*
|
*
|
||||||
@@ -466,6 +446,49 @@ public class ChatWebSocketHandler {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 语音流文件格式转换
|
||||||
|
* @param pathUrl
|
||||||
|
* @param outPathUrl
|
||||||
|
*/
|
||||||
|
private void handleAudioToPCM(String pathUrl,String outPathUrl){
|
||||||
|
File inputFile = new File(pathUrl); // 输入音频文件
|
||||||
|
File outputFile = new File(outPathUrl); // 输出PCM格式文件
|
||||||
|
try {
|
||||||
|
// 读取音频文件
|
||||||
|
AudioInputStream inputAudioStream = AudioSystem.getAudioInputStream(inputFile);
|
||||||
|
// 获取音频文件的格式信息
|
||||||
|
AudioFormat sourceFormat = inputAudioStream.getFormat();
|
||||||
|
System.out.println("Input Audio Format: " + sourceFormat);
|
||||||
|
// 设置目标PCM格式 (可以是16-bit, 8kHz, Mono, Linear PCM)
|
||||||
|
AudioFormat pcmFormat = new AudioFormat(
|
||||||
|
AudioFormat.Encoding.PCM_SIGNED,
|
||||||
|
sourceFormat.getSampleRate(),
|
||||||
|
16, // 16-bit samples
|
||||||
|
1, // 单声道
|
||||||
|
2, // 每个样本2字节(16位)
|
||||||
|
sourceFormat.getSampleRate(),
|
||||||
|
false // 大端模式
|
||||||
|
);
|
||||||
|
// 获取PCM格式的音频流
|
||||||
|
AudioInputStream pcmAudioStream = AudioSystem.getAudioInputStream(pcmFormat, inputAudioStream);
|
||||||
|
// 创建输出文件流
|
||||||
|
FileOutputStream fos = new FileOutputStream(outputFile);
|
||||||
|
byte[] buffer = new byte[1024];
|
||||||
|
int bytesRead;
|
||||||
|
// 将PCM音频数据写入输出文件
|
||||||
|
while ((bytesRead = pcmAudioStream.read(buffer)) != -1) {
|
||||||
|
fos.write(buffer, 0, bytesRead);
|
||||||
|
}
|
||||||
|
// 关闭流
|
||||||
|
pcmAudioStream.close();
|
||||||
|
fos.close();
|
||||||
|
System.out.println("Audio has been converted to PCM format and saved at: " + outputFile.getAbsolutePath());
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,54 @@
|
|||||||
|
package com.vetti.common.ai.whisper;
|
||||||
|
|
||||||
|
import javax.sound.sampled.*;
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
/**
 * Command-line utility that converts an audio file readable by
 * {@link AudioSystem} into raw 16-bit signed, mono, little-endian PCM.
 *
 * <p>Usage: {@code AudioToPCM [inputFile [outputFile]]}. When an argument is
 * omitted the corresponding built-in default path is used.
 */
public class AudioToPCM {

    /** Input path used when no first argument is supplied. */
    private static final String DEFAULT_INPUT_PATH =
            "/Users/wangxiangshun/Desktop/临时文件/110/buffer.wav";

    /** Output path used when no second argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH =
            "/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio.pcm";

    /**
     * Converts the input file and reports the result on stdout; failures are
     * printed to stderr rather than propagated.
     *
     * @param args optional {@code [inputFile, outputFile]} paths
     */
    public static void main(String[] args) {
        File inputFile = new File(args.length > 0 ? args[0] : DEFAULT_INPUT_PATH);
        File outputFile = new File(args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH);

        try {
            convert(inputFile, outputFile);
            System.out.println("Audio has been converted to PCM format and saved at: " + outputFile.getAbsolutePath());
        } catch (IOException | UnsupportedAudioFileException | IllegalArgumentException e) {
            // IllegalArgumentException is raised when the requested format conversion is unsupported.
            e.printStackTrace();
        }
    }

    /**
     * Decodes {@code inputFile} and writes its samples to {@code outputFile} as
     * headerless 16-bit signed, mono, little-endian PCM at the source sample rate.
     *
     * @param inputFile  audio file to read
     * @param outputFile raw PCM file to create or overwrite
     * @throws IOException                   if reading or writing fails
     * @throws UnsupportedAudioFileException if the input is not a recognised audio file
     * @throws IllegalArgumentException      if conversion to the target format is unsupported
     */
    public static void convert(File inputFile, File outputFile)
            throws IOException, UnsupportedAudioFileException {
        // try-with-resources closes every stream even if conversion fails mid-way.
        try (AudioInputStream inputAudioStream = AudioSystem.getAudioInputStream(inputFile)) {
            AudioFormat sourceFormat = inputAudioStream.getFormat();
            System.out.println("Input Audio Format: " + sourceFormat);

            AudioFormat pcmFormat = new AudioFormat(
                    AudioFormat.Encoding.PCM_SIGNED,
                    sourceFormat.getSampleRate(),
                    16,    // bits per sample
                    1,     // mono
                    2,     // frame size in bytes: 1 channel * 16 bits
                    sourceFormat.getSampleRate(), // frame rate equals sample rate for PCM
                    false  // bigEndian = false, i.e. little-endian byte order
            );

            try (AudioInputStream pcmAudioStream = AudioSystem.getAudioInputStream(pcmFormat, inputAudioStream);
                 FileOutputStream fos = new FileOutputStream(outputFile)) {
                byte[] buffer = new byte[1024];
                int bytesRead;
                while ((bytesRead = pcmAudioStream.read(buffer)) != -1) {
                    fos.write(buffer, 0, bytesRead);
                }
            }
        }
    }
}
|
||||||
|
|
||||||
@@ -2,11 +2,14 @@ package com.vetti.common.ai.whisper;
|
|||||||
|
|
||||||
import cn.hutool.json.JSONObject;
|
import cn.hutool.json.JSONObject;
|
||||||
import okhttp3.*;
|
import okhttp3.*;
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
|
||||||
import javax.sound.sampled.AudioFormat;
|
import javax.sound.sampled.AudioFormat;
|
||||||
import javax.sound.sampled.AudioSystem;
|
import javax.sound.sampled.AudioSystem;
|
||||||
import javax.sound.sampled.DataLine;
|
import javax.sound.sampled.DataLine;
|
||||||
import javax.sound.sampled.TargetDataLine;
|
import javax.sound.sampled.TargetDataLine;
|
||||||
|
import java.io.File;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
import java.util.Base64;
|
import java.util.Base64;
|
||||||
import java.util.concurrent.CountDownLatch;
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
|
||||||
@@ -63,23 +66,30 @@ public class RealtimeTranscriptionMicrophone {
|
|||||||
new Thread(() -> {
|
new Thread(() -> {
|
||||||
try {
|
try {
|
||||||
// 设置麦克风输入流
|
// 设置麦克风输入流
|
||||||
AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
|
// AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
|
||||||
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
|
// DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
|
||||||
TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
|
// TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
|
||||||
line.open(format);
|
// line.open(format);
|
||||||
line.start();
|
// line.start();
|
||||||
|
//
|
||||||
byte[] buffer = new byte[BUFFER_SIZE];
|
// byte[] buffer = new byte[BUFFER_SIZE];
|
||||||
int bytesRead;
|
// int bytesRead;
|
||||||
while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) {
|
// while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) {
|
||||||
// 将音频数据转换为 Base64 编码的字符串
|
// // 将音频数据转换为 Base64 编码的字符串
|
||||||
byte[] audioData = new byte[bytesRead];
|
// byte[] audioData = new byte[bytesRead];
|
||||||
System.arraycopy(buffer, 0, audioData, 0, bytesRead);
|
// System.arraycopy(buffer, 0, audioData, 0, bytesRead);
|
||||||
String base64Audio = Base64.getEncoder().encodeToString(audioData);
|
// String base64Audio = Base64.getEncoder().encodeToString(audioData);
|
||||||
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
// String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||||
webSocket.send(message);
|
// webSocket.send(message);
|
||||||
}
|
// }
|
||||||
|
File outputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio.pcm"); // 输出PCM格式文件
|
||||||
|
ByteBuffer byteBuffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile));
|
||||||
|
byte[] bytes = new byte[byteBuffer.remaining()];
|
||||||
|
//从缓冲区中读取数据并存储到指定的字节数组中
|
||||||
|
byteBuffer.get(bytes);
|
||||||
|
String base64Audio = Base64.getEncoder().encodeToString(bytes);
|
||||||
|
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||||
|
webSocket.send(message);
|
||||||
// 3. 提交音频并请求转录
|
// 3. 提交音频并请求转录
|
||||||
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
|
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
|
||||||
// webSocket.send("{\"type\": \"response.create\"}");
|
// webSocket.send("{\"type\": \"response.create\"}");
|
||||||
|
|||||||
Reference in New Issue
Block a user