STT 数据流处理-试一下合并后的格式

2025-10-20 11:34:58 +08:00
parent 36cceafac5
commit 1d3394cb2d
2 changed files with 70 additions and 61 deletions
--- a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java
+++ b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java
@@ -120,7 +120,7 @@ public class ChatWebSocketHandler {
                    // 合并所有分片为完整语音数据
                    byte[] fullVoiceData = mergeFragments(fragments);
                            // 生成唯一文件名
-                    String fileName = clientId + "_" + System.currentTimeMillis() + ".webm";
+                    String fileName = clientId + "_" + System.currentTimeMillis() + ".wav";
                    String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
                    log.info("文件路径为:{}", pathUrl);
                    log.info("文件流的大小为:{}",fullVoiceData.length);
@@ -199,55 +199,6 @@ public class ChatWebSocketHandler {
    }

    // 接收二进制消息（流数据）
-    @OnMessage
-    public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
-        log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
-        log.info("客户端ID为:{}", clientId);
-        // 处理二进制流数据
-        byte[] bytes = new byte[byteBuffer.remaining()];
-        //从缓冲区中读取数据并存储到指定的字节数组中
-        byteBuffer.get(bytes);
-        log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
-        // 生成唯一文件名
-        String fileName = clientId + "_" + System.currentTimeMillis() + ".wav";
-        String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
-        log.info("文件路径为:{}", pathUrl);
-        log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
-        try{
-            log.info("文件流的大小为:{}",bytes.length);
-            saveAsWebM(bytes,pathUrl);
-            //接收到数据流后直接就进行SST处理
-            //语音格式转换
-            String fileOutName = clientId + "_" + System.currentTimeMillis() + ".pcm";
-            String pathOutUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileOutName;
-            handleAudioToPCM(pathUrl,pathOutUrl);
-            //发送消息
-            WebSocket webSocket = cacheWebSocket.get(clientId);
-            log.info("获取的socket对象为:{}",webSocket);
-            if(webSocket != null){
-//                 1. 启动音频缓冲
-//                webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
-                log.info("3.1 开始发送数据音频流啦");
-                File outputFile = new File(pathOutUrl);  // 输出PCM格式文件
-                ByteBuffer buffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile));
-                byte[] outBytes = new byte[buffer.remaining()];
-                //从缓冲区中读取数据并存储到指定的字节数组中
-                buffer.get(outBytes);
-                String base64Audio = Base64.getEncoder().encodeToString(outBytes);
-                String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
-                webSocket.send(message);
-                log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
-                // 3. 提交音频并请求转录
-//                webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
-//                webSocket.send("{\"type\": \"response.create\"}");
-            }
-        }catch (Exception e){
-            e.printStackTrace();
-        }
-
-    }
-
-//    // 接收二进制消息（流数据）
 //    @OnMessage
 //    public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
 //        log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
@@ -256,17 +207,66 @@ public class ChatWebSocketHandler {
 //        byte[] bytes = new byte[byteBuffer.remaining()];
 //        //从缓冲区中读取数据并存储到指定的字节数组中
 //        byteBuffer.get(bytes);
-//
-//        // 1. 获取当前会话的缓存
-//        List<byte[]> fragments = fragmentCache.get(clientId);
-//        if (fragments == null) {
-//            fragments = new ArrayList<>();
-//            fragmentCache.put(clientId, fragments);
+//        log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
+//        // 生成唯一文件名
+//        String fileName = clientId + "_" + System.currentTimeMillis() + ".wav";
+//        String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName;
+//        log.info("文件路径为:{}", pathUrl);
+//        log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
+//        try{
+//            log.info("文件流的大小为:{}",bytes.length);
+//            saveAsWebM(bytes,pathUrl);
+//            //接收到数据流后直接就进行SST处理
+//            //语音格式转换
+//            String fileOutName = clientId + "_" + System.currentTimeMillis() + ".pcm";
+//            String pathOutUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileOutName;
+//            handleAudioToPCM(pathUrl,pathOutUrl);
+//            //发送消息
+//            WebSocket webSocket = cacheWebSocket.get(clientId);
+//            log.info("获取的socket对象为:{}",webSocket);
+//            if(webSocket != null){
+////                 1. 启动音频缓冲
+////                webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
+//                log.info("3.1 开始发送数据音频流啦");
+//                File outputFile = new File(pathOutUrl);  // 输出PCM格式文件
+//                ByteBuffer buffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile));
+//                byte[] outBytes = new byte[buffer.remaining()];
+//                //从缓冲区中读取数据并存储到指定的字节数组中
+//                buffer.get(outBytes);
+//                String base64Audio = Base64.getEncoder().encodeToString(outBytes);
+//                String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
+//                webSocket.send(message);
+//                log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
+//                // 3. 提交音频并请求转录
+////                webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
+////                webSocket.send("{\"type\": \"response.create\"}");
+//            }
+//        }catch (Exception e){
+//            e.printStackTrace();
 //        }
-//        fragments.add(bytes);
-//        fragmentCache.put(clientId, fragments);
+//
 //    }

+    // 接收二进制消息（流数据）
+    @OnMessage
+    public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) {
+        log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
+        log.info("客户端ID为:{}", clientId);
+        // 处理二进制流数据
+        byte[] bytes = new byte[byteBuffer.remaining()];
+        //从缓冲区中读取数据并存储到指定的字节数组中
+        byteBuffer.get(bytes);
+
+        // 1. 获取当前会话的缓存
+        List<byte[]> fragments = fragmentCache.get(clientId);
+        if (fragments == null) {
+            fragments = new ArrayList<>();
+            fragmentCache.put(clientId, fragments);
+        }
+        fragments.add(bytes);
+        fragmentCache.put(clientId, fragments);
+    }
+
    // 连接关闭时调用
    @OnClose
    public void onClose(Session session, CloseReason reason) {
--- a/vetti-common/src/main/java/com/vetti/common/ai/whisper/AudioToPCM.java
+++ b/vetti-common/src/main/java/com/vetti/common/ai/whisper/AudioToPCM.java
@@ -5,9 +5,18 @@ import java.io.*;

 public class AudioToPCM {
    public static void main(String[] args) {
-        File inputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/buffer.wav");  // 输入音频文件
-        File outputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio.pcm");  // 输出PCM格式文件
-
+        File inputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/111123456_1760930173334.wav");  // 输入音频文件
+        File outputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio1.pcm");  // 输出PCM格式文件
+        try {
+            AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(inputFile);
+            // 进行后续操作，如播放等
+        } catch (UnsupportedAudioFileException e) {
+            System.out.println("Unsupported audio file format: " + e.getMessage());
+        } catch (IOException e) {
+            System.out.println("Error reading the file: " + e.getMessage());
+        } catch (Exception e) {
+            System.out.println("An error occurred: " + e.getMessage());
+        }
        try {
            // 读取音频文件
            AudioInputStream inputAudioStream = AudioSystem.getAudioInputStream(inputFile);