diff --git a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java index 1aed10a..4ecbd43 100644 --- a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java +++ b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java @@ -14,6 +14,7 @@ import okhttp3.*; import org.apache.commons.io.FileUtils; import org.springframework.stereotype.Component; +import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioInputStream; import javax.sound.sampled.AudioSystem; import javax.websocket.*; @@ -21,10 +22,7 @@ import javax.websocket.server.PathParam; import javax.websocket.server.ServerEndpoint; import java.io.*; import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; /** @@ -200,51 +198,6 @@ public class ChatWebSocketHandler { } } -// // 接收二进制消息(流数据) -// @OnMessage -// public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) { -// log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000); -// log.info("客户端ID为:{}", clientId); -// // 处理二进制流数据 -// byte[] bytes = new byte[byteBuffer.remaining()]; -// //从缓冲区中读取数据并存储到指定的字节数组中 -// byteBuffer.get(bytes); -// log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000); -// // 生成唯一文件名 -// String fileName = clientId + "_" + System.currentTimeMillis() + ".wav"; -// String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName; -// log.info("文件路径为:{}", pathUrl); -// log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000); -// try{ -// log.info("文件流的大小为:{}",bytes.length); -// saveAsWebM(bytes,pathUrl); -// //接收到数据流后直接就进行SST处理 -// //发送消息 -// WebSocket webSocket = cacheWebSocket.get(clientId); -// log.info("获取的socket对象为:{}",webSocket); -// if(webSocket != null){ -//// 1. 启动音频缓冲 -//// webSocket.send("{\"type\": \"input_audio_buffer.start\"}"); -// log.info("3.1 开始发送数据音频流啦"); -// // 将音频数据转换为 Base64 编码的字符串 -// //进行转换 -// // 转换音频格式 -// AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false); -// byte[] outputAudioBytes = convertAudio(bytes, format); -// String base64Audio = Base64.getEncoder().encodeToString(outputAudioBytes); -// String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }"; -// webSocket.send(message); -// log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000); -// // 3. 提交音频并请求转录 -//// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}"); -//// webSocket.send("{\"type\": \"response.create\"}"); -// } -// }catch (Exception e){ -// e.printStackTrace(); -// } -// -// } - // 接收二进制消息(流数据) @OnMessage public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) { @@ -254,17 +207,66 @@ public class ChatWebSocketHandler { byte[] bytes = new byte[byteBuffer.remaining()]; //从缓冲区中读取数据并存储到指定的字节数组中 byteBuffer.get(bytes); - - // 1. 获取当前会话的缓存 - List fragments = fragmentCache.get(clientId); - if (fragments == null) { - fragments = new ArrayList<>(); - fragmentCache.put(clientId, fragments); + log.info("2、开始接收数据流时间:{}",System.currentTimeMillis()/1000); + // 生成唯一文件名 + String fileName = clientId + "_" + System.currentTimeMillis() + ".wav"; + String pathUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileName; + log.info("文件路径为:{}", pathUrl); + log.info("3、开始接收数据流时间:{}",System.currentTimeMillis()/1000); + try{ + log.info("文件流的大小为:{}",bytes.length); + saveAsWebM(bytes,pathUrl); + //接收到数据流后直接就进行SST处理 + //语音格式转换 + String fileOutName = clientId + "_" + System.currentTimeMillis() + ".pcm"; + String pathOutUrl = RuoYiConfig.getProfile()+VOICE_STORAGE_DIR + fileOutName; + handleAudioToPCM(pathUrl,pathOutUrl); + //发送消息 + WebSocket webSocket = cacheWebSocket.get(clientId); + log.info("获取的socket对象为:{}",webSocket); + if(webSocket != null){ +// 1. 启动音频缓冲 +// webSocket.send("{\"type\": \"input_audio_buffer.start\"}"); + log.info("3.1 开始发送数据音频流啦"); + File outputFile = new File(pathOutUrl); // 输出PCM格式文件 + ByteBuffer buffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile)); + byte[] outBytes = new byte[buffer.remaining()]; + //从缓冲区中读取数据并存储到指定的字节数组中 + buffer.get(outBytes); + String base64Audio = Base64.getEncoder().encodeToString(outBytes); + String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }"; + webSocket.send(message); + log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000); + // 3. 提交音频并请求转录 +// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}"); +// webSocket.send("{\"type\": \"response.create\"}"); + } + }catch (Exception e){ + e.printStackTrace(); } - fragments.add(bytes); - fragmentCache.put(clientId, fragments); + } +// // 接收二进制消息(流数据) +// @OnMessage +// public void onBinaryMessage(Session session, @PathParam("clientId") String clientId, ByteBuffer byteBuffer) { +// log.info("1、开始接收数据流时间:{}",System.currentTimeMillis()/1000); +// log.info("客户端ID为:{}", clientId); +// // 处理二进制流数据 +// byte[] bytes = new byte[byteBuffer.remaining()]; +// //从缓冲区中读取数据并存储到指定的字节数组中 +// byteBuffer.get(bytes); +// +// // 1. 获取当前会话的缓存 +// List fragments = fragmentCache.get(clientId); +// if (fragments == null) { +// fragments = new ArrayList<>(); +// fragmentCache.put(clientId, fragments); +// } +// fragments.add(bytes); +// fragmentCache.put(clientId, fragments); +// } + // 连接关闭时调用 @OnClose public void onClose(Session session, CloseReason reason) { @@ -278,28 +280,6 @@ public class ChatWebSocketHandler { throwable.printStackTrace(); } -// public static byte[] convertAudio(byte[] inputAudioBytes, AudioFormat targetFormat) throws Exception { -// // 将 byte[] 转换为 AudioInputStream -// ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(inputAudioBytes); -// AudioInputStream inputAudioStream = new AudioInputStream(byteArrayInputStream, targetFormat, inputAudioBytes.length); -// -// // 创建目标格式的 AudioInputStream -// AudioInputStream outputAudioStream = AudioSystem.getAudioInputStream(targetFormat, inputAudioStream); -// -// // 获取输出音频的 byte[] -// ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); -// byte[] buffer = new byte[1024]; -// int bytesRead; -// -// // 从 AudioInputStream 读取数据并写入 ByteArrayOutputStream -// while ((bytesRead = outputAudioStream.read(buffer)) != -1) { -// byteArrayOutputStream.write(buffer, 0, bytesRead); -// } -// -// // 返回转换后的 byte[] -// return byteArrayOutputStream.toByteArray(); -// } - /** * 将字节数组保存为WebM文件 * @@ -466,6 +446,49 @@ public class ChatWebSocketHandler { return result; } + /** + * 语音流文件格式转换 + * @param pathUrl + * @param outPathUrl + */ + private void handleAudioToPCM(String pathUrl,String outPathUrl){ + File inputFile = new File(pathUrl); // 输入音频文件 + File outputFile = new File(outPathUrl); // 输出PCM格式文件 + try { + // 读取音频文件 + AudioInputStream inputAudioStream = AudioSystem.getAudioInputStream(inputFile); + // 获取音频文件的格式信息 + AudioFormat sourceFormat = inputAudioStream.getFormat(); + System.out.println("Input Audio Format: " + sourceFormat); + // 设置目标PCM格式 (可以是16-bit, 8kHz, Mono, Linear PCM) + AudioFormat pcmFormat = new AudioFormat( + AudioFormat.Encoding.PCM_SIGNED, + sourceFormat.getSampleRate(), + 16, // 16-bit samples + 1, // 单声道 + 2, // 每个样本2字节(16位) + sourceFormat.getSampleRate(), + false // 大端模式 + ); + // 获取PCM格式的音频流 + AudioInputStream pcmAudioStream = AudioSystem.getAudioInputStream(pcmFormat, inputAudioStream); + // 创建输出文件流 + FileOutputStream fos = new FileOutputStream(outputFile); + byte[] buffer = new byte[1024]; + int bytesRead; + // 将PCM音频数据写入输出文件 + while ((bytesRead = pcmAudioStream.read(buffer)) != -1) { + fos.write(buffer, 0, bytesRead); + } + // 关闭流 + pcmAudioStream.close(); + fos.close(); + System.out.println("Audio has been converted to PCM format and saved at: " + outputFile.getAbsolutePath()); + } catch (Exception e) { + e.printStackTrace(); + } + } + } diff --git a/vetti-common/src/main/java/com/vetti/common/ai/whisper/AudioToPCM.java b/vetti-common/src/main/java/com/vetti/common/ai/whisper/AudioToPCM.java new file mode 100644 index 0000000..45b13b7 --- /dev/null +++ b/vetti-common/src/main/java/com/vetti/common/ai/whisper/AudioToPCM.java @@ -0,0 +1,54 @@ +package com.vetti.common.ai.whisper; + +import javax.sound.sampled.*; +import java.io.*; + +public class AudioToPCM { + public static void main(String[] args) { + File inputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/buffer.wav"); // 输入音频文件 + File outputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio.pcm"); // 输出PCM格式文件 + + try { + // 读取音频文件 + AudioInputStream inputAudioStream = AudioSystem.getAudioInputStream(inputFile); + + // 获取音频文件的格式信息 + AudioFormat sourceFormat = inputAudioStream.getFormat(); + System.out.println("Input Audio Format: " + sourceFormat); + + // 设置目标PCM格式 (可以是16-bit, 8kHz, Mono, Linear PCM) + AudioFormat pcmFormat = new AudioFormat( + AudioFormat.Encoding.PCM_SIGNED, + sourceFormat.getSampleRate(), + 16, // 16-bit samples + 1, // 单声道 + 2, // 每个样本2字节(16位) + sourceFormat.getSampleRate(), + false // 大端模式 + ); + + // 获取PCM格式的音频流 + AudioInputStream pcmAudioStream = AudioSystem.getAudioInputStream(pcmFormat, inputAudioStream); + + // 创建输出文件流 + FileOutputStream fos = new FileOutputStream(outputFile); + byte[] buffer = new byte[1024]; + int bytesRead; + + // 将PCM音频数据写入输出文件 + while ((bytesRead = pcmAudioStream.read(buffer)) != -1) { + fos.write(buffer, 0, bytesRead); + } + + // 关闭流 + pcmAudioStream.close(); + fos.close(); + + System.out.println("Audio has been converted to PCM format and saved at: " + outputFile.getAbsolutePath()); + + } catch (Exception e) { + e.printStackTrace(); + } + } +} + diff --git a/vetti-common/src/main/java/com/vetti/common/ai/whisper/RealtimeTranscriptionMicrophone.java b/vetti-common/src/main/java/com/vetti/common/ai/whisper/RealtimeTranscriptionMicrophone.java index 3c96031..fe7083e 100644 --- a/vetti-common/src/main/java/com/vetti/common/ai/whisper/RealtimeTranscriptionMicrophone.java +++ b/vetti-common/src/main/java/com/vetti/common/ai/whisper/RealtimeTranscriptionMicrophone.java @@ -2,11 +2,14 @@ package com.vetti.common.ai.whisper; import cn.hutool.json.JSONObject; import okhttp3.*; +import org.apache.commons.io.FileUtils; import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioSystem; import javax.sound.sampled.DataLine; import javax.sound.sampled.TargetDataLine; +import java.io.File; +import java.nio.ByteBuffer; import java.util.Base64; import java.util.concurrent.CountDownLatch; @@ -63,23 +66,30 @@ public class RealtimeTranscriptionMicrophone { new Thread(() -> { try { // 设置麦克风输入流 - AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false); - DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); - TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info); - line.open(format); - line.start(); - - byte[] buffer = new byte[BUFFER_SIZE]; - int bytesRead; - while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) { - // 将音频数据转换为 Base64 编码的字符串 - byte[] audioData = new byte[bytesRead]; - System.arraycopy(buffer, 0, audioData, 0, bytesRead); - String base64Audio = Base64.getEncoder().encodeToString(audioData); - String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }"; - webSocket.send(message); - } - +// AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false); +// DataLine.Info info = new DataLine.Info(TargetDataLine.class, format); +// TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info); +// line.open(format); +// line.start(); +// +// byte[] buffer = new byte[BUFFER_SIZE]; +// int bytesRead; +// while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) { +// // 将音频数据转换为 Base64 编码的字符串 +// byte[] audioData = new byte[bytesRead]; +// System.arraycopy(buffer, 0, audioData, 0, bytesRead); +// String base64Audio = Base64.getEncoder().encodeToString(audioData); +// String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }"; +// webSocket.send(message); +// } + File outputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio.pcm"); // 输出PCM格式文件 + ByteBuffer byteBuffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile)); + byte[] bytes = new byte[byteBuffer.remaining()]; + //从缓冲区中读取数据并存储到指定的字节数组中 + byteBuffer.get(bytes); + String base64Audio = Base64.getEncoder().encodeToString(bytes); + String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }"; + webSocket.send(message); // 3. 提交音频并请求转录 // webSocket.send("{\"type\": \"input_audio_buffer.commit\"}"); // webSocket.send("{\"type\": \"response.create\"}");