diff --git a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java
index 1477227..094fd7b 100644
--- a/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java
+++ b/vetti-admin/src/main/java/com/vetti/socket/ChatWebSocketHandler.java
@@ -15,10 +15,7 @@ import org.apache.commons.io.FileUtils;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
 
-import javax.sound.sampled.AudioFormat;
-import javax.sound.sampled.AudioSystem;
-import javax.sound.sampled.DataLine;
-import javax.sound.sampled.TargetDataLine;
+import javax.sound.sampled.*;
 import javax.websocket.*;
 import javax.websocket.server.PathParam;
 import javax.websocket.server.ServerEndpoint;
@@ -31,6 +28,9 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
 
 /**
  * 语音面试 web处理器
@@ -202,9 +202,14 @@ public class ChatWebSocketHandler {
             // webSocket.send("{\"type\": \"input_audio_buffer.start\"}");
             log.info("3.1 开始发送数据音频流啦");
             // 将音频数据转换为 Base64 编码的字符串
-            String base64Audio = Base64.getEncoder().encodeToString(bytes);
+            // Re-encode the chunk as signed, little-endian, mono PCM at
+            // SAMPLE_RATE / BITS_PER_SAMPLE before Base64-encoding it.
+            AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
+            byte[] outputAudioBytes = convertAudio(bytes, format);
+            String base64Audio = Base64.getEncoder().encodeToString(outputAudioBytes);
             String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
             webSocket.send(message);
+            log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
             // 3. 提交音频并请求转录
             // webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
             // webSocket.send("{\"type\": \"response.create\"}");
@@ -212,7 +217,7 @@ public class ChatWebSocketHandler {
         }catch (Exception e){
             e.printStackTrace();
         }
-        log.info("4、开始接收数据流时间:{}",System.currentTimeMillis()/1000);
+
     }
 
     // 连接关闭时调用
@@ -228,6 +233,35 @@ public class ChatWebSocketHandler {
         throwable.printStackTrace();
     }
 
+    /**
+     * Re-encodes a raw audio buffer into {@code targetFormat}.
+     * <p>
+     * NOTE(review): the input bytes are themselves labelled with {@code targetFormat}, so this
+     * looks like a pass-through copy; confirm the real format of the incoming audio.
+     *
+     * @param inputAudioBytes raw audio bytes, interpreted as {@code targetFormat}
+     * @param targetFormat    format requested for the returned bytes
+     * @return the bytes read back from the converted stream
+     * @throws Exception if the conversion is unsupported or reading the stream fails
+     */
+    public static byte[] convertAudio(byte[] inputAudioBytes, AudioFormat targetFormat) throws Exception {
+        // AudioInputStream takes its length in sample frames, not bytes.
+        int frameSize = targetFormat.getFrameSize();
+        long frameLength = frameSize > 0 ? inputAudioBytes.length / frameSize : AudioSystem.NOT_SPECIFIED;
+        // try-with-resources closes every stream even when the conversion throws.
+        try (AudioInputStream inputAudioStream = new AudioInputStream(
+                     new ByteArrayInputStream(inputAudioBytes), targetFormat, frameLength);
+             AudioInputStream outputAudioStream = AudioSystem.getAudioInputStream(targetFormat, inputAudioStream);
+             ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
+            byte[] buffer = new byte[1024];
+            int bytesRead;
+            while ((bytesRead = outputAudioStream.read(buffer)) != -1) {
+                byteArrayOutputStream.write(buffer, 0, bytesRead);
+            }
+            return byteArrayOutputStream.toByteArray();
+        }
+    }
+
     /**
      * 将字节数组保存为WebM文件
      *
@@ -297,7 +331,7 @@ public class ChatWebSocketHandler {
     private void createWhisperRealtimeSocket(String clientId){
         try{
             OkHttpClient client = new OkHttpClient();
-            CountDownLatch latch = new CountDownLatch(1);
+//            CountDownLatch latch = new CountDownLatch(1);
             // 设置 WebSocket 请求
             Request request = new Request.Builder()
                     .url(API_URL)
diff --git a/vetti-admin/src/main/resources/application-druid.yml b/vetti-admin/src/main/resources/application-druid.yml
index 00422ab..9de00ac 100644
--- a/vetti-admin/src/main/resources/application-druid.yml
+++ b/vetti-admin/src/main/resources/application-druid.yml
@@ -145,7 +145,8 @@ verification:
 # 文本转语音
 elevenLabs:
   baseUrl: https://api.elevenlabs.io/v1
-  apiKey: sk_5240d8f56cb1eb5225fffcf903f62479884d1af5b3de6812
+  # Read from the ELEVENLABS_API_KEY environment variable; never commit the key itself.
+  apiKey: ${ELEVENLABS_API_KEY}
   modelId: eleven_turbo_v2_5
 # 语音转文本
 whisper: