STT 数据流处理

This commit is contained in:
2025-10-20 11:12:29 +08:00
parent 0d0c6c32f0
commit 36cceafac5
3 changed files with 183 additions and 96 deletions

View File

@@ -0,0 +1,54 @@
package com.vetti.common.ai.whisper;
import javax.sound.sampled.*;
import java.io.*;
/**
 * Command-line utility that decodes an audio file (e.g. WAV) and writes its samples
 * as headerless raw PCM: signed 16-bit, mono, little-endian, at the source sample rate.
 */
public class AudioToPCM {

    // Default locations used when no command-line arguments are supplied.
    // The escaped directory name is the same non-ASCII folder as before; it is written
    // with Unicode escapes so the source compiles regardless of the platform encoding.
    private static final String DEFAULT_INPUT_PATH =
            "/Users/wangxiangshun/Desktop/\u4e34\u65f6\u6587\u4ef6/110/buffer.wav";
    private static final String DEFAULT_OUTPUT_PATH =
            "/Users/wangxiangshun/Desktop/\u4e34\u65f6\u6587\u4ef6/110/output_pcm_audio.pcm";

    private static final int BITS_PER_SAMPLE = 16;
    private static final int CHANNELS = 1;
    // One frame = one 16-bit mono sample = 2 bytes.
    private static final int FRAME_SIZE_BYTES = 2;
    private static final int COPY_BUFFER_SIZE = 1024;

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} is the input audio file path and
     *             {@code args[1]} is the output PCM file path; either falls back
     *             to the built-in default when absent
     */
    public static void main(String[] args) {
        File inputFile = new File(args != null && args.length > 0 ? args[0] : DEFAULT_INPUT_PATH);
        File outputFile = new File(args != null && args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH);
        try {
            convert(inputFile, outputFile);
            System.out.println("Audio has been converted to PCM format and saved at: " + outputFile.getAbsolutePath());
        } catch (Exception e) {
            // Top-level boundary of a CLI tool: report the failure and exit normally.
            e.printStackTrace();
        }
    }

    /**
     * Converts {@code inputFile} to raw PCM and writes the bytes to {@code outputFile}.
     * All streams are closed even when decoding or writing fails.
     *
     * @param inputFile  audio file readable by {@link AudioSystem}
     * @param outputFile destination for the raw PCM bytes (overwritten if present)
     * @throws IOException                   if reading or writing fails
     * @throws UnsupportedAudioFileException if the input is not a recognised audio file
     * @throws IllegalArgumentException      if no converter to the target format exists
     */
    public static void convert(File inputFile, File outputFile)
            throws IOException, UnsupportedAudioFileException {
        try (AudioInputStream inputAudioStream = AudioSystem.getAudioInputStream(inputFile)) {
            AudioFormat sourceFormat = inputAudioStream.getFormat();
            System.out.println("Input Audio Format: " + sourceFormat);

            // Target format keeps the source sample rate; only encoding, sample size,
            // channel count and byte order are normalised.
            AudioFormat pcmFormat = new AudioFormat(
                    AudioFormat.Encoding.PCM_SIGNED,
                    sourceFormat.getSampleRate(),
                    BITS_PER_SAMPLE,
                    CHANNELS,
                    FRAME_SIZE_BYTES,
                    sourceFormat.getSampleRate(), // frame rate equals sample rate for PCM
                    false                         // bigEndian = false, i.e. little-endian
            );

            // The output file is opened only after the converting stream was obtained,
            // so an unsupported conversion does not leave an empty file behind.
            try (AudioInputStream pcmAudioStream = AudioSystem.getAudioInputStream(pcmFormat, inputAudioStream);
                 FileOutputStream fos = new FileOutputStream(outputFile)) {
                byte[] buffer = new byte[COPY_BUFFER_SIZE];
                int bytesRead;
                while ((bytesRead = pcmAudioStream.read(buffer)) != -1) {
                    fos.write(buffer, 0, bytesRead);
                }
            }
        }
    }
}

View File

@@ -2,11 +2,14 @@ package com.vetti.common.ai.whisper;
import cn.hutool.json.JSONObject;
import okhttp3.*;
import org.apache.commons.io.FileUtils;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.TargetDataLine;
import java.io.File;
import java.nio.ByteBuffer;
import java.util.Base64;
import java.util.concurrent.CountDownLatch;
@@ -63,23 +66,30 @@ public class RealtimeTranscriptionMicrophone {
new Thread(() -> {
try {
// 设置麦克风输入流
AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
line.open(format);
line.start();
byte[] buffer = new byte[BUFFER_SIZE];
int bytesRead;
while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) {
// 将音频数据转换为 Base64 编码的字符串
byte[] audioData = new byte[bytesRead];
System.arraycopy(buffer, 0, audioData, 0, bytesRead);
String base64Audio = Base64.getEncoder().encodeToString(audioData);
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
webSocket.send(message);
}
// AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
// DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
// TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
// line.open(format);
// line.start();
//
// byte[] buffer = new byte[BUFFER_SIZE];
// int bytesRead;
// while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) {
// // 将音频数据转换为 Base64 编码的字符串
// byte[] audioData = new byte[bytesRead];
// System.arraycopy(buffer, 0, audioData, 0, bytesRead);
// String base64Audio = Base64.getEncoder().encodeToString(audioData);
// String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
// webSocket.send(message);
// }
File outputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio.pcm"); // 输出PCM格式文件
ByteBuffer byteBuffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile));
byte[] bytes = new byte[byteBuffer.remaining()];
//从缓冲区中读取数据并存储到指定的字节数组中
byteBuffer.get(bytes);
String base64Audio = Base64.getEncoder().encodeToString(bytes);
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
webSocket.send(message);
// 3. 提交音频并请求转录
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
// webSocket.send("{\"type\": \"response.create\"}");