STT 数据流处理
This commit is contained in:
@@ -0,0 +1,54 @@
|
||||
package com.vetti.common.ai.whisper;
|
||||
|
||||
import javax.sound.sampled.*;
|
||||
import java.io.*;
|
||||
|
||||
/**
 * Command-line utility that decodes an audio file and writes its samples as raw
 * (headerless) PCM: signed 16-bit, mono, little-endian, at the sample rate of the source.
 *
 * <p>Usage: {@code AudioToPCM [inputFile [outputFile]]}. Any path that is omitted falls back
 * to the built-in default location.
 */
public class AudioToPCM {

    // The non-ASCII directory name in the default paths is written with Unicode escapes so
    // the source compiles identically whatever source encoding the compiler assumes; the
    // runtime value of both paths is unchanged.

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "/Users/wangxiangshun/Desktop/\u4e34\u65f6\u6587\u4ef6/110/buffer.wav";

    /** Output file used when no second path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH =
            "/Users/wangxiangshun/Desktop/\u4e34\u65f6\u6587\u4ef6/110/output_pcm_audio.pcm";

    /** Bits per sample of the produced PCM data. */
    private static final int SAMPLE_SIZE_BITS = 16;

    /** Channel count of the produced PCM data (mono). */
    private static final int CHANNELS = 1;

    /** Bytes per frame: one 16-bit sample for each channel. */
    private static final int FRAME_SIZE_BYTES = CHANNELS * SAMPLE_SIZE_BITS / 8;

    /** Size of the copy buffer; a multiple of the frame size so only whole frames are read. */
    private static final int COPY_BUFFER_BYTES = 1024;

    /**
     * Converts one audio file to raw PCM and reports the outcome on the console.
     *
     * <p>Failures (missing or unreadable input, unsupported audio file, unsupported format
     * conversion, unwritable output) are reported on standard error and do not propagate.
     *
     * @param args optional; {@code args[0]} is the input audio file and {@code args[1]} the
     *             output PCM file. Missing entries fall back to the default paths.
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;
        File inputFile = new File(inputPath);
        File outputFile = new File(outputPath);

        try {
            convert(inputFile, outputFile);
            System.out.println("Audio has been converted to PCM format and saved at: " + outputFile.getAbsolutePath());
        } catch (UnsupportedAudioFileException | IOException | IllegalArgumentException e) {
            // IllegalArgumentException is what AudioSystem raises when no installed converter
            // can produce the requested PCM format from the source format.
            System.err.println("Failed to convert " + inputFile.getAbsolutePath() + " to PCM: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Decodes {@code inputFile} and writes its audio data to {@code outputFile} as signed
     * 16-bit mono little-endian PCM at the source sample rate.
     *
     * <p>The output file is opened only after the input has been opened and a converter has
     * been obtained, so an unreadable or unsupported input does not leave an output file behind.
     *
     * @param inputFile  audio file to decode
     * @param outputFile destination for the raw PCM bytes; overwritten if it exists
     * @throws UnsupportedAudioFileException if the input is not a recognized audio file
     * @throws IOException                   if reading the input or writing the output fails
     * @throws IllegalArgumentException      if the conversion to the target format is unsupported
     */
    private static void convert(File inputFile, File outputFile)
            throws UnsupportedAudioFileException, IOException {
        // try-with-resources closes every stream on all paths, including failures mid-copy.
        try (AudioInputStream sourceStream = AudioSystem.getAudioInputStream(inputFile)) {
            AudioFormat sourceFormat = sourceStream.getFormat();
            System.out.println("Input Audio Format: " + sourceFormat);

            // Target: linear PCM that keeps the source sample rate. For PCM the frame rate
            // equals the sample rate, so the same value is passed for both.
            AudioFormat pcmFormat = new AudioFormat(
                    AudioFormat.Encoding.PCM_SIGNED,
                    sourceFormat.getSampleRate(),
                    SAMPLE_SIZE_BITS,
                    CHANNELS,
                    FRAME_SIZE_BYTES,
                    sourceFormat.getSampleRate(),
                    false // little-endian byte order
            );

            try (AudioInputStream pcmStream = AudioSystem.getAudioInputStream(pcmFormat, sourceStream);
                 OutputStream out = new FileOutputStream(outputFile)) {
                byte[] buffer = new byte[COPY_BUFFER_BYTES];
                int bytesRead;
                while ((bytesRead = pcmStream.read(buffer)) != -1) {
                    out.write(buffer, 0, bytesRead);
                }
            }
        }
    }
}
|
||||
|
||||
@@ -2,11 +2,14 @@ package com.vetti.common.ai.whisper;
|
||||
|
||||
import cn.hutool.json.JSONObject;
|
||||
import okhttp3.*;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
import javax.sound.sampled.AudioFormat;
|
||||
import javax.sound.sampled.AudioSystem;
|
||||
import javax.sound.sampled.DataLine;
|
||||
import javax.sound.sampled.TargetDataLine;
|
||||
import java.io.File;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Base64;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
|
||||
@@ -63,23 +66,30 @@ public class RealtimeTranscriptionMicrophone {
|
||||
new Thread(() -> {
|
||||
try {
|
||||
// 设置麦克风输入流
|
||||
AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
|
||||
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
|
||||
TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
|
||||
line.open(format);
|
||||
line.start();
|
||||
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int bytesRead;
|
||||
while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) {
|
||||
// 将音频数据转换为 Base64 编码的字符串
|
||||
byte[] audioData = new byte[bytesRead];
|
||||
System.arraycopy(buffer, 0, audioData, 0, bytesRead);
|
||||
String base64Audio = Base64.getEncoder().encodeToString(audioData);
|
||||
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||
webSocket.send(message);
|
||||
}
|
||||
|
||||
// AudioFormat format = new AudioFormat(SAMPLE_RATE, BITS_PER_SAMPLE, 1, true, false);
|
||||
// DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
|
||||
// TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
|
||||
// line.open(format);
|
||||
// line.start();
|
||||
//
|
||||
// byte[] buffer = new byte[BUFFER_SIZE];
|
||||
// int bytesRead;
|
||||
// while ((bytesRead = line.read(buffer, 0, buffer.length)) > 0) {
|
||||
// // 将音频数据转换为 Base64 编码的字符串
|
||||
// byte[] audioData = new byte[bytesRead];
|
||||
// System.arraycopy(buffer, 0, audioData, 0, bytesRead);
|
||||
// String base64Audio = Base64.getEncoder().encodeToString(audioData);
|
||||
// String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||
// webSocket.send(message);
|
||||
// }
|
||||
File outputFile = new File("/Users/wangxiangshun/Desktop/临时文件/110/output_pcm_audio.pcm"); // 输出PCM格式文件
|
||||
ByteBuffer byteBuffer = ByteBuffer.wrap(FileUtils.readFileToByteArray(outputFile));
|
||||
byte[] bytes = new byte[byteBuffer.remaining()];
|
||||
//从缓冲区中读取数据并存储到指定的字节数组中
|
||||
byteBuffer.get(bytes);
|
||||
String base64Audio = Base64.getEncoder().encodeToString(bytes);
|
||||
String message = "{ \"type\": \"input_audio_buffer.append\", \"audio\": \"" + base64Audio + "\" }";
|
||||
webSocket.send(message);
|
||||
// 3. 提交音频并请求转录
|
||||
// webSocket.send("{\"type\": \"input_audio_buffer.commit\"}");
|
||||
// webSocket.send("{\"type\": \"response.create\"}");
|
||||
|
||||
Reference in New Issue
Block a user