TTS 返回语音优化

This commit is contained in:
2025-10-19 16:20:27 +08:00
parent 3f305d0b75
commit a75d2dd985

View File

@@ -146,7 +146,9 @@ public class ChatWebSocketHandler {
try { try {
File inputFile = new File(resultPathUrl); File inputFile = new File(resultPathUrl);
File outputFile = new File(resultPathUrl); File outputFile = new File(resultPathUrl);
trimSilence(inputFile, outputFile); // 设置去除尾部的秒数
float removeSeconds = 0.25f; // trim the final 0.25 seconds
trimEndByTime(inputFile, outputFile, removeSeconds);
//文件转换成文件流 //文件转换成文件流
ByteBuffer outByteBuffer = convertFileToByteBuffer(resultPathUrl); ByteBuffer outByteBuffer = convertFileToByteBuffer(resultPathUrl);
//发送文件流数据 //发送文件流数据
@@ -408,30 +410,37 @@ public class ChatWebSocketHandler {
} }
// 截取音频流,去除静音部分 // 裁剪音频文件,去除最后多少秒
public void trimSilence(File inputFile, File outputFile) { public void trimEndByTime(File inputFile, File outputFile, float removeSeconds) {
try { try {
// 获取音频流 // 获取音频输入
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(inputFile); AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(inputFile);
AudioFormat format = audioInputStream.getFormat(); AudioFormat format = audioInputStream.getFormat();
// 将音频流转为字节数组 // 获取音频文件的总帧数
byte[] audioBytes = audioInputStream.readAllBytes(); long totalFrames = audioInputStream.getFrameLength();
// 去除前后静音部分 // 计算音频文件的总时长(秒)
int start = findNonSilenceStart(audioBytes, format); float totalDuration = totalFrames / format.getSampleRate();
int end = findNonSilenceEnd(audioBytes, format);
// 截取音频流 // 计算新的结束位置(去除指定的时间后)
byte[] trimmedAudio = new byte[end - start]; long newEndFrame = (long) ((totalDuration - removeSeconds) * format.getSampleRate());
System.arraycopy(audioBytes, start, trimmedAudio, 0, trimmedAudio.length);
// 创建新的音频输入流 // 确保裁剪的结束帧在合理范围内
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(trimmedAudio); if (newEndFrame < 0) {
AudioInputStream trimmedAudioStream = new AudioInputStream(byteArrayInputStream, format, trimmedAudio.length / format.getFrameSize()); System.out.println("去除的时间超过了音频的总时长");
return;
}
// 保存裁剪后的音频文件 // 创建一个新的输入流,裁剪音频数据
AudioSystem.write(trimmedAudioStream, AudioFileFormat.Type.WAVE, outputFile); AudioInputStream trimmedStream = new AudioInputStream(
audioInputStream,
format,
newEndFrame
);
// 创建新的文件并保存裁剪后的音频
AudioSystem.write(trimmedStream, AudioFileFormat.Type.WAVE, outputFile);
System.out.println("裁剪后的音频已保存到: " + outputFile.getAbsolutePath()); System.out.println("裁剪后的音频已保存到: " + outputFile.getAbsolutePath());
audioInputStream.close(); audioInputStream.close();
@@ -440,27 +449,5 @@ public class ChatWebSocketHandler {
} }
} }
/**
 * Finds the byte offset at which the non-silent part of the audio data begins.
 *
 * <p>A byte counts as non-silent when its absolute value is greater than a fixed threshold.
 * The offset of the first such byte is rounded down to a whole-frame boundary so that a
 * slice starting there never begins in the middle of a sample frame (which would mis-pair
 * the bytes of every following multi-byte sample).
 *
 * <p>NOTE(review): the threshold is compared against individual bytes, not decoded samples,
 * so for multi-byte samples a low-order byte of a quiet sample can still exceed it.
 *
 * @param audioBytes raw audio data to scan
 * @param format audio format of {@code audioBytes}; only its frame size is consulted. When it
 *     is {@code null} or reports no positive frame size, the offset is not aligned.
 * @return the frame-aligned offset of the first non-silent byte, or {@code 0} when every byte
 *     is at or below the threshold (including an empty array)
 */
private int findNonSilenceStart(byte[] audioBytes, AudioFormat format) {
    final int threshold = 10; // byte magnitudes at or below this value are treated as silence
    // getFrameSize() may be AudioSystem.NOT_SPECIFIED (-1); fall back to byte granularity.
    final int frameSize = (format != null && format.getFrameSize() > 0) ? format.getFrameSize() : 1;
    for (int i = 0; i < audioBytes.length; i++) {
        if (Math.abs(audioBytes[i]) > threshold) {
            // Snap back to the first byte of the frame that contains this byte.
            return i - (i % frameSize);
        }
    }
    return 0; // all silence: keep the data from the very beginning
}
/**
 * Finds the exclusive byte offset at which the non-silent part of the audio data ends.
 *
 * <p>A byte counts as non-silent when its absolute value is greater than a fixed threshold.
 * The offset just past the last such byte is rounded up to a whole-frame boundary (capped at
 * the array length) so that a slice ending there never cuts a sample frame in half.
 *
 * <p>NOTE(review): the threshold is compared against individual bytes, not decoded samples,
 * so for multi-byte samples a low-order byte of a quiet sample can still exceed it.
 *
 * @param audioBytes raw audio data to scan
 * @param format audio format of {@code audioBytes}; only its frame size is consulted. When it
 *     is {@code null} or reports no positive frame size, the offset is not aligned.
 * @return the frame-aligned exclusive end offset of the non-silent data, or
 *     {@code audioBytes.length} when every byte is at or below the threshold
 */
private int findNonSilenceEnd(byte[] audioBytes, AudioFormat format) {
    final int threshold = 10; // byte magnitudes at or below this value are treated as silence
    // getFrameSize() may be AudioSystem.NOT_SPECIFIED (-1); fall back to byte granularity.
    final int frameSize = (format != null && format.getFrameSize() > 0) ? format.getFrameSize() : 1;
    for (int i = audioBytes.length - 1; i >= 0; i--) {
        if (Math.abs(audioBytes[i]) > threshold) {
            int end = i + 1;
            int remainder = end % frameSize;
            if (remainder != 0) {
                // Extend to the end of the frame that contains the last non-silent byte.
                end += frameSize - remainder;
            }
            // A trailing partial frame in the input must not push the offset past the array.
            return Math.min(end, audioBytes.length);
        }
    }
    return audioBytes.length; // all silence: keep the data through the very end
}
} }