更新语音转文本模型

This commit is contained in:
2025-10-14 22:26:36 +08:00
parent 0f19d0940b
commit f4371f332f
4 changed files with 12 additions and 4 deletions

View File

@@ -62,7 +62,7 @@ public class AiCommonController extends BaseController
@GetMapping("/handleViceToText") @GetMapping("/handleViceToText")
public AjaxResult handleViceToText() public AjaxResult handleViceToText()
{ {
whisperClient.handleVoiceToText("/Users/wangxiangshun/Desktop/临时文件/output.mp3"); whisperClient.handleVoiceToText("F:\\/output.mp3");
return success(); return success();
} }

View File

@@ -150,7 +150,7 @@ elevenLabs:
# 语音转文本 # 语音转文本
whisper: whisper:
apiUrl: https://api.openai.com/v1/audio/transcriptions apiUrl: https://api.openai.com/v1/audio/transcriptions
model: whisper-1 model: gpt-4o-mini-transcribe
apiKey: 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 apiKey: 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
language: en language: en

View File

@@ -150,7 +150,7 @@ elevenLabs:
# 语音转文本 # 语音转文本
whisper: whisper:
apiUrl: https://api.openai.com/v1/audio/transcriptions apiUrl: https://api.openai.com/v1/audio/transcriptions
model: whisper-1 model: gpt-4o-mini-transcribe
apiKey: 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 apiKey: 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
language: en language: en
@@ -158,7 +158,7 @@ whisper:
chatGpt: chatGpt:
apiKey: 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 apiKey: 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
apiUrl: https://api.openai.com/v1/chat/completions apiUrl: https://api.openai.com/v1/chat/completions
model: gpt-4 model: gpt-3.5-turbo
role: user role: user

View File

@@ -1,6 +1,8 @@
package com.vetti.common.ai.whisper; package com.vetti.common.ai.whisper;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONObject; import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import okhttp3.*; import okhttp3.*;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@@ -80,6 +82,7 @@ public class WhisperClient {
* @return * @return
*/ */
public String handleVoiceToText(String audioFileUrl) { public String handleVoiceToText(String audioFileUrl) {
System.out.println("1-处理记录时间:"+System.currentTimeMillis()/1000);
String resultText = ""; String resultText = "";
OkHttpClient client = new OkHttpClient(); OkHttpClient client = new OkHttpClient();
// 音频文件路径 // 音频文件路径
@@ -89,10 +92,15 @@ public class WhisperClient {
Map<String, String> options = new HashMap<>(); Map<String, String> options = new HashMap<>();
options.put("language", language); options.put("language", language);
resultText = transcribe(audioFile, options, client); resultText = transcribe(audioFile, options, client);
if(StrUtil.isNotEmpty(resultText)){
Map<String, String> map = JSONUtil.toBean(resultText, Map.class);
resultText = map.get("text");
}
System.out.println("转写结果: " + resultText); System.out.println("转写结果: " + resultText);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
System.out.println("2-处理记录时间:"+System.currentTimeMillis()/1000);
return resultText; return resultText;
} }
} }