百度AI开放平台,语音识别,语音合成以及短文本相似度

时间:2022-10-16 06:49:41

百度AI开放平台:https://ai.baidu.com/

语音合成

from aip import AipSpeech
# Credentials for the Baidu AI open platform.
# NOTE(review): keys are hard-coded in this snippet; load them from
# configuration instead of publishing them with the source.
APP_ID = ""  # your App ID
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"  # your Api Key
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"  # your Secret Key

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Ask the service to synthesize the sentence below.
result = client.synthesis("快乐的池塘里面有只小青蛙,呱呱呱儿 ", "zh", 1, {
    "vol": 5,  # volume
    "spd": 4,  # speaking rate
    "pit": 7,  # pitch
    "per": 1,  # voice (0, 1, 3, 4)
})

# A dict result is treated as "not audio" (the original only wrote the file
# for non-dict results). Print it for inspection; otherwise store the bytes.
# The original printed the result unconditionally, which dumped the raw
# audio bytes to the console on success.
if isinstance(result, dict):
    print(result)
else:
    with open("audio.mp3", "wb") as f:
        f.write(result)

语音识别

from aip import AipSpeech
import os

# Your APP ID, API key (AK) and secret key (SK).
# NOTE(review): keys are hard-coded in this snippet; load them from
# configuration instead of publishing them with the source.
APP_ID = ""
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


# Read the file
def get_file_content(filePath):
    """Return the complete contents of the file at ``filePath`` as bytes."""
    with open(filePath, 'rb') as fp:
        return fp.read()


# Recognize a local file
# NOTE(review): the input is an .m4a file but the format is declared as
# 'pcm'; the later snippets in this article convert to PCM with ffmpeg
# before calling asr() -- confirm whether this call can succeed as written.
res = client.asr(get_file_content('wb.m4a'), 'pcm', 16000, {
    'dev_pid': 1536,
})

# res.get("result") is None when the key is absent, and the original
# indexed it directly, which raises TypeError. Show the whole response
# in that case so the reason is visible.
results = res.get("result")
if results:
    print(results[0])
else:
    print(res)

学说话

from aip import AipSpeech
import os
import time

# Your APP ID, API key (AK) and secret key (SK).
# NOTE(review): keys are hard-coded in this snippet; load them from
# configuration instead of publishing them with the source.
APP_ID = ""
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


############ Speech recognition ##########
# Read the file
def get_file_content(filePath):
    """Convert ``filePath`` to raw PCM with ffmpeg and return the PCM bytes.

    ffmpeg is asked for signed 16-bit little-endian samples, one channel,
    16000 Hz. The output is written to ``<filePath>.pcm`` (``-y`` overwrites
    an existing file) and then read back.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    # Local import so this function does not depend on the file's import header.
    import subprocess

    pcm_path = f"{filePath}.pcm"
    # Pass an argument list instead of a shell string: a path containing
    # spaces or shell metacharacters is then handed to ffmpeg verbatim.
    # check=True surfaces a failed conversion instead of reading a missing
    # or stale .pcm file afterwards.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", str(filePath),
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


# Recognize a local file
def audio2text(filename):
    """Run speech recognition on ``filename`` and return the first result.

    The audio is obtained through ``get_file_content`` and submitted to
    ``client.asr`` as 'pcm' at 16000 Hz.

    Raises:
        RuntimeError: if the response contains no "result" entry.
    """
    res = client.asr(get_file_content(filename), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    results = res.get("result")
    if not results:
        # The original indexed res.get("result") directly, which raised an
        # uninformative TypeError whenever the key was missing.
        raise RuntimeError(f"speech recognition returned no result: {res}")
    return results[0]


############ Speech synthesis #########
def text2audio(getedtext):
    """Synthesize ``getedtext`` into an MP3 file and return the file name.

    The file is named after the current ``time.time()`` value and written
    to the current working directory.

    Raises:
        RuntimeError: if ``client.synthesis`` returns a dict instead of audio.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(getedtext, "zh", 1, {
        "vol": 5,
        "spd": 4,
        "pit": 7,
        "per": 0,
    })
    if isinstance(result, dict):
        # The original skipped the write for a dict result but still returned
        # the file name, so the caller went on to use a file that was never
        # created.
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, "wb") as f:
        f.write(result)
    return filename


# Repeat what was heard: transcribe the recording, synthesize the text,
# then hand the resulting file name to the shell.
# NOTE(review): os.system(<file name>) presumably relies on the OS opening
# the file with its associated player -- confirm on the target platform.
getedtext = audio2text("wb.m4a")
res = text2audio(getedtext)
os.system(res)

语音回答问题

from aip import AipSpeech
import os
import time

# Your APP ID, API key (AK) and secret key (SK).
# NOTE(review): keys are hard-coded in this snippet; load them from
# configuration instead of publishing them with the source.
APP_ID = ""
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


############ Speech recognition ##########
# Read the file
def get_file_content(filePath):
    """Return the complete contents of the file at ``filePath`` as bytes."""
    with open(filePath, 'rb') as fp:
        return fp.read()


# Recognize a local file
def audio2text(filepath):
    """Run speech recognition on ``filepath`` and return the first result.

    The bytes from ``get_file_content`` are submitted to ``client.asr`` as
    'pcm' at 16000 Hz.

    Raises:
        RuntimeError: if the response contains no "result" entry.
    """
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    results = res.get("result")
    if not results:
        # The original indexed res.get("result") directly, which raised an
        # uninformative TypeError whenever the key was missing.
        raise RuntimeError(f"speech recognition returned no result: {res}")
    return results[0]


# Output file name, fixed once at import time; text2audio() writes to it and
# the playback call at the end of the script opens it.
filename = f"{time.time()}.mp3"
############ Speech synthesis #########
def text2audio(getedtext):
    """Synthesize ``getedtext`` and write the audio to the global ``filename``.

    Raises:
        RuntimeError: if ``client.synthesis`` returns a dict instead of audio.
    """
    result = client.synthesis(getedtext, "zh", 1, {
        "vol": 5,
        "spd": 4,
        "pit": 7,
        "per": 1,
    })
    if isinstance(result, dict):
        # The original silently skipped the write for a dict result, and the
        # script then tried to open a file that was never created.
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, "wb") as f:
        f.write(result)


# Transcribe the recording, synthesize the text, then hand the output file
# name to the shell.
# NOTE(review): os.system(<file name>) presumably relies on the OS opening
# the file with its associated player -- confirm on the target platform.
getedtext = audio2text("wb.m4a")
text2audio(getedtext)
os.system(filename)

短文本相似度

from aip import AipSpeech,AipNlp
import os
import time

# Your APP ID, API key (AK) and secret key (SK).
# NOTE(review): keys are hard-coded in this snippet; load them from
# configuration instead of publishing them with the source.
APP_ID = ""
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

# One client per service: speech (recognition/synthesis) and NLP (similarity).
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)


############ Speech recognition ##########
# Read the file
def get_file_content(filePath):
    """Convert ``filePath`` to raw PCM with ffmpeg and return the PCM bytes.

    ffmpeg is asked for signed 16-bit little-endian samples, one channel,
    16000 Hz. The output is written to ``<filePath>.pcm`` (``-y`` overwrites
    an existing file) and then read back.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    # Local import so this function does not depend on the file's import header.
    import subprocess

    pcm_path = f"{filePath}.pcm"
    # Pass an argument list instead of a shell string: a path containing
    # spaces or shell metacharacters is then handed to ffmpeg verbatim.
    # check=True surfaces a failed conversion instead of reading a missing
    # or stale .pcm file afterwards.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", str(filePath),
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


# Recognize a local file
def audio2text(filename):
    """Run speech recognition on ``filename`` and return the first result.

    The audio is obtained through ``get_file_content`` and submitted to
    ``client.asr`` as 'pcm' at 16000 Hz.

    Raises:
        RuntimeError: if the response contains no "result" entry.
    """
    res = client.asr(get_file_content(filename), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    results = res.get("result")
    if not results:
        # The original indexed res.get("result") directly, which raised an
        # uninformative TypeError whenever the key was missing.
        raise RuntimeError(f"speech recognition returned no result: {res}")
    return results[0]


############ Speech synthesis #########
def text2audio(getedtext):
    """Synthesize ``getedtext`` into an MP3 file and return the file name.

    The file is named after the current ``time.time()`` value and written
    to the current working directory.

    Raises:
        RuntimeError: if ``client.synthesis`` returns a dict instead of audio.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(getedtext, "zh", 1, {
        "vol": 5,
        "spd": 4,
        "pit": 7,
        "per": 0,
    })
    if isinstance(result, dict):
        # The original skipped the write for a dict result but still returned
        # the file name, so the caller went on to use a file that was never
        # created.
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, "wb") as f:
        f.write(result)
    return filename


########## Tuling ################
def to_tuling(text):
    """Post ``text`` to the Tuling chatbot API and return the reply text.

    The reply is read from ``results[0]["values"]["text"]`` of the JSON
    response.

    Raises:
        RuntimeError: if the JSON response has no "results" entry.
    """
    import requests

    args = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            }
        },
        "userInfo": {
            # NOTE(review): hard-coded API key; move it to configuration.
            "apiKey": "e963f65c4c7a466a80e5aaa3510da2fa",
            "userId": ""
        }
    }
    url = "http://openapi.tuling123.com/openapi/api/v2"
    # requests has no default timeout; without one an unresponsive server
    # blocks this call indefinitely.
    res = requests.post(url, json=args, timeout=10)
    print(res)
    results = res.json().get("results")
    if not results:
        # The original indexed the missing list and failed with a TypeError.
        raise RuntimeError(f"Tuling API returned no results: {res.text}")
    text = results[0].get("values").get("text")
    print("图灵答案", text)
    return text


########### Invocation ################
getedtext = audio2text("wb.m4a")
# Answer the "what is your name" question locally when the recognized text is
# similar enough; everything else goes to the chatbot. A response without a
# "score" is treated as 0 so the comparison cannot fail on None.
if nlp.simnet("你叫什么名字", getedtext).get("score", 0) >= 0.68:
    getedtext = "我才不告诉你呢,你个糟老头子坏得很"
else:
    getedtext = to_tuling(getedtext)
res = text2audio(getedtext)
# NOTE(review): os.system(<file name>) presumably relies on the OS opening
# the file with its associated player -- confirm on the target platform.
os.system(res)

对话机器人玩具

应用结构:

百度AI开放平台,语音识别,语音合成以及短文本相似度

百度AI开放平台,语音识别,语音合成以及短文本相似度

在index.html中

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<audio controls autoplay id="player"></audio>
<p>
    <button onclick="start_reco()" style="background-color: yellow">录制语音指令</button>
</p>
<p>
    <button onclick="stop_reco_audio()" style="background-color: blue">发送语音指令</button>
</p>
<!-- Scripts live inside <body>; the original placed them after </body>. -->
<!--<script type="application/javascript" src="/static/Recorder.js"></script>-->
<script type="application/javascript" src="https://cdn.bootcss.com/recorderjs/0.1.0/recorder.js"></script>
<script type="text/javascript" src="/static/jQuery3.1.1.js"></script>
<script type="text/javascript">
    // Recorder instance; stays null until microphone access has been granted.
    var reco = null;
    var audio_context = new AudioContext();

    function log_error(err) {
        console.log(err)
    }

    // Prefer the promise-based mediaDevices API and fall back to the legacy
    // (prefixed) callback API the page originally relied on.
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        navigator.mediaDevices.getUserMedia({audio: true}).then(create_stream).catch(log_error);
    } else {
        navigator.getUserMedia = (navigator.getUserMedia ||
            navigator.webkitGetUserMedia ||
            navigator.mozGetUserMedia ||
            navigator.msGetUserMedia);
        if (navigator.getUserMedia) {
            navigator.getUserMedia({audio: true}, create_stream, log_error);
        } else {
            console.log("getUserMedia is not available in this browser");
        }
    }

    // Wrap the microphone stream in a Recorder once access is granted.
    function create_stream(user_media) {
        var stream_input = audio_context.createMediaStreamSource(user_media);
        reco = new Recorder(stream_input);
    }

    // Start recording (yellow button).
    function start_reco() {
        if (!reco) {
            // Clicked before microphone access was granted.
            console.log("recorder is not ready yet");
            return;
        }
        // An AudioContext created before any user gesture may start out
        // suspended; resume it from this click handler.
        if (audio_context.state === "suspended") {
            audio_context.resume();
        }
        reco.record();
    }

    // Stop recording, upload the audio, then reset the buffer (blue button).
    function stop_reco_audio() {
        if (!reco) {
            console.log("recorder is not ready yet");
            return;
        }
        reco.stop();
        send_audio();
        reco.clear();
    }

    // Export the recording as WAV, POST it to /ai as form field "record",
    // and play the audio file named in the JSON reply.
    function send_audio() {
        reco.exportWAV(function (wav_file) {
            var formdata = new FormData();
            formdata.append("record", wav_file);
            console.log(formdata);
            $.ajax({
                // Relative URLs: the page is served by the same Flask app, so
                // this works on any host instead of one hard-coded LAN address.
                url: "/ai",
                type: 'post',
                processData: false,
                contentType: false,
                data: formdata,
                dataType: 'json',
                success: function (data) {
                    document.getElementById("player").src = "/get_audio/" + data.filename
                }
            });
        })
    }
</script>
</body>
</html>

在app.py中

from flask import Flask,render_template,request,jsonify,send_file
from uuid import uuid4
import baidu_ai

app = Flask(__name__)


@app.route("/")
def index():
    """Serve the recorder page."""
    return render_template("index.html")


@app.route("/ai", methods=["POST"])
def ai():
    """Answer an uploaded voice recording with a synthesized audio reply.

    Expects the recording in the multipart form field "record". Responds
    with JSON ``{"filename": <name>}`` naming the reply audio file, or with
    a 400 JSON error when no recording was uploaded.
    """
    # 1. Save the uploaded recording.
    audio = request.files.get("record")
    print('audio', audio)
    if audio is None:
        # request.files.get returns None for a missing field; the original
        # then failed with AttributeError on audio.save.
        return jsonify({"error": "missing 'record' file"}), 400
    filename = f"{uuid4()}.wav"
    audio.save(filename)
    # 2. Convert the recording to PCM and send it to Baidu for recognition.
    q_text = baidu_ai.audio2text(filename)
    print(q_text)
    # 3. Hand the recognized question to Tuling (or handle it ourselves) to
    #    obtain an answer.
    a_text = baidu_ai.to_tuling(q_text)
    print(a_text)
    # 4. Send the answer to Baidu speech synthesis to produce an audio file.
    a_file = baidu_ai.text2audio(a_text)
    print(a_file)
    # 5. Tell the front end which audio file to play.
    return jsonify({"filename": a_file})


@app.route("/get_audio/<filename>")
def get_audio(filename):
    """Return the synthesized audio file called ``filename``."""
    print(filename)
    # Only synthesized audio is meant to be downloadable. Without this check
    # the route would also hand out other files stored next to the app, such
    # as the source files that contain the API keys.
    if not filename.endswith(".mp3"):
        return jsonify({"error": "not found"}), 404
    return send_file(filename)


if __name__ == '__main__':
    # NOTE(review): debug=True together with host 0.0.0.0 makes the
    # interactive debugger reachable from the network; disable it outside
    # local experiments.
    app.run("0.0.0.0", 9527, debug=True)

在baidu_ai.py中

from aip import AipSpeech,AipNlp
import os
import subprocess
import time

# Your APP ID, API key (AK) and secret key (SK).
# NOTE(review): keys are hard-coded in this module; load them from
# configuration instead of publishing them with the source.
APP_ID = ""
API_KEY = "6bPrLnkguN5ltxvfxRYP96Hk"
SECRET_KEY = "ckSFGccmaGr0b2EPGE3dueb1PkfW5IsW"

# One client per service: NLP and speech (recognition/synthesis).
nlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


# Read the file
def get_file_content(filePath):
    """Convert ``filePath`` to raw PCM with ffmpeg and return the PCM bytes.

    ffmpeg is asked for signed 16-bit little-endian samples, one channel,
    16000 Hz. The output is written to ``<filePath>.pcm`` (``-y`` overwrites
    an existing file) and then read back.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    pcm_path = f"{filePath}.pcm"
    # Pass an argument list instead of a shell string: a path containing
    # spaces or shell metacharacters is then handed to ffmpeg verbatim.
    # check=True surfaces a failed conversion instead of reading a missing
    # or stale .pcm file afterwards.
    subprocess.run(
        [
            "ffmpeg", "-y", "-i", str(filePath),
            "-acodec", "pcm_s16le", "-f", "s16le",
            "-ac", "1", "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )
    with open(pcm_path, 'rb') as fp:
        return fp.read()


def audio2text(filepath):
    """Run speech recognition on ``filepath`` and return the first result.

    The audio is obtained through ``get_file_content`` and submitted to
    ``client.asr`` as 'pcm' at 16000 Hz.

    Raises:
        RuntimeError: if the response contains no "result" entry.
    """
    # Recognize a local file.
    res = client.asr(get_file_content(filepath), 'pcm', 16000, {
        'dev_pid': 1536,
    })
    print('----------res', res)
    results = res.get("result")
    if not results:
        # The original indexed res.get("result") directly, which raised an
        # uninformative TypeError whenever the key was missing.
        raise RuntimeError(f"speech recognition returned no result: {res}")
    return results[0]


def text2audio(text):
    """Synthesize ``text`` into an MP3 file and return the file name.

    The file is named after the current ``time.time()`` value and written
    to the current working directory.

    Raises:
        RuntimeError: if ``client.synthesis`` returns a dict instead of audio.
    """
    filename = f"{time.time()}.mp3"
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,
        "spd": 3,
        "pit": 7,
        "per": 4,
    })
    # On success the call returns the audio bytes; on failure it returns a
    # dict (see the service's error codes).
    if isinstance(result, dict):
        # The original skipped the write for a dict result but still returned
        # the file name, so the web app advertised a file that did not exist.
        raise RuntimeError(f"speech synthesis failed: {result}")
    with open(filename, 'wb') as f:
        f.write(result)
    return filename


def to_tuling(text):
    """Post ``text`` to the Tuling chatbot API and return the reply text.

    The reply is read from ``results[0]["values"]["text"]`` of the JSON
    response.

    Raises:
        RuntimeError: if the JSON response has no "results" entry.
    """
    import requests

    args = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            }
        },
        "userInfo": {
            # NOTE(review): hard-coded API key; move it to configuration.
            "apiKey": "e963f65c4c7a466a80e5aaa3510da2fa",
            "userId": ""
        }
    }
    url = "http://openapi.tuling123.com/openapi/api/v2"
    # requests has no default timeout; without one an unresponsive server
    # blocks the web request that called this function indefinitely.
    res = requests.post(url, json=args, timeout=10)
    print(res)
    results = res.json().get("results")
    if not results:
        # The original indexed the missing list and failed with a TypeError.
        raise RuntimeError(f"Tuling API returned no results: {res.text}")
    text = results[0].get("values").get("text")
    print("图灵答案", text)
    return text