最近有个 C#/.NET 程序需要用到文字转语音功能,但需要在离线模式下使用,经过再三斟酌,最终还是选用了科大讯飞的离线语音合成库。不过既然研究了百度语音合成的调用方式,那就放上来给大家做个参考。目前只尝试了在线语音合成,离线语音合成暂时没弄,改天有机会再弄弄吧。科大讯飞的语音合成请看这篇文章:C# 使用科大讯飞实现离线语音合成、在线语音合成 - 实用工具_软件教程_.net_c#-有码挺好个人博客 (cisharp.com)。
首先在 NuGet 中安装 Baidu.AI 组件库,如下图。
using Baidu.Aip.Speech;
using DevComponents.DotNetBar;
using NAudio.Wave;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Windows.Forms;
namespace Text2Voice.VOICESDK
{
/// <summary>
/// Thin wrapper around the Baidu AI speech SDK clients: speech recognition
/// via <c>Asr</c> and text-to-speech via <c>Tts</c>, with synthesized audio
/// played back through NAudio.
/// </summary>
class BaiduSDK
{
    private readonly Asr _asrClient;
    private readonly Tts _ttsClient;

    /// <summary>
    /// Creates the recognition and synthesis clients.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the credential arguments are literal placeholder strings;
    /// presumably they must be replaced with real Baidu application credentials.
    /// </remarks>
    public BaiduSDK()
    {
        _asrClient = new Asr("appId", "apiKey", "secretKey");
        _ttsClient = new Tts("apiKey", "secretKey");
    }

    /// <summary>
    /// Recognizes speech from a local PCM file (16000 Hz) and writes the
    /// result to the console.
    /// </summary>
    public void AsrData()
    {
        // The path below is a placeholder for the local PCM file location.
        var data = File.ReadAllBytes("语音pcm文件地址");
        var result = _asrClient.Recognize(data, "pcm", 16000);
        Console.Write(result);
    }

    /// <summary>
    /// Recognizes speech from a PCM file referenced by URL (16000 Hz) and
    /// writes the result to the console.
    /// </summary>
    public void AsrUrl()
    {
        // Both URLs are placeholders: the audio to recognize and the
        // callback address for the recognition result.
        var result = _asrClient.Recognize(
            "http://xxx.com/待识别的pcm文件地址",
            "http://xxx.com/识别结果回调地址",
            "pcm",
            16000);
        Console.WriteLine(result);
    }

    /// <summary>
    /// Synthesizes <paramref name="content"/> to speech, hands the raw audio
    /// bytes back through <paramref name="bte"/> and starts playing them.
    /// </summary>
    /// <param name="content">Text to synthesize.</param>
    /// <param name="bte">
    /// Receives the synthesized audio bytes on success; left untouched when
    /// the service reports a non-zero error code.
    /// </param>
    /// <param name="isSave">
    /// Currently not used by this method; kept so existing callers keep compiling.
    /// </param>
    /// <param name="spd">Speech rate, sent as the "spd" option.</param>
    /// <param name="vol">Volume, sent as the "vol" option.</param>
    /// <param name="per">Speaker id, sent as the "per" option (see <c>BaiduSpeaker</c>).</param>
    /// <remarks>
    /// Any exception is reported to the user in a message box and then
    /// rethrown to the caller.
    /// NOTE(review): the playback objects created here are never disposed;
    /// disposing them safely requires hooking the end of playback — confirm
    /// the desired lifetime before adding that.
    /// </remarks>
    public void Tts(string content, ref byte[] bte, bool isSave = false, int spd = 4, int vol = 5, int per = 4)
    {
        try
        {
            // Optional synthesis parameters.
            var option = new Dictionary<string, object>()
            {
                {"spd", spd}, // speech rate
                {"vol", vol}, // volume
                {"per", per}  // speaker; 4 = "Du Yaya" emotional child voice
            };

            var result = _ttsClient.Synthesis(content, option);
            if (result.ErrorCode != 0) // alternatively check result.Success
            {
                // Synthesis failed: nothing to play, leave bte unchanged.
                return;
            }

            // The returned audio is decoded as MP3 and converted to PCM for playback.
            var mp3Stream = new MemoryStream(result.Data);
            var mp3Reader = new Mp3FileReader(mp3Stream);
            bte = result.Data;
            var pcmStream = WaveFormatConversionStream.CreatePcmStream(mp3Reader);
            var alignedStream = new BlockAlignReductionStream(pcmStream);

            // Single output device (the original created a second, unused WaveOut).
            var waveOut = new WaveOut(WaveCallbackInfo.FunctionCallback());
            waveOut.Init(alignedStream);
            waveOut.Play();
        }
        catch (Exception)
        {
            MessageBoxEx.Show("语音引擎初始化失败!", "错误", MessageBoxButtons.OK, MessageBoxIcon.Warning);
            // Rethrow without resetting the stack trace.
            throw;
        }
    }
}
/// <summary>
/// Speaker (voice) identifiers. The numeric value of a member is what gets
/// passed as the <c>per</c> argument of the text-to-speech call.
/// Member names are parsed from UI text, so they must not be renamed.
/// Note that the value 2 is not defined here.
/// </summary>
public enum BaiduSpeaker : int
{
    /// <summary>Young female voice.</summary>
    青年女生 = 0,

    /// <summary>Young male voice.</summary>
    青年男生 = 1,

    /// <summary>"Du Xiaoyao" voice.</summary>
    度逍遥 = 3,

    /// <summary>"Du Yaya" voice.</summary>
    度丫丫 = 4
}
}
调用示例
// Usage example: read the settings chosen on the form and speak the entered text.
var baidu = new BaiduSDK();

// Speech rate: index of the selected entry in the speed control.
int speed = cboSpeed.SelectedIndex;

// Volume: index of the selected entry in the volume control.
int volume = cboVol.SelectedIndex;

// Speaker: the control's text must match a BaiduSpeaker member name,
// otherwise Enum.Parse throws.
var speaker = (BaiduSpeaker)Enum.Parse(typeof(BaiduSpeaker), cboPerson.Text);

// bte receives the synthesized audio bytes; isSave is forwarded unchanged.
baidu.Tts(txtContent.Text, ref bte, isSave, speed, volume, (int)speaker);