iOS：百度长语音识别具体的封装：识别、播放、进度刷新

一、介绍

以前做过讯飞语音识别，比较简单，识别率很不错，但是它的识别时间是有限制的，最多60秒。可是有的时候我们需要更长的识别时间，例如朗诵古诗等功能。当然讯飞语音也是可以通过曲线救国来实现，就是每达到60秒时识别停止就立即重新开启，每次结束拼接录音。这么做，显然是麻烦的，百度语音解决了这个问题，它最近上线了长语音识别技术，可以不受时间限制，还是非常棒的。这次，我就专门抽成一个工具类使用，包括语音识别，录音拼接，录音播放、进度刷新，具体如何集成SDK看官方文档，我就不废话了，直接介绍如何使用我的这个工具类。

二、依赖

因为语音录制格式是pcm格式的，我使用lame静态库将其转成了mp3格式。

下载路径参考我的博客，有具体的介绍：http://www.cnblogs.com/XYQ-208910/p/7650759.html

三、代码

SJVoiceTransform.h

#import <Foundation/Foundation.h>

@interface SJVoiceTransform : NSObject

/**

 *  根据路径将pcm文件转化为MP3

 *

 *  @param docPath docment路径

 */

+(NSString *)stransformToMp3ByUrlWithUrl:(NSString *)docPath;

@end

SJVoiceTransform.m

#import "SJVoiceTransform.h"

#import "lame.h"

@interface SJVoiceTransform()

//@property (strong , nonatomic)NSString * path;//存放音频沙河路径

@end

@implementation SJVoiceTransform

+(NSString *)stransformToMp3ByUrlWithUrl:(NSString *)docPath

{

    NSString *pathUrl = [NSString stringWithFormat:@"%@",docPath];//存储录音pcm格式音频地址

    NSString * mp3Url = pathUrl;

    NSString *mp3FilePath = [docPath stringByAppendingString:@".mp3"];//存放Mp3地址

    if (!mp3Url || !mp3FilePath) {

        return ;

    }

    @try {

        unsigned long read, write;

        FILE *pcm = fopen([mp3Url cStringUsingEncoding:], "rb"); //source 被转换的音频文件位置

        //音频不能为空

        if (!pcm) {

            return nil;

        }

        fseek(pcm, *, SEEK_CUR); //skip file header

        FILE *mp3 = fopen([mp3FilePath cStringUsingEncoding:], "wb"); //output 输出生成的Mp3文件位置

        const int PCM_SIZE = ;

        const int MP3_SIZE = ;

        short int pcm_buffer[PCM_SIZE*];

        unsigned char mp3_buffer[MP3_SIZE];

        lame_t lame = lame_init();

        lame_set_num_channels(lame,);

        lame_set_in_samplerate(lame, 8000.0); //11025.0

        //lame_set_VBR(lame, vbr_default);

        lame_set_brate(lame, );

        lame_set_mode(lame, );

        lame_set_quality(lame, );//

        lame_init_params(lame);

        do {

            read = fread(pcm_buffer, *sizeof(short int), PCM_SIZE, pcm);

            if (read == )

                write = lame_encode_flush(lame, mp3_buffer, MP3_SIZE);

            else

                write = lame_encode_buffer_interleaved(lame, pcm_buffer, read, mp3_buffer, MP3_SIZE);

            fwrite(mp3_buffer, write, , mp3);

        } while (read != );

        lame_close(lame);

        fclose(mp3);

        fclose(pcm);

    }

    @catch (NSException *exception) {

        NSLog(@"%@",[exception description]);

    }

    @finally {

        NSLog(@"MP3生成成功: %@",mp3FilePath);

    }

    return mp3FilePath;

}

@end

BDHelper.h

//

//  BDHelper.h

//  BDRecognizer

//

//  Created by 夏远全 on 2017/11/14.

//  Copyright © 2017年 夏远全. All rights reserved.

#import <Foundation/Foundation.h>

#import <UIKit/UIKit.h>

#import <AudioToolbox/AudioToolbox.h>

#import <AVFoundation/AVFoundation.h>

@protocol BDHelperDelegate <NSObject>

@optional

-(void)recognitionPartialResult:(NSString *)recognitionResult; //中间结果

-(void)recognitionFinalResult:(NSString *)recognitionResult;   //最终结果

-(void)recognitionError:(NSError *)error;                      //识别错误

-(void)updateProgress:(CGFloat)progress duration:(int)duration;//更新播放进度

-(void)updateReadingTime:(int)readingTime;//更新朗诵时间

-(void)recognitionRecordFinishedPlay;//语音识别录音播放完成

@end

@interface BDHelper : NSObject

/**

 代理

 */

@property (nonatomic,   weak) id<BDHelperDelegate> delegate;

/**

 播放器

 */

@property (nonatomic, strong) AVAudioPlayer   *audioPlayer;

/**

 文件路径

 */

@property (nonatomic,   copy) NSString       *audioFilePath;

/**

 创建对象

 @param voiceFileName 录音文件名

 @return 实例

 */

+(BDHelper *)sharedBDHelperWithVoiceFileName:(NSString *)voiceFileName;

/**

 开始语音识别

 */

- (void)startLongSpeechRecognition;

/**

 结束语音识别

 */

- (void)endLongSpeechRecognition;

/**

 播放识别语音

 */

-(void)playListenningRecognition;

/**

 暂停语音播放

 */

-(void)pauseListenningRecognition;

/**

 @param isNeedDeleteFilePath 是否需要移除缓存的音频文件

 销毁播放器

 */

-(void)didRemoveAudioPlayer:(BOOL)isNeedDeleteFilePath;

/**

 启动计时器，累计朗诵时间

 */

- (void)beginStatisticsReadingTime;

/**

 销毁计时器

 */

- (void)endStatisticsReadingTime;

@end

BDHelper.m

//

//  BDHelper.m

//  BDRecognizer

//

//  Created by 夏远全 on 2017/11/14.

//  Copyright © 2017年 夏远全. All rights reserved.

//

#import "BDHelper.h"

#import "SJVoiceTransform.h"

#if !TARGET_IPHONE_SIMULATOR

#import "BDSEventManager.h"

#import "BDSASRDefines.h"

#import "BDSASRParameters.h"

/// "请在官网新建应用，配置包名，并在此填写应用的 api key, secret key, appid(即appcode)"

static NSString* const API_KEY    =     @"BxLweqmGUxxxxxxxxxxxxxx";

static NSString* const SECRET_KEY = @"rhUIXG4gXmxxxxxxxxxxxxxx";

static NSString* const APP_ID     =     @"81xxxxx";

@interface BDHelper()<BDSClientASRDelegate,AVAudioPlayerDelegate>

@property (nonatomic, strong) BDSEventManager      *asrEventManager;

@property (nonatomic, strong) CADisplayLink        *progressLink;

@property (nonatomic, strong) NSTimer              *readingTimer;

@property (nonatomic, strong) NSMutableData        *mutabelData;

@property (nonatomic, strong) NSFileHandle         *fileHandler;

@property (nonatomic,   copy) NSString             *voiceFileName;

@end

#endif

@implementation BDHelper

#if !TARGET_IPHONE_SIMULATOR

+(BDHelper *)sharedBDHelperWithVoiceFileName:(NSString *)voiceFileName{

    BDHelper *helper = [[self alloc] init];

    helper.voiceFileName = voiceFileName;

    [helper setupDefalutValue];

    return helper;

}

-(void)setupDefalutValue{

    self.asrEventManager = [BDSEventManager createEventManagerWithName:BDS_ASR_NAME];

    [self configVoiceRecognitionClient];

    NSLog(@"current sdk version: %@", [self.asrEventManager libver]);

}

#pragma mark - public: Method

- (void)startLongSpeechRecognition{

    //移除播放器

    [self didRemoveAudioPlayer:NO];

    //设置录音路径

    if (!_audioFilePath) {

        [self pcmFilePathConfig];

    }

    //启动识别服务

    [self beginStatisticsReadingTime];

    [self.asrEventManager sendCommand:BDS_ASR_CMD_START];

}

- (void)endLongSpeechRecognition{

    //关闭识别服务

    [self endStatisticsReadingTime];

    [self.asrEventManager sendCommand:BDS_ASR_CMD_STOP];

    [self.fileHandler writeData:self.mutabelData];

    self.mutabelData = nil;

}

-(void)playListenningRecognition{

    //避免重复点击

    if (_audioPlayer && _audioPlayer.isPlaying) {

        return;

    }

    //直接播放

    if (_audioPlayer && !_audioPlayer.isPlaying) {

        [_audioPlayer play];

         _progressLink.paused = NO;

        return;

    }

    //播放识别语音(pcm格式转成mp3格式)

    NSString *mp3Path = [SJVoiceTransform stransformToMp3ByUrlWithUrl:_audioFilePath];

    if (!mp3Path) {

        return;

    }

    //初始化播放器

    _audioPlayer = [[AVAudioPlayer alloc]initWithContentsOfURL:[NSURL fileURLWithPath:mp3Path] error:NULL];

    _audioPlayer.volume = ;

    _audioPlayer.delegate = self;

    [[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryPlayback error:nil];

    [[AVAudioSession sharedInstance] setActive:YES error:nil];

    [_audioPlayer prepareToPlay];

    [_audioPlayer play];

    _progressLink = [CADisplayLink displayLinkWithTarget:self selector:@selector(updateProgressValue)];

    [_progressLink addToRunLoop:[NSRunLoop currentRunLoop] forMode:NSRunLoopCommonModes];

}

-(void)pauseListenningRecognition{

    //暂停播放

    if (_audioPlayer && _audioPlayer.isPlaying) {

        [_audioPlayer pause];

        _progressLink.paused = YES;

    }

}

#pragma mark - event

-(void)updateProgressValue{

    //更新播放进度

    int duration = round(_audioPlayer.duration);

    if (self.delegate && [self.delegate respondsToSelector:@selector(updateProgress:duration:)]) {

        [self.delegate updateProgress:_audioPlayer.currentTime/_audioPlayer.duration duration:duration];

    }

}

-(void)startReadingTimer{

    //累计朗诵时间

    if (self.delegate && [self.delegate respondsToSelector:@selector(updateReadingTime:)]) {

        [self.delegate updateReadingTime:];

    }

}

#pragma mark - Private: Configuration

- (void)configVoiceRecognitionClient {

    // ---- 设置DEBUG_LOG的级别

    [self.asrEventManager setParameter:@(EVRDebugLogLevelTrace) forKey:BDS_ASR_DEBUG_LOG_LEVEL];

    // ---- 配置API_KEY 和 SECRET_KEY 和 APP_ID

    [self.asrEventManager setParameter:@[API_KEY, SECRET_KEY] forKey:BDS_ASR_API_SECRET_KEYS];

    [self.asrEventManager setParameter:APP_ID forKey:BDS_ASR_OFFLINE_APP_CODE];

    // ---- 配置端点检测（二选一）

    [self configModelVAD];

    //[self configDNNMFE];

    // ---- 语义与标点 -----

    [self enableNLU];

    [self enablePunctuation];

    // ---- 长语音请务必开启本地VAD -----

    [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_LONG_SPEECH];

    [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_LOCAL_VAD];

    // ---- 录音文件路径 -----

    [self pcmFilePathConfig];

    // ---- 设置代理 -----

    [self.asrEventManager setDelegate:self];

    [self.asrEventManager setParameter:nil forKey:BDS_ASR_AUDIO_FILE_PATH];

    [self.asrEventManager setParameter:nil forKey:BDS_ASR_AUDIO_INPUT_STREAM];

}

- (void)pcmFilePathConfig{

    [self configFileHandler:self.voiceFileName];

    _audioFilePath = [self getFilePath:self.voiceFileName];

}

- (void)enableNLU {

    // ---- 开启语义理解 -----

    [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_NLU];

    [self.asrEventManager setParameter:@"" forKey:BDS_ASR_PRODUCT_ID];

}

- (void)enablePunctuation {

    // ---- 开启标点输出 -----

    [self.asrEventManager setParameter:@(NO) forKey:BDS_ASR_DISABLE_PUNCTUATION];

    // ---- 普通话标点 -----

    [self.asrEventManager setParameter:@"" forKey:BDS_ASR_PRODUCT_ID];

}

- (void)configModelVAD {

    NSString *modelVAD_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_basic_model" ofType:@"dat"];

    [self.asrEventManager setParameter:modelVAD_filepath forKey:BDS_ASR_MODEL_VAD_DAT_FILE];

    [self.asrEventManager setParameter:@(YES) forKey:BDS_ASR_ENABLE_MODEL_VAD];

}

- (void)configDNNMFE {

    NSString *mfe_dnn_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_dnn" ofType:@"dat"];

    NSString *cmvn_dnn_filepath = [[NSBundle mainBundle] pathForResource:@"bds_easr_mfe_cmvn" ofType:@"dat"];

    [self.asrEventManager setParameter:mfe_dnn_filepath forKey:BDS_ASR_MFE_DNN_DAT_FILE];

    [self.asrEventManager setParameter:cmvn_dnn_filepath forKey:BDS_ASR_MFE_CMVN_DAT_FILE];

    //自定义静音时长（单位：每帧10ms）

    //[self.asrEventManager setParameter:@(500) forKey:BDS_ASR_MFE_MAX_SPEECH_PAUSE];

    //[self.asrEventManager setParameter:@(501) forKey:BDS_ASR_MFE_MAX_WAIT_DURATION];

}

#pragma mark - MVoiceRecognitionClientDelegate

- (void)VoiceRecognitionClientWorkStatus:(int)workStatus obj:(id)aObj {

    switch (workStatus) {

        case EVoiceRecognitionClientWorkStatusNewRecordData: {

            /// 录音数据回调、NSData-原始音频数据，此处可以用来存储录音

            NSData *originData = (NSData *)aObj;

            [self.mutabelData appendData:originData];

            break;

        }

        case EVoiceRecognitionClientWorkStatusStartWorkIng: {

            /// 识别工作开始，开始采集及处理数据

            NSDictionary *logDic = [self parseLogToDic:aObj];

            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: start vr, log: %@\n", logDic]);

            break;

        }

        case EVoiceRecognitionClientWorkStatusStart: {

            /// 检测到用户开始说话

            NSLog(@"CALLBACK: detect voice start point.\n");

            break;

        }

        case EVoiceRecognitionClientWorkStatusEnd: {

            /// 本地声音采集结束

            NSLog(@"CALLBACK: detect voice end point.\n");

            break;

        }

        case EVoiceRecognitionClientWorkStatusFlushData: {

            /// 连续上屏、NSDictionary-中间结果

            NSString *result = [self getDescriptionForDic:aObj];

            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: partial result -%@.\n\n" ,result]);

            NSMutableString *recognitionResult = [aObj[@"results_recognition"] firstObject];

            if (self.delegate && [self.delegate respondsToSelector:@selector(recognitionPartialResult:)]) {

                [self.delegate recognitionPartialResult:recognitionResult];

            }

            break;

        }

        case EVoiceRecognitionClientWorkStatusFinish: {

            /// 语音识别功能完成，服务器返回正确结果、NSDictionary-最终识别结果

            NSString *result = [self getDescriptionForDic:aObj];

            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: final result - %@.\n\n",result]);

            NSString *recognitionResult = [aObj[@"results_recognition"] firstObject];

            if (self.delegate && [self.delegate respondsToSelector:@selector(recognitionFinalResult:)]) {

                [self.delegate recognitionFinalResult:recognitionResult];

            }

            break;

        }

        case EVoiceRecognitionClientWorkStatusMeterLevel: {

            /// 当前音量回调、NSNumber:int-当前音量

            NSLog(@"-------voice volume:%d-------",[aObj intValue]);

            break;

        }

        case EVoiceRecognitionClientWorkStatusCancel: {

            /// 用户主动取消

            NSLog(@"CALLBACK: user press cancel.\n");

            break;

        }

        case EVoiceRecognitionClientWorkStatusError: {

            /// 发生错误 NSError-错误信息

            NSLog(@"%@", [NSString stringWithFormat:@"CALLBACK: encount error - %@.\n", (NSError *)aObj]);

            if (self.delegate && [self.delegate respondsToSelector:@selector(recognitionError:)]) {

                [self.delegate recognitionError:(NSError *)aObj];

            }

            break;

        }

        case EVoiceRecognitionClientWorkStatusLoaded: {

            /// 离线引擎加载完成

            NSLog(@"CALLBACK: offline engine loaded.\n");

            break;

        }

        case EVoiceRecognitionClientWorkStatusUnLoaded: {

            /// 离线引擎卸载完成

            NSLog(@"CALLBACK: offline engine unLoaded.\n");

            break;

        }

        case EVoiceRecognitionClientWorkStatusChunkThirdData: {

            /// CHUNK: 识别结果中的第三方数据  NSData

            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: Chunk 3-party data length: %lu\n", (unsigned long)[(NSData *)aObj length]]);

            break;

        }

        case EVoiceRecognitionClientWorkStatusChunkNlu: {

            /// CHUNK: 识别结果中的语义结果  NSData

            NSString *nlu = [[NSString alloc] initWithData:(NSData *)aObj encoding:NSUTF8StringEncoding];

            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: Chunk NLU data: %@\n", nlu]);

            break;

        }

        case EVoiceRecognitionClientWorkStatusChunkEnd: {

            /// CHUNK: 识别过程结束 NSString

            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK: Chunk end, sn: %@.\n", aObj]);

            break;

        }

        case EVoiceRecognitionClientWorkStatusFeedback: {

            /// Feedback: 识别过程反馈的打点数据 NSString

            NSDictionary *logDic = [self parseLogToDic:aObj];

            NSLog(@"%@",[NSString stringWithFormat:@"CALLBACK Feedback: %@\n", logDic]);

            break;

        }

        case EVoiceRecognitionClientWorkStatusRecorderEnd: {

            /// 录音机关闭，页面跳转需检测此时间，规避状态条 (iOS)

            NSLog(@"CALLBACK: recorder closed.\n");

            break;

        }

        case EVoiceRecognitionClientWorkStatusLongSpeechEnd: {

            /// 长语音结束状态

            NSLog(@"CALLBACK: Long Speech end.\n");

            [self endLongSpeechRecognition];

            break;

        }

        default:

            break;

    }

}

#pragma mark - AVAudioPlayerDelegate

-(void)audioPlayerDidFinishPlaying:(AVAudioPlayer *)player successfully:(BOOL)flag{

    if (flag) {

        if (self.delegate && [self.delegate respondsToSelector:@selector(recognitionRecordFinishedPlay)]) {

            [self.delegate recognitionRecordFinishedPlay];

        }

    }

}

#pragma mark - public: Method

-(void)didRemoveAudioPlayer:(BOOL)isNeedDeleteFilePath{

    [_audioPlayer  stop];

    [_progressLink invalidate];

    _audioPlayer = nil;

    _progressLink = nil;

    if (isNeedDeleteFilePath) {

        [[NSFileManager defaultManager] removeItemAtPath:_audioFilePath error:nil];

        _audioFilePath = nil;

    }

}

- (void)beginStatisticsReadingTime{

    [self.readingTimer fire];

}

- (void)endStatisticsReadingTime{

    if (self.readingTimer.isValid) {

        [self.readingTimer invalidate];

        self.readingTimer = nil;

    }

}

#pragma mark - private: Method

- (NSDictionary *)parseLogToDic:(NSString *)logString

{

    NSArray *tmp = NULL;

    NSMutableDictionary *logDic = [[NSMutableDictionary alloc] initWithCapacity:];

    NSArray *items = [logString componentsSeparatedByString:@"&"];

    for (NSString *item in items) {

        tmp = [item componentsSeparatedByString:@"="];

        if (tmp.count == ) {

            [logDic setObject:tmp.lastObject forKey:tmp.firstObject];

        }

    }

    return logDic;

}

- (NSString *)getDescriptionForDic:(NSDictionary *)dic {

    if (dic) {

        return [[NSString alloc] initWithData:[NSJSONSerialization dataWithJSONObject:dic

                                                                              options:NSJSONWritingPrettyPrinted

                                                                                error:nil]

                                     encoding:NSUTF8StringEncoding];

    }

    return nil;

}

#pragma mark - Private: File

- (NSString *)getFilePath:(NSString *)fileName {

    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);

    if (paths && [paths count]) {

        return [[paths objectAtIndex:] stringByAppendingPathComponent:fileName];

    } else {

        return nil;

    }

}

- (void)configFileHandler:(NSString *)fileName {

    self.fileHandler = [self createFileHandleWithName:fileName isAppend:NO];

}

- (NSFileHandle *)createFileHandleWithName:(NSString *)aFileName isAppend:(BOOL)isAppend {

    NSFileHandle *fileHandle = nil;

    NSString *fileName = [self getFilePath:aFileName];

    int fd = -;

    if (fileName) {

        if ([[NSFileManager defaultManager] fileExistsAtPath:fileName]&& !isAppend) {

            [[NSFileManager defaultManager] removeItemAtPath:fileName error:nil];

        }

        int flags = O_WRONLY | O_APPEND | O_CREAT;

        fd = open([fileName fileSystemRepresentation], flags, );

    }

    if (fd != -) {

        fileHandle = [[NSFileHandle alloc] initWithFileDescriptor:fd closeOnDealloc:YES];

    }

    return fileHandle;

}

#pragma mark - lazy load

-(NSMutableData *)mutabelData{

    if (!_mutabelData) {

        _mutabelData = [NSMutableData data];

    }

    return _mutabelData;

}

-(NSTimer *)readingTimer{

    if (!_readingTimer) {

        _readingTimer = [NSTimer scheduledTimerWithTimeInterval:1.0 target:self selector:@selector(startReadingTimer) userInfo:nil repeats:YES];

        [[NSRunLoop currentRunLoop] addTimer:_readingTimer forMode:UITrackingRunLoopMode];

    }

    return _readingTimer;

}

#endif

@end

四、注意

百度语音SDK只支持armv6、armv7的真机架构，不支持x86_64模拟器架构。

五、如何在模拟器下开发

办法：

1、首先将涉及到百度语音的代码全部采用宏定义注释掉，如：

#if !TARGET_IPHONE_SIMULATOR

// 语音相关调用

// self.asrEventManager = [BDSEventManager createEventManagerWithName:BDS_ASR_NAME];

// 其他调用

#endif

2、（重点要处理的地方）切换到模拟时，将libBaiduSpeechSDK.a静态包从Linked Frameworks and Librarise删掉(切换到真机时，再将libBaiduSpeechSDK.a导入进来就行)

iOS：百度长语音识别具体的封装：识别、播放、进度刷新