WebRTC VideoEngine超详细教程（三）——集成X264编码和ffmpeg解码

转自：http://blog.csdn.net/nonmarking/article/details/47958395

本系列目前共三篇文章，后续还会更新

WebRTC VideoEngine超详细教程（二）——集成OPENH264编解码器

总述

在前一篇文章中，讲解了如何将OPENH264编解码器集成到WebRTC中，但是OPENH264只能编码baseline的H264视频，而且就编码质量而言，还是X264最好，本文就来讲解一下如何将X264编码器集成到WebRTC中，为了实现解码，同时要用到ffmpeg。总体流程和之前一样，分为重新封装编解码器和注册调用两大步骤，注册调用这一步没有任何不同，主要是重新封装这一步骤有较大区别。

重新封装X264编码功能

首先当然还是要下载X264源码编译出相应的库以供调用。在windows下使用mingw进行编译，再使用poxports工具导出库，最后得到libx264.dll和libx264.lib，同时把x264.h和x264_config.h总共四个文件放到工程目录下，并在项目属性中进行相应配置。

使用x264进行视频编码的基本流程如下

#include <stdint.h>
#include <stdio.h>
#include <x264.h>
int main( int argc, char **argv )
{
int width, height;
x264_param_t param;
x264_picture_t pic;
x264_picture_t pic_out;
x264_t *h;
int i_frame = 0;
int i_frame_size;
x264_nal_t *nal;
int i_nal;
/* Get default params for preset/tuning */
if( x264_param_default_preset( &param, "medium", NULL ) < 0 )
goto fail;
/* Configure non-default params */
param.i_csp = X264_CSP_I420;
param.i_width = width;
param.i_height = height;
param.b_vfr_input = 0;
param.b_repeat_headers = 1;
param.b_annexb = 1;
/* Apply profile restrictions. */
if( x264_param_apply_profile( &param, "high" ) < 0 )
goto fail;
if( x264_picture_alloc( &pic, param.i_csp, param.i_width, param.i_height ) < 0 )
goto fail;
h = x264_encoder_open( &param);
if( !h )
goto fail;
int luma_size = width * height;
int chroma_size = luma_size / 4;
/* Encode frames */
for( ;; i_frame++ )
{
/* Read input frame */
if( fread( pic.img.plane[0], 1, luma_size, stdin ) != luma_size )
break;
if( fread( pic.img.plane[1], 1, chroma_size, stdin ) != chroma_size )
break;
if( fread( pic.img.plane[2], 1, chroma_size, stdin ) != chroma_size )
break;
pic.i_pts = i_frame;
i_frame_size = x264_encoder_encode( h, &nal, &i_nal, &pic, &pic_out );
if( i_frame_size < 0 )
goto fail;
else if( i_frame_size )
{
if( !fwrite( nal->p_payload, i_frame_size, 1, stdout ) )
goto fail;
}
}
/* Flush delayed frames */
while( x264_encoder_delayed_frames( h ) )
{
i_frame_size = x264_encoder_encode( h, &nal, &i_nal, NULL, &pic_out );
if( i_frame_size < 0 )
goto fail;
else if( i_frame_size )
{
if( !fwrite( nal->p_payload, i_frame_size, 1, stdout ) )
goto fail;
}
}
x264_encoder_close( h );
x264_picture_clean( &pic );
return 0;
}

还是一样，照葫芦画瓢，改写上一篇文章中提到的H264EncoderImpl类

首先是类的定义，去掉了原来的私有成员变量ISVCEncoder* encoder_，加入了以下几项，其他内容不变

x264_picture_t pic;
x264_picture_t pic_out;
x264_t *encoder_;
int i_frame = 0;//frame index
x264_nal_t *nal;

相应的，构造函数和析构函数也要改变，这里就不赘述了，重点看InitEncode方法和Encode方法。

InitEncode方法的实现改写如下

int H264EncoderImpl::InitEncode(const VideoCodec* inst,
int number_of_cores,
size_t max_payload_size) {
if (inst == NULL) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
if (inst->maxFramerate < 1) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
// allow zero to represent an unspecified maxBitRate
if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
if (inst->width < 1 || inst->height < 1) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
if (number_of_cores < 1) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
int ret_val = Release();
if (ret_val < 0) {
return ret_val;
}
/* Get default params for preset/tuning */
x264_param_t param;
ret_val = x264_param_default_preset(&param, "medium", NULL);
if (ret_val != 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d",
ret_val);
x264_encoder_close(encoder_);
encoder_ = NULL;
return WEBRTC_VIDEO_CODEC_ERROR;
}
/* Configure non-default params */
param.i_csp = X264_CSP_I420;
param.i_width = inst->width;
param.i_height = inst->height;
param.b_vfr_input = 0;
param.b_repeat_headers = 1;
param.b_annexb = 0;//这里设置为0，是为了使编码后的NAL统一有4字节的起始码，便于处理，否则会同时有3字节和4字节的起始码，很麻烦
param.i_fps_num = 1;
param.i_fps_num = codec_.maxFramerate;
param.rc.i_bitrate = codec_.maxBitrate;
/* Apply profile restrictions. */
ret_val = x264_param_apply_profile(&param, "high");
if (ret_val != 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d",
ret_val);
x264_encoder_close(encoder_);
encoder_ = NULL;
return WEBRTC_VIDEO_CODEC_ERROR;
}
ret_val = x264_picture_alloc(&pic, param.i_csp, param.i_width, param.i_height);
if (ret_val != 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d",
ret_val);
x264_encoder_close(encoder_);
encoder_ = NULL;
return WEBRTC_VIDEO_CODEC_ERROR;
}
encoder_ = x264_encoder_open(&param);
if (!encoder_){
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::InitEncode() fails to initialize encoder ret_val %d",
ret_val);
x264_encoder_close(encoder_);
x264_picture_clean(&pic);
encoder_ = NULL;
return WEBRTC_VIDEO_CODEC_ERROR;
}
if (&codec_ != inst) {
codec_ = *inst;
}
if (encoded_image_._buffer != NULL) {
delete[] encoded_image_._buffer;
}
encoded_image_._size = CalcBufferSize(kI420, codec_.width, codec_.height);
encoded_image_._buffer = new uint8_t[encoded_image_._size];
encoded_image_._completeFrame = true;
inited_ = true;
WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::InitEncode(width:%d, height:%d, framerate:%d, start_bitrate:%d, max_bitrate:%d)",
inst->width, inst->height, inst->maxFramerate, inst->startBitrate, inst->maxBitrate);
return WEBRTC_VIDEO_CODEC_OK;
}

Encode方法的实现改写如下

int H264EncoderImpl::Encode(const I420VideoFrame& input_image,
const CodecSpecificInfo* codec_specific_info,
const std::vector<VideoFrameType>* frame_types) {
if (!inited_) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
if (input_image.IsZeroSize()) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
if (encoded_complete_callback_ == NULL) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
VideoFrameType frame_type = kDeltaFrame;
// We only support one stream at the moment.
if (frame_types && frame_types->size() > 0) {
frame_type = (*frame_types)[0];
}
bool send_keyframe = (frame_type == kKeyFrame);
if (send_keyframe) {
pic.b_keyframe = TRUE;
WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::EncodeKeyFrame(width:%d, height:%d)",
input_image.width(), input_image.height());
}
// Check for change in frame size.
if (input_image.width() != codec_.width ||
input_image.height() != codec_.height) {
int ret = UpdateCodecFrameSize(input_image);
if (ret < 0) {
return ret;
}
}
/* Read input frame */
pic.img.plane[0] = const_cast<uint8_t*>(input_image.buffer(kYPlane));
pic.img.plane[1] = const_cast<uint8_t*>(input_image.buffer(kUPlane));
pic.img.plane[2] = const_cast<uint8_t*>(input_image.buffer(kVPlane));
pic.i_pts = i_frame;
int i_nal = 0;
int i_frame_size = x264_encoder_encode(encoder_, &nal, &i_nal, &pic, &pic_out);
if (i_frame_size < 0)
{
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::Encode() fails to encode %d",
i_frame_size);
x264_encoder_close(encoder_);
x264_picture_clean(&pic);
encoder_ = NULL;
return WEBRTC_VIDEO_CODEC_ERROR;
}
RTPFragmentationHeader frag_info;
if (i_frame_size)
{
if (i_nal == 0) {
return WEBRTC_VIDEO_CODEC_OK;
}
frag_info.VerifyAndAllocateFragmentationHeader(i_nal);
encoded_image_._length = 0;
uint32_t totalNaluIndex = 0;
for (int nal_index = 0; nal_index < i_nal; nal_index++)
{
uint32_t currentNaluSize = 0;
currentNaluSize = nal[nal_index].i_payload - 4; //x264_encoder_encode编码得到的nal单元是已经带有起始码的，此外，这里直接使用nal[index]即可，不必再使用x264_nal_encode函数
memcpy(encoded_image_._buffer + encoded_image_._length, nal[nal_index].p_payload + 4, currentNaluSize);//encoded_image_中存有的是去掉起始码的数据
encoded_image_._length += currentNaluSize;
WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::Encode() nal_type %d, length:%d",
nal[nal_index].i_type, encoded_image_._length);
frag_info.fragmentationOffset[totalNaluIndex] = encoded_image_._length - currentNaluSize;
frag_info.fragmentationLength[totalNaluIndex] = currentNaluSize;
frag_info.fragmentationPlType[totalNaluIndex] = nal[nal_index].i_type;
frag_info.fragmentationTimeDiff[totalNaluIndex] = 0;
totalNaluIndex++;
}
}
i_frame++;
if (encoded_image_._length > 0) {
encoded_image_._timeStamp = input_image.timestamp();
encoded_image_.capture_time_ms_ = input_image.render_time_ms();
encoded_image_._encodedHeight = codec_.height;
encoded_image_._encodedWidth = codec_.width;
encoded_image_._frameType = frame_type;
// call back
encoded_complete_callback_->Encoded(encoded_image_, NULL, &frag_info);
}
return WEBRTC_VIDEO_CODEC_OK;
}

其他方法的实现均没有改变。

至此，X264编码器重新封装完毕，还是比较好理解的。

重新封装ffmpeg解码功能

首先还是一样，获得ffmpeg的头文件和库文件，加入工程中并进行相应设置，这里只需使用avcodec avformat avutil swscale四个库，头文件也可以做相应的删减。

ffmpeg解码的基本流程如下，实际集成之后是从WebRTC的EncodedImage& input_image中获得待解码数据的，所以不能使用常见的基于文件的解码流程

AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264);
AVCodecContext *codecCtx = avcodec_alloc_context3(codec);
avcodec_open2(codecCtx, codec, nil);
char *videoData;
int len;
AVFrame *frame = av_frame_alloc();
AVPacket packet;
av_new_packet(&packet, len);
memcpy(packet.data, videoData, len);
int ret, got_picture;
ret = avcodec_decode_video2(codecCtx, frame, &got_picture, &packet);
if (ret > 0){
if(got_picture){
//进行下一步的处理
}
}

相应的，对H264DecoderImpl类的定义和各方法的实现要进行改写。
首先是类的定义，去掉了ISVCDecoder* decoder_，加入了以下私有成员变量

AVCodecContext *pCodecCtx;
AVCodec *pCodec;
AVFrame *pFrame, *pFrameYUV;
AVPacket *packet;
struct SwsContext *img_convert_ctx;
uint8_t *decode_buffer;//存储最开始收到的SPS、PPS和IDR帧以便进行最开始的解码
uint8_t *out_buffer;
int framecnt = 0;
int encoded_length = 0;

构造函数和析构函数的改写省略不表，重点看一下InitDecode方法和Decode方法

InitDecode方法改写如下

int H264DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
if (inst == NULL) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
int ret_val = Release();
if (ret_val < 0) {
return ret_val;
}
if (&codec_ != inst) {
// Save VideoCodec instance for later; mainly for duplicating the decoder.
codec_ = *inst;
}
pCodec = avcodec_find_decoder(AV_CODEC_ID_H264);
pCodecCtx = avcodec_alloc_context3(pCodec);
pCodecCtx->pix_fmt = PIX_FMT_YUV420P;
pCodecCtx->width = codec_.width;
pCodecCtx->height = codec_.height;
//pCodecCtx->bit_rate = codec_.targetBitrate*1000;
pCodecCtx->time_base.num = 1;
pCodecCtx->time_base.den = codec_.maxFramerate;
if (pCodec == NULL){
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264DecoderImpl::InitDecode, Codec not found.");
return WEBRTC_VIDEO_CODEC_ERROR;
}
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0){
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264DecoderImpl::InitDecode, Could not open codec.");
return WEBRTC_VIDEO_CODEC_ERROR;
}
inited_ = true;
// Always start with a complete key frame.
key_frame_required_ = true;
WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,
"H264DecoderImpl::InitDecode(width:%d, height:%d, framerate:%d, start_bitrate:%d, max_bitrate:%d)",
inst->width, inst->height, inst->maxFramerate, inst->startBitrate, inst->maxBitrate);
return WEBRTC_VIDEO_CODEC_OK;
}

Decode方法的实现改写如下

int H264DecoderImpl::Decode(const EncodedImage& input_image,
bool missing_frames,
const RTPFragmentationHeader* fragmentation,
const CodecSpecificInfo* codec_specific_info,
int64_t /*render_time_ms*/) {
if (!inited_) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264DecoderImpl::Decode, decoder is not initialized");
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
if (decode_complete_callback_ == NULL) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264DecoderImpl::Decode, decode complete call back is not set");
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
if (input_image._buffer == NULL) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264DecoderImpl::Decode, null buffer");
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
if (!codec_specific_info) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::Decode, no codec info");
return WEBRTC_VIDEO_CODEC_ERROR;
}
if (codec_specific_info->codecType != kVideoCodecH264) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264EncoderImpl::Decode, non h264 codec %d", codec_specific_info->codecType);
return WEBRTC_VIDEO_CODEC_ERROR;
}
WEBRTC_TRACE(webrtc::kTraceApiCall, webrtc::kTraceVideoCoding, -1,
"H264DecoderImpl::Decode(frame_type:%d, length:%d",
input_image._frameType, input_image._length);
if (framecnt < 2)
{//存储最开始的SPS PPS 和 IDR帧以便进行初始的解码
memcpy(decode_buffer + encoded_length, input_image._buffer, input_image._length);
encoded_length += input_image._length;
framecnt++;
}
else
{
pFrame = av_frame_alloc();
pFrameYUV = av_frame_alloc();
out_buffer = (uint8_t *)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height));
avpicture_fill((AVPicture *)pFrameYUV, out_buffer, PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);
img_convert_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt,
pCodecCtx->width, pCodecCtx->height, PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
if (framecnt == 2)
{
packet = (AVPacket *)av_malloc(sizeof(AVPacket));
av_new_packet(packet, encoded_length);
memcpy(packet->data, decode_buffer, encoded_length);
av_free(decode_buffer);
framecnt++;
printf("\n\nLoading");
}
else
{
packet = (AVPacket *)av_malloc(sizeof(AVPacket));
av_new_packet(packet, input_image._length);
memcpy(packet->data, input_image._buffer, input_image._length);
}
int got_picture = 0;
int ret = avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, packet);
if (ret < 0){
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"H264DecoderImpl::Decode, Decode Error.");
return WEBRTC_VIDEO_CODEC_ERROR;
}
if (got_picture){
sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0, pCodecCtx->height,
pFrameYUV->data, pFrameYUV->linesize);
int size_y = pFrameYUV->linesize[0] * pCodecCtx->height;
int size_u = pFrameYUV->linesize[1] * pCodecCtx->height / 2;
int size_v = pFrameYUV->linesize[2] * pCodecCtx->height / 2;
decoded_image_.CreateFrame(size_y, static_cast<uint8_t*>(pFrameYUV->data[0]),
size_u, static_cast<uint8_t*>(pFrameYUV->data[1]),
size_v, static_cast<uint8_t*>(pFrameYUV->data[2]),
pCodecCtx->width,
pCodecCtx->height,
pFrameYUV->linesize[0],
pFrameYUV->linesize[1],
pFrameYUV->linesize[2]);
decoded_image_.set_timestamp(input_image._timeStamp);
decode_complete_callback_->Decoded(decoded_image_);
return WEBRTC_VIDEO_CODEC_OK;
}
else
printf(".");
av_free_packet(packet);
}
return WEBRTC_VIDEO_CODEC_OK;
}

其他方法的实现保持不变，至此ffmpeg解码功能的重新封装也完成了。

从最后实现的效果来看，X264的视频质量的确是最好的，但是播放端的解码延时比较高，暂时还不清楚原因，希望了解的朋友指教。
本项目源代码