一、概述
本文將在Mac OS系統上使用FFmpeg進行音視頻的H264,H265編碼。
使用FFmpeg版本為4.2。
二、編碼器初始化
有兩點需要注意:
1.設置pCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;的目的是可以通過pCodecContext->extradata和pCodecContext->extradata_size提取到SPS,PPS,VPS數據,適用于直播場景,注釋中也寫得很清楚:Place global headers in extradata instead of every keyframe.
如果沒有設置,這些數據則會在AVPacket.data中和其他數據一起返回,適用于直接寫入文件。
2.設置pPacket.flags |= AV_PKT_FLAG_KEY的目的是可以在編碼后的AVPacket中識別出是否為關鍵幀。(關鍵幀標誌實際由編碼器寫入輸出的AVPacket,編碼后通過pPacket.flags & AV_PKT_FLAG_KEY判斷即可。)
int ret;

// Initialise the reusable output packet first so that every early return below
// still leaves it in a defined state. av_init_packet() does not touch data/size,
// so those are cleared explicitly.
av_init_packet(&pPacket);
pPacket.data = NULL;
pPacket.size = 0;
// NOTE(review): the encoder fills in the flags of each packet it outputs, so this
// pre-set key flag is presumably overwritten by avcodec_receive_packet() - confirm.
pPacket.flags |= AV_PKT_FLAG_KEY;

// Choose the codec: H.264 when kUseH264Encode is set, HEVC (H.265) otherwise.
enum AVCodecID codecID = kUseH264Encode ? AV_CODEC_ID_H264 : AV_CODEC_ID_HEVC;
pCodec = avcodec_find_encoder(codecID);
if (pCodec == NULL) {
    printf("No encoder available for codec id %d\n", (int)codecID);
    return;
}

pCodecContext = avcodec_alloc_context3(pCodec);
if (pCodecContext == NULL) {
    printf("Failed to allocate the codec context\n");
    return;
}

// Planar YUV 4:2:0 video, 1280x720, time base 1/25 s, target bit rate 1,000,000 bit/s.
pCodecContext->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecContext->pix_fmt = AV_PIX_FMT_YUV420P;
pCodecContext->width = 1280;
pCodecContext->height = 720;
pCodecContext->time_base.num = 1;
pCodecContext->time_base.den = 25;
pCodecContext->bit_rate = 1000 * 1000;
// Quantiser range the rate control may use.
pCodecContext->qmin = 10;
pCodecContext->qmax = 51;
// GOP length of 25 frames, no B-frames.
pCodecContext->gop_size = 25;
pCodecContext->max_b_frames = 0;
// Uncomment to place the global headers in pCodecContext->extradata instead of in
// every keyframe.
// pCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

// Encoder-private options, passed to avcodec_open2() as a dictionary.
AVDictionary *param = NULL;
if (kUseH264Encode) {
    av_dict_set(&param, "preset", "slow", 0);
    av_dict_set(&param, "tune", "zerolatency", 0);
} else {
    av_dict_set(&param, "preset", "ultrafast", 0);
    av_dict_set(&param, "tune", "zero-latency", 0);
}

ret = avcodec_open2(pCodecContext, pCodec, &param);
// avcodec_open2() hands back the entries it did not consume; the caller owns the
// dictionary either way and must free it.
av_dict_free(&param);
if (ret < 0) {
    printf("avcodec_open2 failed: %s\n", av_err2str(ret));
    return;
}

// Allocate the reusable input frame with the same geometry and pixel format as the
// encoder.
pFrame = av_frame_alloc();
if (pFrame == NULL) {
    printf("Failed to allocate the input frame\n");
    return;
}
pFrame->width = pCodecContext->width;
pFrame->height = pCodecContext->height;
pFrame->format = pCodecContext->pix_fmt;
ret = av_frame_get_buffer(pFrame, 0);
if (ret < 0) {
    printf("ret == %s\n", av_err2str(ret));
}
三、編碼
在Mac OS系統或者iOS系統中,采集到的一般是CMSampleBufferRef對象,需要先從中拿到CVPixelBufferRef對象,再從其中提取YUV數據。而此處的YUV格式,是一種two-plane模式,即Y和UV分為兩個plane,但是UV(CbCr)為交錯存儲,而不是分為三個plane,需要最終轉換為420P格式,即YYYYUV(先存放全部Y,再依次存放全部U和全部V)。
// Lock the pixel buffer's base address so the CPU can read its planes.
if (CVPixelBufferLockBaseAddress(pixelBuffer, 0) == kCVReturnSuccess) {
    // Plane 0 is read as Y and plane 1 as interleaved CbCr (bi-planar, NV12-style layout).
    const UInt8 *srcY = (const UInt8 *)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0);
    const UInt8 *srcUV = (const UInt8 *)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1);
    // Picture size in pixels.
    size_t width = CVPixelBufferGetWidth(pixelBuffer);
    size_t height = CVPixelBufferGetHeight(pixelBuffer);
    // Source row strides in bytes; they can be larger than the visible width.
    size_t srcStrideY = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0);
    size_t srcStrideUV = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1);

    // The conversion writes straight into pFrame's planes, so both source planes must
    // be present and the picture must be exactly as large as the frame.
    if (srcY == NULL || srcUV == NULL ||
        width != (size_t)pFrame->width || height != (size_t)pFrame->height) {
        fprintf(stderr, "Pixel buffer (%zux%zu) does not match the encoder frame (%dx%d)\n",
                width, height, pFrame->width, pFrame->height);
        CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
        return;
    }

    // Make sure the frame owns buffers that are safe to overwrite. This also fails
    // cleanly when the frame has no buffers at all.
    int ret = av_frame_make_writable(pFrame);
    if (ret < 0) {
        fprintf(stderr, "Input frame is not writable: %s\n", av_err2str(ret));
        CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
        return;
    }

    // Convert NV12 to YUV420P (I420) directly into the frame, honouring the
    // destination line sizes. Y plane: copy row by row.
    for (size_t row = 0; row < height; row++) {
        memcpy(pFrame->data[0] + row * pFrame->linesize[0],
               srcY + row * srcStrideY,
               width);
    }
    // Chroma: de-interleave CbCr pairs into the separate U and V planes.
    size_t chromaWidth = width / 2;
    size_t chromaHeight = height / 2;
    for (size_t row = 0; row < chromaHeight; row++) {
        const UInt8 *srcRow = srcUV + row * srcStrideUV;
        uint8_t *dstU = pFrame->data[1] + row * pFrame->linesize[1];
        uint8_t *dstV = pFrame->data[2] + row * pFrame->linesize[2];
        for (size_t col = 0; col < chromaWidth; col++) {
            dstU[col] = srcRow[col << 1];
            dstV[col] = srcRow[(col << 1) + 1];
        }
    }
    pFrame->pts = frameCount;

    // All pixels have been copied into pFrame; the source buffer is no longer needed.
    CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);

    // Submit the raw frame to the encoder.
    ret = avcodec_send_frame(pCodecContext, pFrame);
    if (ret != 0) {
        printf("Failed to encode! \n");
        return;
    }
    // Advance the pts source once per submitted frame, so timestamps keep increasing
    // even when the encoder outputs no packet for this frame.
    frameCount++;

    // Drain every packet the encoder has ready.
    while (1) {
        ret = avcodec_receive_packet(pCodecContext, &pPacket);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
            break;
        else if (ret < 0) {
            fprintf(stderr, "Error encoding video frame\n");
            break;
        }
        // NOTE(review): videoFrame is declared outside this snippet and is only ever
        // set to YES here - confirm it is reset for non-key packets elsewhere.
        if (pPacket.flags & AV_PKT_FLAG_KEY) {
            videoFrame.isKeyFrame = YES;
        }
        // Hand the encoded bytes to the delegate.
        NSData *data = [NSData dataWithBytes:pPacket.data length:pPacket.size];
        if ([self.delegate respondsToSelector:@selector(videoEncoder:encodeData:)]) {
            [self.delegate videoEncoder:self encodeData:data];
        }
        // Release the packet's payload before reusing the packet.
        av_packet_unref(&pPacket);
    }
} else {
    fprintf(stderr, "Failed to lock the pixel buffer\n");
}
}
四、提取SPS,PPS,VPS數據
上文說了要單獨提取SPS,PPS,VPS數據,需在初始化編碼器時設置pCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER。
// Byte count of the encoder's extradata blob.
int extra_size = pCodecContext->extradata_size;
// Start of the extradata blob itself.
uint8_t *extra_data = pCodecContext->extradata;
1.H264編碼時,拿到的extra_data如下所示:
00000001 6764001f acb300a0 0b742000 00030020 00000651 e3064d00 00000168 e9732c8b
很明顯SPS,PPS被4個字節的start code= 00 00 00 01分割開,NALU header只有一個字節:
00 00 00 01 67 ---> (0x67 & 0x1f) = 7 ---> SPS
00 00 00 01 68 ---> (0x68 & 0x1f) = 8 ---> PPS
代碼如下:
int pos = 0;
int pps_pos = 0,pps_length = 0;
int sps_pos = 0,sps_length = 0;
while (pos < (extra_size - 4)) {
if (extra_data[pos] == 0 &&
extra_data[pos+1] == 0 &&
extra_data[pos+2] == 0 &&
extra_data[pos+3] == 1) {
if ((extra_data[pos+4] & 0x1f) == 7) {//sps
sps_pos = pos+4;
}else if ((extra_data[pos+4] & 0x1f) == 8){//pps
pps_pos = pos+4;
}
}
pos ++;
}
sps_length = pps_pos - sps_pos - 4;
pps_length = extra_size - pps_pos;
2.H265編碼時,用同樣的方法從拿到的extra_data中提取SPS,PPS,VPS。此時NALU header有兩個字節,提取方法如下:
00 00 00 01 40 01 ---> (0x40 & 0x7E)>>1 = 32 ---> VPS
00 00 00 01 42 01 ---> (0x42 & 0x7E)>>1 = 33 ---> SPS
00 00 00 01 44 01 ---> (0x44 & 0x7E)>>1 = 34 ---> PPS
需要注意的是,此處還可能包含被3個字節的start code= 00 00 01分割開的NAL_UNIT_SEI數據:
00 00 01 4e 01 ---> (0x4e & 0x7E)>>1 = 39 ---> SEI
五、編碼結束
編碼結束時,需要沖洗編碼器,將編碼器中緩存的數據沖洗出來,防止丟幀。方法是發送avcodec_send_frame(pCodecContext, NULL),當avcodec_receive_packet的返回值為AVERROR_EOF則表示沖洗完成。最后再釋放內存。