我的前兩篇文章講到了MP3和AAC文件的編碼與生成,下面接著講一講如何解碼它們。經過前面一段時間的積累,我們也對MP3和AAC有了初步的了解,本文直接以用法入題。
相關接口:
結構介紹:
編解碼器上下文,這個結構保存當前打開的編解碼器、設置的參數,摘錄部分成員如下:
/*
 * Abridged excerpt of FFmpeg's AVCodecContext (the codec context).
 * According to the accompanying article, this structure holds the codec that
 * is currently opened and the parameters configured for it. Only a subset of
 * the members is reproduced here; the real definition lives in the FFmpeg
 * headers and must not be redefined by user code.
 */
struct AVCodecContext
{
enum AVMediaType codec_type; /* see AVMEDIA_TYPE_xxx */
const struct AVCodec *codec;
enum AVCodecID codec_id; /* see AV_CODEC_ID_xxx */
unsigned int codec_tag;
int64_t bit_rate;
int global_quality;
int compression_level;
AVRational time_base;
int width, height;
int coded_width, coded_height;
int gop_size;
enum AVPixelFormat pix_fmt;
int max_b_frames;
int has_b_frames;
/* audio only */
int sample_rate; ///< samples per second
int channels; ///< number of audio channels
enum AVSampleFormat sample_fmt; ///< sample format
uint64_t channel_layout;
AVRational framerate;
enum AVPixelFormat sw_pix_fmt;
};
媒體文件解析器,這個結構保存了當前解析媒體文件的上下文狀態,摘錄部分成員如下:
/*
 * Abridged excerpt of FFmpeg's AVCodecParserContext (the bitstream parser
 * context). According to the accompanying article, this structure holds the
 * state of the media stream currently being parsed. Only a subset of the
 * members is reproduced here; the real definition lives in the FFmpeg headers.
 */
struct AVCodecParserContext
{
const struct AVCodecParser *parser;
int64_t frame_offset; /* offset of the current frame */
int64_t cur_offset; /* current offset
(incremented by each av_parser_parse()) */
int64_t next_frame_offset; /* offset of the next frame */
/* video info */
int pict_type; /* XXX: Put it back in AVCodecContext. */
int repeat_pict; /* XXX: Put it back in AVCodecContext. */
int64_t pts; /* pts of the current frame */
int64_t dts; /* dts of the current frame */
/* private data */
int64_t last_pts;
int64_t last_dts;
int fetch_timestamp;
int width;
int height;
int coded_width;
int coded_height;
int format;
};
函數介紹:
根據編解碼器ID,返回對應的編解碼器名稱。
const char *avcodec_get_name(enum AVCodecID id);
根據解碼器名稱查找註冊的解碼器。
const AVCodec *avcodec_find_decoder_by_name(const char *name);
根據編解碼器指針,初始化編解碼器上下文。
AVCodecContext *avcodec_alloc_context3(const AVCodec *codec);
釋放編解碼器上下文。
void avcodec_free_context(AVCodecContext **avctx);
綁定編解碼器到編解碼器上下文,並打開相關資源。
int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options);
根據編解碼器ID初始化解析器上下文。
AVCodecParserContext *av_parser_init(int codec_id);
關閉解析器上下文。
void av_parser_close(AVCodecParserContext *s);
解析一個包。
返回成功處理的緩衝區的長度,下一次處理應該從緩衝區的後續位置繼續處理,其效果相當於解封裝時使用的av_read_frame()函數。
這個函數總是返回非負,因而無需處理異常的情況。
int av_parser_parse2(AVCodecParserContext *s,
AVCodecContext *avctx,
uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size,
int64_t pts, int64_t dts,
int64_t pos);
發送待解碼數據。
返回值:0表示成功,其他表示出錯。
可能的出錯值:
AVERROR(EAGAIN) 表示發送緩衝區已滿,需要等待avcodec_receive_frame()提取。
AVERROR_EOF 表示解碼器已經被沖刷(flush),不能再向它發送新的數據包。
int avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt);
接收解碼數據。
返回值:0表示成功,其他表示出錯。
可能的出錯值:
AVERROR(EAGAIN) 表示接收緩衝區為空,無數據可提取,需要先發送新的待解碼數據。
AVERROR_EOF 表示解碼器已經完全沖刷(flush),不會再有新的輸出幀。
int avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame);
代碼舉例:
下面這個例子演示了讀取test.mp3文件並解碼,將結果寫為PCM文件的過程。只要修改相應的解碼器為AAC,和打開的文件名,就能支持AAC。代碼如下:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
extern "C"
{
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>
}
// Maps a packed sample format to the raw-PCM format name printed in the ffplay command (defined below).
const char* getSampleFormatName(enum AVSampleFormat emSampleFormat);
// Sends one packet (or NULL) to the decoder and writes every frame it yields to pFile;
// returns false when sending or receiving reports a real error (defined below).
bool decode(AVCodecContext* pCodecCTX, const AVPacket* pPacket, AVFrame* pFrame, FILE* pFile);
int main(int argc, char* argv[])
{
const char* pCodecName = avcodec_get_name(AV_CODEC_ID_MP3);
printf("codec: %d -> %s \n", AV_CODEC_ID_MP3, pCodecName);
const AVCodec* pCodec = avcodec_find_decoder_by_name(pCodecName);
if (pCodec == NULL)
{
printf("can't find decoder! \n");
return -1;
}
// 根據(jù)指定解碼器初使化對(duì)應(yīng)的解碼上下文
AVCodecContext* pCodecCTX = avcodec_alloc_context3(pCodec);
if (pCodecCTX == NULL)
{
printf("can't alloc decoder context! \n");
return -1;
}
// 打開解碼器上下文
int rc = avcodec_open2(pCodecCTX, pCodec, NULL);
if (rc < 0)
{
char sError[128] = {0};
av_strerror(rc, sError, sizeof(sError));
printf("avcodec_open2() ret:[%d:%s] \n", rc, sError);
return -1;
}
// 打開裸流解析上下文
AVCodecParserContext* pCodecParserCTX = av_parser_init(pCodec->id);
if (pCodecParserCTX == NULL)
{
printf("init parser context failed! \n");
return -1;
}
AVPacket* pPacket = av_packet_alloc();
AVFrame* pFrame = av_frame_alloc();
FILE* pFileInput = fopen("test.mp3", "rb");
FILE* pFileOutput = fopen("test.pcm", "wb");
while (true)
{
const int BUFF_SIZE = 100; //20480;
char sDataBuffer[BUFF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE] = {0};
int nBytesRead = fread(sDataBuffer, 1, BUFF_SIZE, pFileInput);
if (nBytesRead <= 0)
break;
printf("read bytes: %d \n", nBytesRead);
// 一次讀取,全部喂給
int nOffset = 0;
while (nOffset < nBytesRead)
{
// 盡量喂給,但是一次最大只解析出一個(gè)包
int nPacketSize = av_parser_parse2(pCodecParserCTX, pCodecCTX, &(pPacket->data), &(pPacket->size),
(uint8_t*)sDataBuffer + nOffset, nBytesRead - nOffset, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
printf("\t offset:%d max feed size:%d eat size:%d \n", nOffset, nBytesRead - nOffset, nPacketSize);
// 輸出了報(bào)文,需要解碼
if (pPacket->size > 0)
{
printf("\t\t out packet size:%d \n", pPacket->size);
// 解碼,這里AVPacket結(jié)構(gòu)只有data和size成員有效
if (!decode(pCodecCTX, pPacket, pFrame, pFileOutput))
{
printf("decode fatal! \n");
exit (-1);
}
}
nOffset += nPacketSize;
}
}
// 解碼尾包
decode(pCodecCTX, NULL, pFrame, pFileOutput);
AVSampleFormat emSampleFormat = pCodecCTX->sample_fmt;
if (av_sample_fmt_is_planar(emSampleFormat))
{
const char* pPacked = av_get_sample_fmt_name(emSampleFormat);
printf("Warning: the sample format the decoder produced is planar(%s). This example will output the first channel only.\n",
pPacked ? pPacked : "?");
emSampleFormat = av_get_packed_sample_fmt(emSampleFormat);
}
printf("Play the output audio file with command: \n");
printf("\t ffplay -f %s -ac %d -ar %d test.pcm \n", getSampleFormatName(emSampleFormat), pCodecCTX->channels, pCodecCTX->sample_rate);
fclose(pFileOutput);
fclose(pFileInput);
av_packet_free(&pPacket);
av_frame_free(&pFrame);
av_parser_close(pCodecParserCTX);
avcodec_free_context(&pCodecCTX);
return 0;
}
/**
 * Returns the raw-PCM format name for a packed sample format, in the byte
 * order selected by AV_NE (e.g. "s16le" or "s16be").
 *
 * @param emSampleFormat sample format to look up
 * @return the matching name, or "unkown" when the format is not in the table
 */
const char* getSampleFormatName(enum AVSampleFormat emSampleFormat)
{
    static const struct
    {
        enum AVSampleFormat emFormat;
        const char* pName;
    } kFormatNames[] = {
        { AV_SAMPLE_FMT_U8,  "u8" },
        { AV_SAMPLE_FMT_S16, AV_NE("s16be", "s16le") },
        { AV_SAMPLE_FMT_S32, AV_NE("s32be", "s32le") },
        { AV_SAMPLE_FMT_FLT, AV_NE("f32be", "f32le") },
        { AV_SAMPLE_FMT_DBL, AV_NE("f64be", "f64le") },
    };

    for (size_t idx = 0; idx < sizeof(kFormatNames) / sizeof(kFormatNames[0]); ++idx)
    {
        if (kFormatNames[idx].emFormat == emSampleFormat)
            return kFormatNames[idx].pName;
    }
    return "unkown";
}
/**
 * Sends one packet to the decoder and writes every frame it produces to
 * pFile as interleaved PCM samples.
 *
 * @param pCodecCTX opened decoder context
 * @param pPacket   packet to decode; the caller passes NULL after the last
 *                  packet to collect the frames the decoder still holds
 * @param pFrame    scratch frame that receives each decoded frame
 * @param pFile     output file, opened for binary writing
 * @return true when the packet was accepted and all available frames were
 *         written; false on a send/receive error, an unknown sample size or
 *         a short write
 */
bool decode(AVCodecContext* pCodecCTX, const AVPacket* pPacket, AVFrame* pFrame, FILE* pFile)
{
    // Send the compressed data.
    int rc = avcodec_send_packet(pCodecCTX, pPacket);
    if (rc < 0)
    {
        char sError[128] = {0};
        av_strerror(rc, sError, sizeof(sError));
        printf("avcodec_send_packet() ret:[%d:%s] \n", rc, sError);
        return false;
    }

    // Collect decoded frames until the decoder has nothing more to give.
    while (true)
    {
        rc = avcodec_receive_frame(pCodecCTX, pFrame);
        if (rc < 0)
        {
            // No more output for now (EAGAIN) or ever (EOF): not an error.
            if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF)
                return true;
            // Genuine decode error.
            char sError[128] = {0};
            av_strerror(rc, sError, sizeof(sError));
            printf("avcodec_receive_frame() ret:[%d:%s] \n", rc, sError);
            return false;
        }
        printf("\t\t => frame format:[%d:%s] channels:[%d] sample_rate:[%d] nb_samples:[%d] pkt_size:[%d] linesize:[%d] \n",
            pFrame->format, av_get_sample_fmt_name((AVSampleFormat)pFrame->format), pFrame->channels, pFrame->sample_rate, pFrame->nb_samples, pFrame->pkt_size, pFrame->linesize[0]);

        const int nSampleSize = av_get_bytes_per_sample(pCodecCTX->sample_fmt);
        if (nSampleSize <= 0)
        {
            printf("unknown sample size! \n");
            return false;
        }
        const int nChannels = pCodecCTX->channels;

        if (av_sample_fmt_is_planar(pCodecCTX->sample_fmt))
        {
            // Planar: one plane per channel, so interleave sample by sample.
            // extended_data is used instead of data[] because data[] holds
            // only a fixed number of plane pointers.
            for (int i = 0; i < pFrame->nb_samples; ++i)
            {
                for (int c = 0; c < nChannels; ++c)
                {
                    if (fwrite(pFrame->extended_data[c] + nSampleSize * i, 1, nSampleSize, pFile) != (size_t)nSampleSize)
                    {
                        printf("write output failed! \n");
                        return false;
                    }
                }
            }
        }
        else
        {
            // Packed: the samples are already interleaved in the first plane;
            // the other plane pointers are NULL and must not be dereferenced.
            const size_t nTotalSamples = (size_t)pFrame->nb_samples * (size_t)nChannels;
            if (fwrite(pFrame->extended_data[0], nSampleSize, nTotalSamples, pFile) != nTotalSamples)
            {
                printf("write output failed! \n");
                return false;
            }
        }
    }
}
編譯:
g++ -o decode_mp3 decode_mp3.cpp -I/usr/local/ffmpeg/include -L/usr/local/ffmpeg/lib -lavformat -lavcodec -lavutil
運行,輸出如下:
$ ./decode_mp3
codec: 86017 -> mp3
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 100
offset:0 max feed size:100 eat size:17
out packet size:417
=> frame format:[6:s16p] channels:[2] sample_rate:[44100] nb_samples:[1152] pkt_size:[417] linesize:[2304]
offset:17 max feed size:83 eat size:83
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 100
offset:0 max feed size:100 eat size:35
out packet size:418
=> frame format:[6:s16p] channels:[2] sample_rate:[44100] nb_samples:[1152] pkt_size:[418] linesize:[2304]
offset:35 max feed size:65 eat size:65
......
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 100
offset:0 max feed size:100 eat size:100
read bytes: 96
offset:0 max feed size:96 eat size:96
out packet size:418
=> frame format:[6:s16p] channels:[2] sample_rate:[44100] nb_samples:[1152] pkt_size:[418] linesize:[2304]
Warning: the sample format the decoder produced is planar(s16p). This example will output the first channel only.
Play the output audio file with command:
ffplay -f s16le -ac 2 -ar 44100 test.pcm
這裡為了演示讀小包解析的過程,所以每次從文件中只讀入100字節,交給解析器去拼接處理,正式場合中請盡量一次讀多點數據,以減少系統調用。
由於是PCM文件,沒有元數據說明,無法直接播放,使用下面的命令:
ffplay -f s16le -ac 2 -ar 44100 test.pcm