Chapter 6 covers a lot of ground, so here we work through software and hardware encoding of audio and video.
Encoding with libfdk_aac
The book's sample code encodes through the FFmpeg API; of course you can also build libfdk_aac on its own and drive it directly. First we initialize things by configuring the sample rate, channel count, bit rate, and other parameters.
Next we probe the output file, letting FFmpeg detect the format automatically from the file name.
int ret;
av_register_all();
avFormatContext = avformat_alloc_context();
LOGI("aacFilePath is %s ", aacFilePath);
//One approach:
//probe the format first, then set it on avFormatContext
// AVOutputFormat *fmt = av_guess_format(NULL, aacFilePath, NULL);
// avFormatContext->oformat = fmt;
//Here we instead let FFmpeg detect the format from the output file name
if ((ret = avformat_alloc_output_context2(&avFormatContext, nullptr, nullptr, aacFilePath)) != 0) {
    LOGI("avFormatContext alloc failed : %s", av_err2str(ret));
    return -1;
}
if ((ret = avio_open2(&avFormatContext->pb, aacFilePath, AVIO_FLAG_WRITE, nullptr, nullptr)) < 0) {
    LOGI("Could not avio open fail %s", av_err2str(ret));
    return -1;
}
Next we create an audio stream and grab its codec context.
AVCodec *codec;
AVSampleFormat preferedSampleFMT = AV_SAMPLE_FMT_S16;
int preferedChannels = audioChannels;
int preferedSampleRate = audioSampleRate;
audioStream = avformat_new_stream(avFormatContext, nullptr);
audioStream->id = 1;
avCodecContext = audioStream->codec;
Configure the codec context. The main fields to set are:
- the codec type
- the sample rate
- the bit rate
- the codec id, available from the oformat probed earlier
- the sample format
- the channel count
- the AAC profile (the available profiles are listed below)
AAC comes in roughly the following profiles:
- MPEG-2 AAC LC: Low Complexity
- MPEG-2 AAC Main: main profile
- MPEG-2 AAC SSR: Scalable Sample Rate
- MPEG-4 AAC LC: Low Complexity; the audio track of the MP4 files common on today's phones is usually this profile
- MPEG-4 AAC Main: main profile
- MPEG-4 AAC SSR: Scalable Sample Rate
- MPEG-4 AAC LTP: Long Term Prediction
- MPEG-4 AAC LD: Low Delay
- MPEG-4 AAC HE: High Efficiency
avCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;
avCodecContext->sample_rate = audioSampleRate;
if (publishBitRate > 0) {
    avCodecContext->bit_rate = publishBitRate;
} else {
    avCodecContext->bit_rate = PUBLISH_BITE_RATE;
}
avCodecContext->codec_id = avFormatContext->oformat->audio_codec;
avCodecContext->sample_fmt = preferedSampleFMT;
LOGI("audioChannels is %d", audioChannels);
avCodecContext->channel_layout =
        preferedChannels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
avCodecContext->channels = av_get_channel_layout_nb_channels(avCodecContext->channel_layout);
//configure the AAC profile
avCodecContext->profile = FF_PROFILE_AAC_LOW;
LOGI("avCodecContext->channels is %d", avCodecContext->channels);
if (avFormatContext->oformat->flags & AVFMT_GLOBALHEADER) {
    avCodecContext->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
Find the matching encoder, then query the sample formats and sample rates it supports and filter against them. In the book's code, if the preferred sample format isn't supported it simply falls back to the first supported format, and for the sample rate it picks the closest supported value.
//look up the encoder by the codec id probed earlier
codec = avcodec_find_encoder(avCodecContext->codec_id);
if (!codec) {
    LOGI("Couldn't find a valid audio codec");
    return -1;
}
if (codec->sample_fmts) {
    /* check if the prefered sample format for this codec is supported.
     * this is because, depending on the version of libav, and with the whole ffmpeg/libav fork situation,
     * you have various implementations around. float samples in particular are not always supported.
     */
    const enum AVSampleFormat *p = codec->sample_fmts;
    for (; *p != -1; p++) {
        if (*p == audioStream->codec->sample_fmt)
            break;
    }
    if (*p == -1) {
        LOGI("sample format incompatible with codec. Defaulting to a format known to work.........");
        avCodecContext->sample_fmt = codec->sample_fmts[0];
    }
}
//pick the closest supported sample rate
if (codec->supported_samplerates) {
    const int *p = codec->supported_samplerates;
    int best = 0;
    int best_dist = INT_MAX;
    for (; *p; p++) {
        int dist = abs(audioStream->codec->sample_rate - *p);
        if (dist < best_dist) {
            best_dist = dist;
            best = *p;
        }
    }
    /* best is the closest supported sample rate (same as selected if best_dist == 0) */
    avCodecContext->sample_rate = best;
}
Next, if the input PCM doesn't match a format the encoder supports, it has to be resampled: we initialize a SwrContext, after which the encoder can finally be opened.
//initialize the resampler
if (preferedChannels != avCodecContext->channels
    || preferedSampleRate != avCodecContext->sample_rate
    || preferedSampleFMT != avCodecContext->sample_fmt) {
    LOGI("channels is {%d, %d}", preferedChannels, audioStream->codec->channels);
    LOGI("sample_rate is {%d, %d}", preferedSampleRate, audioStream->codec->sample_rate);
    LOGI("sample_fmt is {%d, %d}", preferedSampleFMT, audioStream->codec->sample_fmt);
    LOGI("AV_SAMPLE_FMT_S16P is %d AV_SAMPLE_FMT_S16 is %d AV_SAMPLE_FMT_FLTP is %d",
         AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP);
    swrContext = swr_alloc_set_opts(NULL,
                                    av_get_default_channel_layout(avCodecContext->channels),
                                    (AVSampleFormat) avCodecContext->sample_fmt,
                                    avCodecContext->sample_rate,
                                    av_get_default_channel_layout(preferedChannels),
                                    preferedSampleFMT, preferedSampleRate,
                                    0, NULL);
    if (!swrContext || swr_init(swrContext)) {
        if (swrContext)
            swr_free(&swrContext);
        return -1;
    }
}
if (avcodec_open2(avCodecContext, codec, NULL) < 0) {
    LOGI("Couldn't open codec");
    return -1;
}
Next we write the stream header and then allocate the frame buffers.
if (avformat_write_header(avFormatContext, nullptr) != 0) {
    LOGI("Could not write header\n");
    return -1;
}
this->isWriteHeaderSuccess = true;
this->alloc_avframe();
Frame allocation covers the input frame and, when resampling is needed, the converted frame together with its data buffers. For an audio frame, the fields that have to be initialized by hand are:
- the frame's sample count (per channel)
- the sample format
- the channel layout
- the sample rate
- the frame's buffer: allocate it, then bind it with avcodec_fill_audio_frame
int AudioEncoder::alloc_avframe() {
    int ret = 0;
    AVSampleFormat preferedSampleFMT = AV_SAMPLE_FMT_S16;
    int preferedChannels = audioChannels;
    int preferedSampleRate = audioSampleRate;
    input_frame = av_frame_alloc();
    if (!input_frame) {
        LOGI("Could not allocate audio frame\n");
        return -1;
    }
    input_frame->nb_samples = avCodecContext->frame_size;
    input_frame->format = preferedSampleFMT;
    input_frame->channel_layout = preferedChannels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
    input_frame->sample_rate = preferedSampleRate;
    buffer_size = av_samples_get_buffer_size(NULL, av_get_channel_layout_nb_channels(
                                                     input_frame->channel_layout),
                                             input_frame->nb_samples, preferedSampleFMT, 0);
    samples = static_cast<uint8_t *>(av_malloc(buffer_size));
    samplesCursor = 0;
    if (!samples) {
        LOGI("Could not allocate %d bytes for samples buffer\n", buffer_size);
        return -1;
    }
    LOGI("allocate %d bytes for samples buffer\n", buffer_size);
    /* setup the data pointers in the AVFrame */
    //bind the buffer to the AVFrame
    ret = avcodec_fill_audio_frame(input_frame,
                                   av_get_channel_layout_nb_channels(input_frame->channel_layout),
                                   preferedSampleFMT, samples, buffer_size, 0);
    if (ret < 0) {
        LOGI("Could not setup audio frame\n");
    }
    if (swrContext) {
        if (av_sample_fmt_is_planar(avCodecContext->sample_fmt)) {
            LOGI("Codec Context SampleFormat is Planar...");
        }
        /* allocate the conversion buffers */
        convert_data = (uint8_t **) calloc(avCodecContext->channels,
                                           sizeof(*convert_data));
        av_samples_alloc(convert_data, nullptr, avCodecContext->channels,
                         avCodecContext->frame_size,
                         avCodecContext->sample_fmt,
                         0);
        swrBufferSize = av_samples_get_buffer_size(NULL, avCodecContext->channels,
                                                   avCodecContext->frame_size,
                                                   avCodecContext->sample_fmt, 0);
        swrBuffer = (uint8_t *) av_malloc(swrBufferSize);
        LOGI("After av_malloc swrBuffer");
        swrFrame = av_frame_alloc();
        if (!swrFrame) {
            LOGI("Could not allocate swrFrame frame\n");
            return -1;
        }
        swrFrame->nb_samples = avCodecContext->frame_size;
        swrFrame->format = avCodecContext->sample_fmt;
        swrFrame->channel_layout =
                avCodecContext->channels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
        swrFrame->sample_rate = avCodecContext->sample_rate;
        ret = avcodec_fill_audio_frame(swrFrame, avCodecContext->channels,
                                       avCodecContext->sample_fmt, (const uint8_t *) swrBuffer,
                                       swrBufferSize, 0);
        LOGI("After avcodec_fill_audio_frame");
        if (ret < 0) {
            LOGI("avcodec_fill_audio_frame error ");
            return -1;
        }
    }
    return ret;
}
Now for the encoding itself: we read PCM data from the file, resample it first if the encoder doesn't support its format, encode it, and write the result out. The main flow is to initialize an AVPacket, which holds the encoded output, and set by hand:
- the stream index
- the data pointer
- the data size
We then call avcodec_encode_audio2 to encode, and av_interleaved_write_frame to write the packet to the file.
void AudioEncoder::encodePacket() {
    int ret, got_output;
    AVPacket pkt;
    av_init_packet(&pkt);
    AVFrame *encode_frame;
    if (swrContext) {
        long long beginSWRTimeMills = getCurrentTime();
        swr_convert(swrContext, convert_data, avCodecContext->frame_size,
                    (const uint8_t **) input_frame->data, avCodecContext->frame_size);
        int length =
                avCodecContext->frame_size * av_get_bytes_per_sample(avCodecContext->sample_fmt);
        //copy each converted plane into the frame bound to swrBuffer
        for (int k = 0; k < avCodecContext->channels; ++k) {
            for (int j = 0; j < length; ++j) {
                swrFrame->data[k][j] = convert_data[k][j];
            }
        }
        totalSWRTimeMills += (getCurrentTime() - beginSWRTimeMills);
        encode_frame = swrFrame;
    } else {
        encode_frame = input_frame;
    }
    encode_frame->pts = frameIndex++;
    pkt.stream_index = audioStream->index;
    // pkt.duration = (int) AV_NOPTS_VALUE;
    // pkt.pts = pkt.dts = 0;
    pkt.data = samples;
    pkt.size = buffer_size;
    ret = avcodec_encode_audio2(avCodecContext, &pkt, encode_frame, &got_output);
    if (ret < 0) {
        LOGI("Error encoding audio frame\n");
        return;
    }
    if (got_output) {
        if (avCodecContext->coded_frame && avCodecContext->coded_frame->pts != AV_NOPTS_VALUE) {
            pkt.pts = av_rescale_q(avCodecContext->coded_frame->pts, avCodecContext->time_base,
                                   audioStream->time_base);
        }
        //mark the packet as a key frame
        pkt.flags |= AV_PKT_FLAG_KEY;
        this->duration += (pkt.duration * av_q2d(audioStream->time_base));
        //this function writes packets interleaved; use it when the caller cannot guarantee
        //that packets from the individual streams are correctly interleaved, otherwise
        //av_write_frame can be called instead for better performance
        int writeCode = av_interleaved_write_frame(avFormatContext, &pkt);
    }
    av_free_packet(&pkt);
}
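The excerpt doesn't show the loop that drives encodePacket(). Here's a minimal sketch of what the caller could look like, assuming packed S16 PCM is read straight into the samples buffer bound to input_frame (the method name encodeFromPCMFile and the FILE* handle are made up for illustration):
//Hedged sketch, not the book's code: pull one frame's worth of packed S16 PCM
//from the file into the buffer already bound to input_frame, then encode it.
void AudioEncoder::encodeFromPCMFile(FILE *pcmFile) {
    while (true) {
        size_t bytesRead = fread(samples, 1, buffer_size, pcmFile);
        if (bytesRead < (size_t) buffer_size) {
            break; //for simplicity, drop a trailing partial frame
        }
        encodePacket();
    }
}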
When everything is done, destroy() must call av_write_trailer(avFormatContext) to write the file trailer. With that, software encoding with FFmpeg is complete.
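A minimal sketch of what that destroy() could look like under the member names used above (the book's actual cleanup likely also frees input_frame, swrFrame, and the sample buffers):
//Hedged sketch: write the trailer only if the header went out, then release the FFmpeg state.
void AudioEncoder::destroy() {
    if (this->isWriteHeaderSuccess) {
        //without the trailer the output file may not be playable
        av_write_trailer(avFormatContext);
    }
    if (swrContext) {
        swr_free(&swrContext);
    }
    if (avCodecContext) {
        avcodec_close(avCodecContext);
    }
    if (avFormatContext) {
        avio_closep(&avFormatContext->pb);
        avformat_free_context(avFormatContext);
        avFormatContext = nullptr;
    }
}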
While running the encoder I hit a couple of issues; I'm not sure whether they come from my own settings, so pointers from anyone more experienced would be appreciated.
1. The pts on the AVPacket is just a counter I increment in the loop; if encode_frame's pts is left unset, the AVPacket's pts stays at a constant initial value.
2. avCodecContext's time_base and frame_size don't need to be set by hand, although the book's code does set them. audioStream's time_base, however, defaults to 1/90000 after initialization and has to be changed to 1/44100 manually. Does this have to be done by hand, or is there an API that derives it automatically?
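For reference, the manual setting mentioned in point 2 amounts to a single assignment made before avformat_write_header(), sketched here with the members used above:
//Hedged sketch: force the stream time base to the audio sample rate by hand;
//note that some muxers overwrite stream time bases inside avformat_write_header().
audioStream->time_base.num = 1;
audioStream->time_base.den = audioSampleRate;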
Encoding AAC with MediaCodec
First we initialize MediaCodec with the sample rate, bit rate, channel count, and so on. These parameters are stored in a map, and the keys pretty much explain their meaning. We then create a MediaCodec for the corresponding MIME type and configure it with the format and the encoder flag.
init {
    val encodeFormat: MediaFormat = MediaFormat.createAudioFormat(MINE_TYPE, sampleRate, channels)
    encodeFormat.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
    encodeFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC)
    encodeFormat.setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, 1024 * 1024) //max input size (illustrative value)
    mediaCodec = MediaCodec.createEncoderByType(MINE_TYPE)
    mediaCodec?.run {
        configure(encodeFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
        start()
        //cache the codec's buffer arrays on the enclosing encoder class
        this@AudioEncoder.inputBuffers = inputBuffers
        this@AudioEncoder.outputBuffers = outputBuffers
    }.ifNull {
        Log.e("problem", "create mediaEncode failed")
    }
}
(Figure: MediaCodec's separate input and output buffer queues.)
In short, input and output are separate queues: we submit PCM to the input queue and, once encoding completes, fetch the AAC data from the output queue. So it's the usual routine of reading PCM from the file and feeding it to MediaCodec in a loop. The main thing to watch is the ByteBuffer position handling; the rest is straightforward.
fun fireAudio(data: ByteArray, len: Int) {
    mediaCodec?.run {
        val inputBufferIndex = dequeueInputBuffer(-1)
        if (inputBufferIndex >= 0) {
            val inputBuffer = this@AudioEncoder.inputBuffers!![inputBufferIndex]
            inputBuffer.clear()
            inputBuffer.put(data)
            //presentationTimeUs expects microseconds, so convert from nanoseconds
            queueInputBuffer(inputBufferIndex, 0, len, System.nanoTime() / 1000, 0)
        }
        val bufferInfo = MediaCodec.BufferInfo()
        var outputBufferIndex = dequeueOutputBuffer(bufferInfo, 0)
        while (outputBufferIndex >= 0) {
            val outputBuffer = this@AudioEncoder.outputBuffers!![outputBufferIndex]
            outputAACDelegate?.run {
                //reserve 7 extra bytes in front for the ADTS header
                val outPacketSize = bufferInfo.size + 7
                outputBuffer.position(bufferInfo.offset)
                outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
                val outData = ByteArray(outPacketSize)
                addADTStoPacket(outData, outPacketSize) //add the ADTS header; code below
                outputBuffer.get(outData, 7, bufferInfo.size)
                //reset the position once we are done reading
                outputBuffer.position(bufferInfo.offset)
                //write to file
                outputAACDelegate?.outputAACPacket(outData)
            }
            releaseOutputBuffer(outputBufferIndex, false)
            outputBufferIndex = dequeueOutputBuffer(bufferInfo, 0)
        }
    }
}
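One thing fireAudio() never does is tell the codec that the stream has ended. Below is a minimal teardown sketch, assuming mediaCodec is the nullable var used above (drainAndRelease is a made-up name, and the drain loop is simplified; a production version would also write out the drained packets and handle INFO_OUTPUT_FORMAT_CHANGED):
// Hedged sketch: signal end-of-stream, drain the remaining output, then release.
fun drainAndRelease() {
    mediaCodec?.let { codec ->
        val inIndex = codec.dequeueInputBuffer(-1)
        if (inIndex >= 0) {
            // an empty buffer flagged EOS tells the encoder no more PCM is coming
            codec.queueInputBuffer(inIndex, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
        }
        val info = MediaCodec.BufferInfo()
        while (true) {
            val outIndex = codec.dequeueOutputBuffer(info, 10_000)
            if (outIndex >= 0) {
                codec.releaseOutputBuffer(outIndex, false)
                if ((info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) break
            } else if (outIndex == MediaCodec.INFO_TRY_AGAIN_LATER) {
                break // simplified: treat a timeout as fully drained
            }
        }
        codec.stop()
        codec.release()
    }
    mediaCodec = null
}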
Also note that MediaCodec emits raw AAC data; we have to prepend an ADTS header so that players can play the file directly. Based on the stream parameters we build a 7-byte header, then write the packet out to the file.
private fun addADTStoPacket(packet: ByteArray, packetLen: Int) {
    val profile = 2 // AAC LC
    val freqIdx = 4 // 44.1KHz
    val chanCfg = 2 // CPE
    packet[0] = 0xFF.toByte()
    packet[1] = 0xF9.toByte()
    packet[2] = (((profile - 1) shl 6) + (freqIdx shl 2) + (chanCfg shr 2)).toByte()
    packet[3] = (((chanCfg and 3) shl 6) + (packetLen shr 11)).toByte()
    packet[4] = ((packetLen and 0x7FF) shr 3).toByte()
    packet[5] = (((packetLen and 7) shl 5) + 0x1F).toByte()
    packet[6] = 0xFC.toByte()
}
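As a quick sanity check on the bit packing: for a hypothetical raw AAC frame of 371 bytes, packetLen is 371 + 7 = 378, and with profile = 2, freqIdx = 4, chanCfg = 2 the routine above yields the header bytes FF F9 50 80 2F 5F FC; the 13-bit frame-length field spread across bytes 3 to 5 decodes back to 378.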
For details on the ADTS header fields, see this article:
https://blog.csdn.net/jay100500/article/details/52955232
Hardware encoding is fairly simple overall and fast; if Android device compatibility isn't a concern, it has clear advantages.
With that, the audio part of Chapter 6 is finally done; next up is the video encoding part.
Source code