
Software and Hardware Encoding of PCM to AAC on Android

Chapter 6 covers a lot of material, so let's work through software and hardware encoding of audio.

Encoding with libfdk_aac

The book's sample code encodes through the FFmpeg API, though you could also build libfdk_aac separately and call it directly. First we initialize the encoder with parameters such as the sample rate, channel count, and bit rate.
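
Before any FFmpeg state exists, initialization only needs to record these parameters. Here is a minimal sketch of such an entry point; the signature, the member names, and the alloc_audio_stream() helper are assumptions modeled on the fields used in the snippets below, not the book's actual code.

int AudioEncoder::init(int bitRate, int channels, int sampleRate, const char *aacFilePath) {
    //hypothetical entry point: store the parameters that the later
    //snippets read back from member fields
    this->publishBitRate = bitRate;      // <= 0 falls back to PUBLISH_BITE_RATE
    this->audioChannels = channels;      // 1 = mono, 2 = stereo
    this->audioSampleRate = sampleRate;  // e.g. 44100
    this->aacFilePath = strdup(aacFilePath);
    return alloc_audio_stream();         // hypothetical helper holding the code below
}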

Next we probe the output file, letting FFmpeg deduce the format automatically from the file name.

int ret;
    av_register_all();
    avFormatContext = avformat_alloc_context();
    LOGI("aacFilePath is %s ", aacFilePath);
    //alternative approach:
    //probe the format first, then set it on avFormatContext
//    AVOutputFormat *fmt = av_guess_format(NULL, aacFilePath, NULL);
//    avFormatContext->oformat = fmt;

    //let FFmpeg deduce the format directly from the output file name
    if ((ret = avformat_alloc_output_context2(&avFormatContext, nullptr, nullptr, aacFilePath)) !=
        0) {
        LOGI("avFormatContext   alloc   failed : %s", av_err2str(ret));
        return -1;
    }
    if ((ret = avio_open2(&avFormatContext->pb, aacFilePath, AVIO_FLAG_WRITE, nullptr, nullptr)) < 0) {
        LOGI("Could not avio open fail %s", av_err2str(ret));
        return -1;
    }

Next we create an audio stream and grab its codec context.

AVCodec *codec;
    AVSampleFormat preferedSampleFMT = AV_SAMPLE_FMT_S16;
    int preferedChannels = audioChannels;
    int preferedSampleRate = audioSampleRate;
    audioStream = avformat_new_stream(avFormatContext, nullptr);
    audioStream->id = 1;
    avCodecContext = audioStream->codec;

Now configure the codec context. The main fields to set are:

  • the codec type
  • the sample rate
  • the bit rate
  • the codec id, which can be taken from the oformat probed earlier
  • the sample format
  • the channel count
  • the AAC profile; roughly the following profiles exist:

    MPEG-2 AAC LC: Low Complexity profile

    MPEG-2 AAC Main: Main profile

    MPEG-2 AAC SSR: Scalable Sample Rate profile

    MPEG-4 AAC LC: Low Complexity profile; the audio track in the MP4 files common on today's phones is typically this profile

    MPEG-4 AAC Main: Main profile

    MPEG-4 AAC SSR: Scalable Sample Rate profile

    MPEG-4 AAC LTP: Long Term Prediction profile

    MPEG-4 AAC LD: Low Delay profile

    MPEG-4 AAC HE: High Efficiency profile

avCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;
    avCodecContext->sample_rate = audioSampleRate;
    if (publishBitRate > 0) {
        avCodecContext->bit_rate = publishBitRate;
    } else {
        avCodecContext->bit_rate = PUBLISH_BITE_RATE;
    }
    avCodecContext->codec_id = avFormatContext->oformat->audio_codec;
    avCodecContext->sample_fmt = preferedSampleFMT;
    LOGI("audioChannels is %d", audioChannels);
    avCodecContext->channel_layout =
            preferedChannels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
    avCodecContext->channels = av_get_channel_layout_nb_channels(avCodecContext->channel_layout);
    //set the AAC profile
    avCodecContext->profile = FF_PROFILE_AAC_LOW;
    LOGI("avCodecContext->channels is %d", avCodecContext->channels);

    if (avFormatContext->oformat->flags & AVFMT_GLOBALHEADER) {
        avCodecContext->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

Find the matching encoder and query the sample formats and sample rates it supports, filtering against our preferences. In the book's code, if the preferred sample format is not supported it simply falls back to the first supported format, and for the sample rate it picks the closest supported value.

//look up the encoder via the codec id probed earlier
    codec = avcodec_find_encoder(avCodecContext->codec_id);
    if (!codec) {
        LOGI("Couldn't find a valid audio codec");
        return -1;
    }

    if (codec->sample_fmts) {
        /* check if the preferred sample format for this codec is supported.
         * this is because, depending on the version of libav, and with the whole ffmpeg/libav fork situation,
         * you have various implementations around. float samples in particular are not always supported.
         */
        const enum AVSampleFormat *p = codec->sample_fmts;
        for (; *p != -1; p++) {
            if (*p == audioStream->codec->sample_fmt)
                break;
        }
        if (*p == -1) {
            LOGI("sample format incompatible with codec. Defaulting to a format known to work.........");
            avCodecContext->sample_fmt = codec->sample_fmts[0];
        }
    }

    //pick the closest supported sample rate
    if (codec->supported_samplerates) {
        const int *p = codec->supported_samplerates;
        int best = 0;
        int best_dist = INT_MAX;
        for (; *p; p++) {
            int dist = abs(audioStream->codec->sample_rate - *p);
            if (dist < best_dist) {
                best_dist = dist;
                best = *p;
            }
        }
        /* best is the closest supported sample rate (same as selected if best_dist == 0) */
        avCodecContext->sample_rate = best;
    }

Next, if the PCM we feed in does not match a spec the encoder supports, it has to be converted: initialize a SwrContext, and finally open the codec.

//initialize the resampler if the input spec differs from the codec's
    if (preferedChannels != avCodecContext->channels
        || preferedSampleRate != avCodecContext->sample_rate
        || preferedSampleFMT != avCodecContext->sample_fmt) {
        LOGI("channels is {%d, %d}", preferedChannels, audioStream->codec->channels);
        LOGI("sample_rate is {%d, %d}", preferedSampleRate, audioStream->codec->sample_rate);
        LOGI("sample_fmt is {%d, %d}", preferedSampleFMT, audioStream->codec->sample_fmt);
        LOGI("AV_SAMPLE_FMT_S16P is %d AV_SAMPLE_FMT_S16 is %d AV_SAMPLE_FMT_FLTP is %d",
             AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP);
        swrContext = swr_alloc_set_opts(NULL,
                                        av_get_default_channel_layout(avCodecContext->channels),
                                        (AVSampleFormat) avCodecContext->sample_fmt,
                                        avCodecContext->sample_rate,
                                        av_get_default_channel_layout(preferedChannels),
                                        preferedSampleFMT, preferedSampleRate,
                                        0, NULL);
        if (!swrContext || swr_init(swrContext)) {
            if (swrContext)
                swr_free(&swrContext);
            return -1;
        }
    }
    if (avcodec_open2(avCodecContext, codec, NULL) < 0) {
        LOGI("Couldn't open codec");
        return -1;
    }

Next we write the stream header, then allocate the frame buffers.

if (avformat_write_header(avFormatContext, nullptr) != 0) {
        LOGI("Could not write header\n");
        return -1;
    }
    this->isWriteHeaderSuccess = true;
    this->alloc_avframe();

Frame allocation covers the input frame and, if resampling is required, a converted frame plus the data buffers used during conversion. For an audio frame, the fields that must be initialized manually are:

  • the number of samples in the frame (per channel)
  • the sample format
  • the channel layout
  • the sample rate
  • the frame buffer, which once allocated is bound to the frame with avcodec_fill_audio_frame

int AudioEncoder::alloc_avframe() {
    int ret = 0;
    AVSampleFormat preferedSampleFMT = AV_SAMPLE_FMT_S16;
    int preferedChannels = audioChannels;
    int preferedSampleRate = audioSampleRate;
    input_frame = av_frame_alloc();
    if (!input_frame) {
        LOGI("Could not allocate audio frame\n");
        return -1;
    }
    input_frame->nb_samples = avCodecContext->frame_size;
    input_frame->format = preferedSampleFMT;
    input_frame->channel_layout = preferedChannels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
    input_frame->sample_rate = preferedSampleRate;

    buffer_size = av_samples_get_buffer_size(NULL, av_get_channel_layout_nb_channels(
            input_frame->channel_layout),
                                             input_frame->nb_samples, preferedSampleFMT, 0);
    samples = static_cast<uint8_t *>(av_malloc(buffer_size));
    samplesCursor = 0;
    if (!samples) {
        LOGI("Could not allocate %d bytes for samples buffer\n", buffer_size);
        return -1;
    }
    LOGI("allocate %d bytes for samples buffer\n", buffer_size);
    /* setup the data pointers in the AVFrame */
    //bind the sample buffer to the AVFrame
    ret = avcodec_fill_audio_frame(input_frame,
                                   av_get_channel_layout_nb_channels(input_frame->channel_layout),
                                   preferedSampleFMT, samples, buffer_size, 0);
    if (ret < 0) {
        LOGI("Could not setup audio frame\n");
    }

    if (swrContext) {
        if (av_sample_fmt_is_planar(avCodecContext->sample_fmt)) {
            LOGI("Codec Context SampleFormat is Planar...");
        }
        /* allocate the buffers used during resampling */
        convert_data = (uint8_t **) calloc(avCodecContext->channels,
                                           sizeof(*convert_data));
        av_samples_alloc(convert_data, nullptr, avCodecContext->channels,
                         avCodecContext->frame_size,
                         avCodecContext->sample_fmt, 0);
        swrBufferSize = av_samples_get_buffer_size(NULL, avCodecContext->channels,
                                                   avCodecContext->frame_size,
                                                   avCodecContext->sample_fmt, 0);
        swrBuffer = (uint8_t *) av_malloc(swrBufferSize);
        LOGI("After av_malloc swrBuffer");
        swrFrame = av_frame_alloc();
        if (!swrFrame) {
            LOGI("Could not allocate swrFrame frame\n");
            return -1;
        }
        swrFrame->nb_samples = avCodecContext->frame_size;
        swrFrame->format = avCodecContext->sample_fmt;
        swrFrame->channel_layout =
                avCodecContext->channels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
        swrFrame->sample_rate = avCodecContext->sample_rate;
        ret = avcodec_fill_audio_frame(swrFrame, avCodecContext->channels,
                                       avCodecContext->sample_fmt, (const uint8_t *) swrBuffer,
                                       swrBufferSize, 0);
        LOGI("After avcodec_fill_audio_frame");
        if (ret < 0) {
            LOGI("avcodec_fill_audio_frame error ");
            return -1;
        }
    }

    return ret;
}

Now for the encoding itself. We read data from the PCM file and encode it, converting first if the format is not supported, and write the result to the file once encoding finishes. The main flow is to initialize an AVPacket, which holds the encoded data; the fields that must be set manually are:

  • the stream index
  • the data pointer
  • the data size

We then call avcodec_encode_audio2 to encode, and av_interleaved_write_frame to write to the file.

void AudioEncoder::encodePacket() {
    int ret, got_output;
    AVPacket pkt;
    av_init_packet(&pkt);
    AVFrame *encode_frame;
    if (swrContext) {
        long long beginSWRTimeMills = getCurrentTime();
        swr_convert(swrContext, convert_data, avCodecContext->frame_size,
                    (const uint8_t **) input_frame->data, avCodecContext->frame_size);
        int length =
                avCodecContext->frame_size * av_get_bytes_per_sample(avCodecContext->sample_fmt);
        //copy each (planar) channel into the converted frame
        for (int k = 0; k < avCodecContext->channels; ++k) {
            for (int j = 0; j < length; ++j) {
                swrFrame->data[k][j] = convert_data[k][j];
            }
        }
        totalSWRTimeMills += (getCurrentTime() - beginSWRTimeMills);
        encode_frame = swrFrame;
    } else {
        encode_frame = input_frame;
    }
    encode_frame->pts = frameIndex++;
    pkt.stream_index = audioStream->index;
//    pkt.duration = (int) AV_NOPTS_VALUE;
//    pkt.pts = pkt.dts = 0;
    pkt.data = samples;
    pkt.size = buffer_size;

    ret = avcodec_encode_audio2(avCodecContext, &pkt, encode_frame, &got_output);
    if (ret < 0) {
        LOGI("Error encoding audio frame\n");
        return;
    }
    if (got_output) {
        if (avCodecContext->coded_frame && avCodecContext->coded_frame->pts != AV_NOPTS_VALUE) {
            pkt.pts = av_rescale_q(avCodecContext->coded_frame->pts, avCodecContext->time_base,
                                   audioStream->time_base);
        }
        //mark the packet as a key frame
        pkt.flags |= AV_PKT_FLAG_KEY;
        this->duration += (pkt.duration * av_q2d(audioStream->time_base));

        //this function writes the packet with correct interleaving. If the caller cannot
        //guarantee that packets from the individual streams are properly interleaved,
        //prefer this function; otherwise av_write_frame can be used for better performance.
        int writeCode = av_interleaved_write_frame(avFormatContext, &pkt);
    }

    av_free_packet(&pkt);
}

When everything is done, destroy() must write the file trailer by calling av_write_trailer(avFormatContext). That completes software encoding with FFmpeg.
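
The teardown itself isn't shown above. Here is a minimal sketch of what destroy() could look like, assuming the member names from the earlier snippets; the cleanup order in the book's source may differ.

void AudioEncoder::destroy() {
    //only write the trailer if the header went out successfully
    if (isWriteHeaderSuccess) {
        av_write_trailer(avFormatContext);
    }
    if (swrContext) {
        swr_free(&swrContext);
    }
    if (input_frame) {
        av_frame_free(&input_frame);
    }
    if (avFormatContext) {
        avio_closep(&avFormatContext->pb);   //close the output file
        avformat_free_context(avFormatContext);
        avFormatContext = nullptr;
    }
}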

While running the encoder I noticed a couple of issues. I'm not sure whether they come from my own settings, so I'd appreciate any pointers.

1. The AVPacket pts is only incremented once per loop iteration; if encode_frame's pts is never set, the AVPacket pts stays at a constant initial value.

2. avCodecContext's time_base and frame_size don't need to be set manually, even though the book's source sets them. However, audioStream's time_base stays at the default 1/90000 after initialization and has to be changed to 1/44100 by hand. Does this really have to be set manually, or can some API set it automatically?
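
One possible workaround, sketched here without having verified it against the book's source, is to overwrite the stream's time base right after creating the stream:

//assumed workaround: express the stream time base in samples (1/44100)
//instead of the 1/90000 default
audioStream->time_base = (AVRational) {1, audioSampleRate};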

Encoding AAC with MediaCodec

First, MediaCodec is initialized with parameters such as the sample rate, bit rate, and channel count. These parameters are all stored in a map (the MediaFormat), and the keys make their meanings clear. Then we obtain a MediaCodec for the matching type and configure it with the format and the encoder flag.

init {
        val encodeFormat: MediaFormat = MediaFormat.createAudioFormat(MINE_TYPE, sampleRate, channels)
        encodeFormat.setInteger(MediaFormat.KEY_BIT_RATE, bitRate)
        encodeFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC)
        // max input buffer size; the original constant was lost, 8192 * 2 is an assumed value
        encodeFormat.setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, 8192 * 2)
        mediaCodec = MediaCodec.createEncoderByType(MINE_TYPE)
        mediaCodec?.run {
            configure(encodeFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
            start()
            // cache the (deprecated) buffer arrays on the enclosing class
            // (the class name AudioEncoder is assumed here)
            this@AudioEncoder.inputBuffers = inputBuffers
            this@AudioEncoder.outputBuffers = outputBuffers
        }.ifNull {
            Log.e("problem", "create mediaEncode failed")
            return@ifNull
        }
    }

How MediaCodec works is illustrated in the figure:


In short, input and output are separate buffer queues: we submit PCM to the input queue, and once it has been encoded we fetch the AAC data from the output queue. So it's the usual routine: keep reading PCM from the file and feeding it to MediaCodec. The main thing to watch out for is ByteBuffer position handling; the rest is fairly straightforward.

fun fireAudio(data: ByteArray, len: Int) {
        mediaCodec?.run {
            val inputBufferIndex = dequeueInputBuffer(-1)
            if (inputBufferIndex >= 0) {
                val inputBuffer = this@AudioEncoder.inputBuffers!![inputBufferIndex]
                inputBuffer.clear()
                inputBuffer.put(data)
                // presentationTimeUs is in microseconds, so convert from nanoseconds
                queueInputBuffer(inputBufferIndex, 0, len, System.nanoTime() / 1000, 0)
            }
            val bufferInfo = MediaCodec.BufferInfo()
            var outputBufferIndex = dequeueOutputBuffer(bufferInfo, 0)

            while (outputBufferIndex >= 0) {
                val outputBuffer = this@AudioEncoder.outputBuffers!![outputBufferIndex]
                outputAACDelegate?.run {
                    // reserve 7 extra bytes at the front for the ADTS header
                    val outPacketSize = bufferInfo.size + 7
                    outputBuffer.position(bufferInfo.offset)
                    outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
                    val outData = ByteArray(outPacketSize)
                    addADTStoPacket(outData, outPacketSize) // add the ADTS header; code below
                    outputBuffer.get(outData, 7, bufferInfo.size)
                    // restore the position once reading is done
                    outputBuffer.position(bufferInfo.offset)
                    // write to file
                    outputAACDelegate?.outputAACPacket(outData)
                }
                releaseOutputBuffer(outputBufferIndex, false)
                outputBufferIndex = dequeueOutputBuffer(bufferInfo, 0)
            }
        }
    }

Note that MediaCodec produces raw AAC data, so we have to prepend an ADTS header for players to be able to play the file directly. We fill in the 7-byte header from the stream parameters and then write the packet to the file.

private fun addADTStoPacket(packet: ByteArray, packetLen: Int) {
        val profile = 2 // AAC LC
        val freqIdx = 4 // 44.1KHz
        val chanCfg = 2 // CPE
        packet[0] = 0xFF.toByte() // syncword (high 8 bits)
        packet[1] = 0xF9.toByte() // syncword (low 4 bits), MPEG-2, no CRC
        packet[2] = ((profile - 1 shl 6) + (freqIdx shl 2) + (chanCfg shr 2)).toByte()
        packet[3] = ((chanCfg and 3 shl 6) + (packetLen shr 11)).toByte()
        packet[4] = (packetLen and 0x7FF shr 3).toByte()
        packet[5] = ((packetLen and 7 shl 5) + 0x1F).toByte()
        packet[6] = 0xFC.toByte()
    }

For more details on the ADTS header, see this article:

https://blog.csdn.net/jay100500/article/details/52955232

Hardware encoding is fairly simple overall and fast. If Android device compatibility is not a concern, it has clear advantages.

With that, the audio part of Chapter 6 is finally finished; next comes video encoding.

Source code