
AVFoundation Study Notes: Hardware Audio Encoding with AudioToolbox

  • Audio encoding concepts and terminology

AAC is a newer generation of lossy audio compression. With the help of additional coding tools (PS and SBR) it gives rise to three main profiles: LC-AAC, HE-AAC, and HE-AAC v2. LC-AAC is the more traditional AAC and is mainly used for medium to high bitrates (≥80 Kbit/s); HE-AAC (essentially AAC + SBR) targets medium to low bitrates (≤80 Kbit/s); the newer HE-AAC v2 (essentially AAC + SBR + PS) targets low bitrates (≤48 Kbit/s). In practice, most encoders are configured to enable PS automatically at ≤48 Kbit/s and to leave it off above 48 Kbit/s, which is then equivalent to plain HE-AAC.

Key characteristic: AAC performs very well at bitrates below 128 Kbit/s and is the codec most often used for the audio track of video.

PCM (pulse-code modulation) = sample format (sampleFormat) + sample rate (sampleRate) + channel count (channels)

Bit rate = sample rate × bits per sample × channel count
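For example, uncompressed 16-bit stereo PCM sampled at 44.1 kHz works out to 44,100 × 16 × 2 ≈ 1,411 Kbit/s, so a 128 Kbit/s AAC stream represents roughly 11:1 compression.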

Audio sample rate: the number of times per second the recording device samples the sound signal. The higher the sample rate, the more faithful and natural the reconstructed sound. Common sample rates are 22.05 kHz, 44.1 kHz, and 48 kHz.

Bit rate: the number of bits transmitted per second, measured in bps. The higher the bit rate, the more data is carried per unit time.

ADTS (Audio Data Transport Stream) is a very common transport format for AAC.

Before an AAC decoder can consume it, the raw AAC ES (elementary stream) is usually packed into ADTS format, which generally means prepending a 7-byte ADTS header to each AAC frame.

The ADTS header carries the sample rate, channel count, frame length, and so on. It is normally 7 bytes and is split into two parts:

adts_fixed_header()、adts_variable_header()

adts_fixed_header() {
    syncword; // Sync word, always 0xFFF, marks the start of an ADTS frame
    ID; // MPEG version: 0 = MPEG-4, 1 = MPEG-2
    layer; // Always 00
    protection_absent;
    profile;
    sampling_frequency_index; // Index into the sample-rate table
    private_bit;
    channel_configuration; // Channel configuration (channel count)
    original_copy;
    home;
}

adts_variable_header() {
    copyright_identification_bit;
    copyright_identification_start;
    aac_frame_length; // Length of the whole ADTS frame: ADTS header + raw AAC data
    adts_buffer_fullness; // 0x7FF means a variable-bitrate stream
    number_of_raw_data_blocks_in_frame;
}
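
To make the bit layout concrete, here is a minimal parsing sketch (not from the original text; the struct and function names are invented for illustration) that pulls a few of the fields above out of a 7-byte ADTS header:

#include <stdint.h>
#include <stdbool.h>

typedef struct {
    bool     valid;                  // syncword == 0xFFF
    uint8_t  profile;                // 2 bits, Audio Object Type - 1
    uint8_t  samplingFrequencyIndex; // 4 bits, e.g. 4 => 44.1 kHz
    uint8_t  channelConfiguration;   // 3 bits
    uint16_t aacFrameLength;         // 13 bits, ADTS header + raw AAC payload
} ADTSHeaderInfo;

static ADTSHeaderInfo ParseADTSHeader(const uint8_t *p) {
    ADTSHeaderInfo info = {0};
    // syncword: the first 12 bits must be 0xFFF
    info.valid = (p[0] == 0xFF) && ((p[1] & 0xF0) == 0xF0);
    if (!info.valid) return info;
    info.profile                = (p[2] >> 6) & 0x03;
    info.samplingFrequencyIndex = (p[2] >> 2) & 0x0F;
    info.channelConfiguration   = (uint8_t)(((p[2] & 0x01) << 2) | ((p[3] >> 6) & 0x03));
    info.aacFrameLength         = (uint16_t)(((p[3] & 0x03) << 11) | (p[4] << 3) | ((p[5] >> 5) & 0x07));
    return info;
}

This mirrors the bit packing used by adtsDataForPacketLength: in the sample code below.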
           
  • Hardware audio encoding flow (AudioToolbox)
    (Flow diagram: the AudioToolbox audio encoding pipeline)
  • AudioToolbox sample code
- (NSFileHandle *)audioFileHandle {
    if (!_audioFileHandle) {
        NSString * filePath = [NSHomeDirectory() stringByAppendingPathComponent:@"/Documents/demo.aac"];
        [[NSFileManager defaultManager] removeItemAtPath:filePath error:nil];
        BOOL createFile = [[NSFileManager defaultManager] createFileAtPath:filePath contents:nil attributes:nil];
        NSAssert(createFile, @"create audio path error");
        _audioFileHandle = [NSFileHandle fileHandleForWritingAtPath:filePath];
    }
    return _audioFileHandle;
}

- (void)dealloc {
    if (_audioConverter) {
        AudioConverterDispose(_audioConverter);
    }
    free(_aacBuffer);
}

- (id)init {
    if (self = [super init]) {
        _encoderQueue = dispatch_queue_create("aac encode queue", DISPATCH_QUEUE_SERIAL);
        _audioConverter = NULL;
        _pcmBufferSize = 0;
        _pcmBuffer = NULL;
        _aacBufferSize = 1024;
        _aacBuffer = malloc(_aacBufferSize * sizeof(uint8_t));
        memset(_aacBuffer, 0, _aacBufferSize);
    }
    return self;
}


- (void)stopEncodeAudio {
    [self.audioFileHandle closeFile];
    self.audioFileHandle = NULL;
}

// Configure the encoder parameters
- (void)setupEncoderFromSampleBuffer:(CMSampleBufferRef)sampleBuffer {
    
    NSLog(@"开始配置编码参数。。。。");
    
    // Get the source audio format from the sample buffer
    AudioStreamBasicDescription inAudioStreamBasicDescription = *CMAudioFormatDescriptionGetStreamBasicDescription((CMAudioFormatDescriptionRef)CMSampleBufferGetFormatDescription(sampleBuffer));
    AudioStreamBasicDescription outAudioStreamBasicDescription = {0};
    
    // Sample rate
    outAudioStreamBasicDescription.mSampleRate = inAudioStreamBasicDescription.mSampleRate;
    // Format ID: kAudioFormatMPEG4AAC = 'aac '
    outAudioStreamBasicDescription.mFormatID = kAudioFormatMPEG4AAC;
    // Format flags: AAC Low Complexity (AAC-LC) object type
    outAudioStreamBasicDescription.mFormatFlags = kMPEG4Object_AAC_LC;
    // Bytes per packet: 0 means variable packet size
    outAudioStreamBasicDescription.mBytesPerPacket = 0;
    // Frames per packet: AAC always encodes 1024 PCM frames per packet
    outAudioStreamBasicDescription.mFramesPerPacket = 1024;
    // Bytes per frame: 0 for a compressed format
    outAudioStreamBasicDescription.mBytesPerFrame = 0;
    // Channels per frame: 1 = mono, 2 = stereo
    outAudioStreamBasicDescription.mChannelsPerFrame = 1;
    // Bits per channel: 0 for a compressed format
    outAudioStreamBasicDescription.mBitsPerChannel = 0;
    // Reserved (struct padding/alignment)
    outAudioStreamBasicDescription.mReserved = 0;
    
    // Look up the AAC encoder (codec class description)
    AudioClassDescription * description = [self getAudioClassDescriptionWithType:kAudioFormatMPEG4AAC fromManufacturer:kAppleSoftwareAudioCodecManufacturer];
    
    // Create the converter (encoder)
    /*
     inAudioStreamBasicDescription: source (input) audio format
     outAudioStreamBasicDescription: destination (output) audio format
     third parameter: number of class descriptions passed in
     description: class description of the codec to use
     */
    OSStatus status = AudioConverterNewSpecific(&inAudioStreamBasicDescription, &outAudioStreamBasicDescription, 1, description, &_audioConverter);
    if (status != noErr) {
        NSLog(@"Failed to create audio converter");
    }
    
}
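
// A minimal sketch, not part of the original article: the converter above is created
// without an explicit output bitrate. If a specific bitrate is needed, it can be set
// with kAudioConverterEncodeBitRate right after AudioConverterNewSpecific succeeds
// (the method name applyEncodeBitrate: is a made-up helper for illustration).
- (void)applyEncodeBitrate:(UInt32)bitsPerSecond {
    if (!_audioConverter) {
        return;
    }
    OSStatus status = AudioConverterSetProperty(_audioConverter,
                                                kAudioConverterEncodeBitRate,
                                                sizeof(bitsPerSecond),
                                                &bitsPerSecond);
    if (status != noErr) {
        NSLog(@"Failed to set encode bitrate: %d", (int)status);
    }
}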


// Find an AAC encoder class description
- (AudioClassDescription *)getAudioClassDescriptionWithType:(UInt32)type
                                           fromManufacturer:(UInt32)manufacturer
{
    NSLog(@"开始获取编码器。。。。");
    // 选择aac编码
    static AudioClassDescription desc;
    UInt32 encoderS = type;
    OSStatus status;
    UInt32 size;
    /*
     kAudioFormatProperty_Encoders: property listing the available encoders
     sizeof(encoderS): size of the specifier (the format ID)
     &encoderS: the specifier itself
     &size: on return, the size in bytes of the property value
     */
    status = AudioFormatGetPropertyInfo(kAudioFormatProperty_Encoders, sizeof(encoderS), &encoderS, &size);
    if (status) {
        NSLog(@"编码aac错误");
        return nil;
    }
    
    // Number of available encoders
    unsigned int count = size / sizeof(AudioClassDescription);
    
    // Array to receive the encoder descriptions
    AudioClassDescription description[count];
    
    status = AudioFormatGetProperty(kAudioFormatProperty_Encoders, sizeof(encoderS), &encoderS, &size, description);
    if (status) {
        NSLog(@"Failed to get encoder descriptions");
        return nil;
    }
    
    for (unsigned int i = 0; i < count; i++) {
        if (type == description[i].mSubType && manufacturer == description[i].mManufacturer) {
            // Copy the matching encoder description into desc
            memcpy(&desc, &description[i], sizeof(desc));
            NSLog(@"找到aac编码器");
            return &desc;
        }
    }
    
    return nil;
}
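
// A usage sketch, not from the original article: try the hardware AAC codec first and
// fall back to the software codec when no hardware encoder matches (the method name
// preferredAACEncoderDescription is invented for illustration).
- (AudioClassDescription *)preferredAACEncoderDescription {
    AudioClassDescription * description = [self getAudioClassDescriptionWithType:kAudioFormatMPEG4AAC
                                                                 fromManufacturer:kAppleHardwareAudioCodecManufacturer];
    if (description == NULL) {
        description = [self getAudioClassDescriptionWithType:kAudioFormatMPEG4AAC
                                            fromManufacturer:kAppleSoftwareAudioCodecManufacturer];
    }
    return description;
}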


// Input data callback invoked by AudioConverterFillComplexBuffer
OSStatus inInputDataProc(AudioConverterRef inAudioConverter, UInt32 *ioNumberDataPackets, AudioBufferList *ioData, AudioStreamPacketDescription **outDataPacketDescription, void *inUserData)
{
    // Recover the encoder instance from the user-data pointer
    AACEncoder *encoder = (__bridge AACEncoder *) inUserData;
    
    // Number of packets the converter is requesting
    UInt32 requestPackes = *ioNumberDataPackets;
    // Hand the cached PCM data over via ioData
    size_t cp = [encoder copyPCMSamplesIntoBuffer:ioData];
    if (cp < requestPackes) {
        *ioNumberDataPackets = 0; // No data available
        return -1;
    }
    
    *ioNumberDataPackets = 1;
    return noErr;
}


// Copy the cached PCM data into the converter's buffer list
- (size_t)copyPCMSamplesIntoBuffer:(AudioBufferList*)ioData {
    // Remember how much PCM data is cached
    size_t os = _pcmBufferSize;
    if (!_pcmBufferSize) {
        return 0;
    }
    
    ioData->mBuffers[0].mData = _pcmBuffer;
    ioData->mBuffers[0].mDataByteSize = (int)_pcmBufferSize;
    // Clear the cache; the data has been handed off
    _pcmBuffer = NULL;
    _pcmBufferSize = 0;
    return os;
}


// Encode a CMSampleBuffer of PCM into AAC
- (void)encodeAudioSampleBuffer:(CMSampleBufferRef)sampleBuffer {
    
    CFRetain(sampleBuffer);
    dispatch_sync(_encoderQueue, ^{
        if (!self.audioConverter) {
            // Configure encoder parameters on first use
            [self setupEncoderFromSampleBuffer:sampleBuffer];
        }
        
        // Get the CMBlockBufferRef holding the PCM data
        CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
        CFRetain(blockBuffer);
        
        // Fill _pcmBufferSize and _pcmBuffer from the block buffer
        OSStatus status = CMBlockBufferGetDataPointer(blockBuffer, 0, NULL, &self->_pcmBufferSize, &self->_pcmBuffer);
        if (status != kCMBlockBufferNoErr) {
            NSLog(@"Failed to get PCM data pointer");
            CFRelease(blockBuffer);
            CFRelease(sampleBuffer);
            return;
        }
        
        // Reset the AAC output buffer
        memset(self->_aacBuffer, 0, self->_aacBufferSize);
        
        // Set up the output buffer list
        AudioBufferList outAudioBufferList = {0};
        // Number of buffers
        outAudioBufferList.mNumberBuffers = 1;
        // Number of channels
        outAudioBufferList.mBuffers[0].mNumberChannels = 1;
        // Buffer size
        outAudioBufferList.mBuffers[0].mDataByteSize = (int)self->_aacBufferSize;
        // Buffer data pointer
        outAudioBufferList.mBuffers[0].mData = self->_aacBuffer;
        
        // Encode
        AudioStreamPacketDescription * outPD = NULL;
        UInt32 inPutSize = 1;
        /*
         inInputDataProc: callback that supplies the PCM input data
         (__bridge void*)self: user data passed to the callback
         inPutSize: on input, the output capacity in packets; on output, the number of packets produced
         outAudioBufferList: receives the encoded data
         outPD: receives packet descriptions (may be NULL)
         */
        status = AudioConverterFillComplexBuffer(self->_audioConverter,
                                                 inInputDataProc,
                                                 (__bridge void*)self,
                                                 &inPutSize,
                                                 &outAudioBufferList,
                                                 outPD
                                                 );
        
        // Handle the encoded output
        NSData * data = nil;
        if (status == noErr) {
            // Raw AAC data from the output buffer
            NSData * rawAAC = [NSData dataWithBytes:outAudioBufferList.mBuffers[0].mData length:outAudioBufferList.mBuffers[0].mDataByteSize];
            
            // Prepend the ADTS header
            NSData * adtsHeader = [self adtsDataForPacketLength:rawAAC.length];
            NSMutableData * fullData = [NSMutableData dataWithData:adtsHeader];
            [fullData appendData:rawAAC];
            data = fullData;
        } else {
            NSLog(@"Encode failed");
            CFRelease(blockBuffer);
            CFRelease(sampleBuffer);
            return;
        }
        
        // Callback (alternative: hand the data to a completion block)
        //        if (completionBlock) {
        //            dispatch_async(_callBackQueue, ^{
        //                completionBlock(data, nil);
        //            });
        //        }
        // Write the ADTS + AAC frame to the file
        [self.audioFileHandle writeData:data];
        
        CFRelease(sampleBuffer);
        CFRelease(blockBuffer);
    });
}
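
// A usage sketch, an assumption not shown in the original article: if this encoder is
// set as the sample-buffer delegate of an AVCaptureAudioDataOutput (and
// <AVFoundation/AVFoundation.h> is imported), captured PCM can be fed straight into
// encodeAudioSampleBuffer:.
- (void)captureOutput:(AVCaptureOutput *)output
didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
       fromConnection:(AVCaptureConnection *)connection {
    [self encodeAudioSampleBuffer:sampleBuffer];
}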



/**
 *  Add ADTS header at the beginning of each and every AAC packet.
 *  This is needed because the AudioConverter produces packets of raw
 *  AAC data.
 *
 *  Note the packetLen must count in the ADTS header itself.
 *  See: http://wiki.multimedia.cx/index.php?title=ADTS
 *  Also: http://wiki.multimedia.cx/index.php?title=MPEG-4_Audio#Channel_Configurations
 **/
- (NSData*)adtsDataForPacketLength:(NSUInteger)packetLength {
    int adtsLength = 7;
    char *packet = malloc(sizeof(char) * adtsLength);
    
    int profile = 2;  // AAC LC (Audio Object Type 2)
    int freqIdx = 4;  // sampling_frequency_index 4 => 44.1 kHz
    int chanCfg = 1;  // channel_configuration 1 => mono
    NSUInteger fullLength = adtsLength + packetLength;
    packet[0] = (char)0xFF;
    packet[1] = (char)0xF9;
    packet[2] = (char)(((profile-1)<<6) + (freqIdx<<2) +(chanCfg>>2));
    packet[3] = (char)(((chanCfg&3)<<6) + (fullLength>>11));
    packet[4] = (char)((fullLength&0x7FF) >> 3);
    packet[5] = (char)(((fullLength&7)<<5) + 0x1F);
    packet[6] = (char)0xFC;
    
    NSData *data = [NSData dataWithBytesNoCopy:packet length:adtsLength freeWhenDone:YES];
    return data;
}
@end