天天看点

RGBA 编码为 YUV420SP【NEON】

RGBA >> YUV420SP 

  • C/C++ 朴素实现版
/**
 * Plain C conversion of a packed 4-byte-per-pixel image to NV21 (YUV420SP).
 *
 * Every source pixel is read as byte 0 = B, byte 1 = G, byte 2 = R; byte 3 is
 * skipped. The destination receives width * height luma bytes, followed by
 * interleaved V,U pairs: one pair for each even column of each even row.
 *
 * @param yuv420sp destination buffer; its size is not checked here, the caller
 *                 must provide room for the luma plane plus the V/U pairs
 * @param argb     source pixels, 4 bytes each, rows stored back to back
 * @param width    image width in pixels
 * @param height   image height in pixels
 */
void encodeYUV420SP_CPU(unsigned char *__restrict__ yuv420sp,
                        unsigned char *__restrict__ argb, int width, int height) {
    const unsigned char *px = argb;
    int luma_pos = 0;
    int chroma_pos = width * height; /* chroma starts right after the luma plane */

    for (int row = 0; row < height; ++row) {
        const int even_row = (row & 1) == 0;

        for (int col = 0; col < width; ++col, px += 4) {
            const int blue = px[0];
            const int green = px[1];
            const int red = px[2];

            /* Fixed-point RGB -> luma, stored for every pixel. */
            const int luma = ((66 * red + 129 * green + 25 * blue + 128) >> 8) + 16;
            yuv420sp[luma_pos] = (unsigned char) luma;
            luma_pos += 1;

            /* Chroma is subsampled 2x2: only the top-left pixel of each
             * 2x2 cell (even row, even column) contributes a V,U pair. */
            if (!even_row || (col & 1) != 0) {
                continue;
            }

            const int cb = ((-38 * red - 74 * green + 112 * blue + 128) >> 8) + 128;
            const int cr = ((112 * red - 94 * green - 18 * blue + 128) >> 8) + 128;

            /* NV21 ordering: V first, then U. */
            yuv420sp[chroma_pos] = (unsigned char) cr;
            yuv420sp[chroma_pos + 1] = (unsigned char) cb;
            chroma_pos += 2;
        }
    }
}
  • Neon 实现版(Github上找的)
void encodeYUV420SP_NEON_Intrinsics(unsigned char *__restrict__ yuv420sp,
                                    unsigned char *__restrict__ argb, int width, int height) {
    const uint16x8_t u16_rounding = vdupq_n_u16(128);
    const int16x8_t s16_rounding = vdupq_n_s16(128); // +128, u/v 中内层的 +128
    const int8x8_t s8_rounding = vdup_n_s8(
            128); // -128,即 0x80,最高成了符号位,实际只有 7 位用来表示数字,用来处理符号位, u/v 中外层的 +128
    const uint8x16_t offset = vdupq_n_u8(16);
    const uint16x8_t mask = vdupq_n_s16(255);
 
//    测试
//    int16x8_t test = vaddl_s8 (s8_rounding, s8_rounding);// -256
//    int8x8_t test_0 = vdup_n_s8(127); // 正常为 127
//    int8x8_t test_1 = vadd_s8(test_0, test_0); // -2,因为计算溢出到符号位
 
    int frameSize = width * height;
 
    int yIndex = 0;
    int uvIndex = frameSize;
 
    int i;
    int j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width >> 4; i++) {
            // Load rgb
            uint8x16x4_t pixel_argb = vld4q_u8(argb);
            argb += 4 * 16;
 
            uint8x8x2_t uint8_r;
            uint8x8x2_t uint8_g;
            uint8x8x2_t uint8_b;
            uint8_r.val[0] = vget_low_u8(pixel_argb.val[2]);
            uint8_r.val[1] = vget_high_u8(pixel_argb.val[2]);
            uint8_g.val[0] = vget_low_u8(pixel_argb.val[1]);
            uint8_g.val[1] = vget_high_u8(pixel_argb.val[1]);
            uint8_b.val[0] = vget_low_u8(pixel_argb.val[0]);
            uint8_b.val[1] = vget_high_u8(pixel_argb.val[0]);
 
            // Y = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
            uint16x8x2_t uint16_y;
 
            uint8x8_t scalar = vdup_n_u8(66);
            uint8x16_t y;
 
            uint16_y.val[0] = vmull_u8(uint8_r.val[0], scalar);
            uint16_y.val[1] = vmull_u8(uint8_r.val[1], scalar);
            scalar = vdup_n_u8(129);
            uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_g.val[0], scalar);
            uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_g.val[1], scalar);
            scalar = vdup_n_u8(25);
            uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_b.val[0], scalar);
            uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_b.val[1], scalar);
 
            uint16_y.val[0] = vaddq_u16(uint16_y.val[0], u16_rounding);
            uint16_y.val[1] = vaddq_u16(uint16_y.val[1], u16_rounding);
 
            y = vcombine_u8(vqshrn_n_u16(uint16_y.val[0], 8), vqshrn_n_u16(uint16_y.val[1], 8));
            y = vaddq_u8(y, offset);
 
            vst1q_u8(yuv420sp + yIndex, y);
            yIndex += 16;
 
            // 在偶数行中计算 U 和 V
            if (j % 2 == 0) {
 
//                uint8_t U = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
//                uint8_t V = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;
 
                int16x8_t u_scalar = vdupq_n_s16(-38);
                int16x8_t v_scalar = vdupq_n_s16(112);
 
#if 1
                // 因为 u,v 的值只有 y 的一半,所以只取高位计算
                int16x8_t r = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[2]), mask));
 
//                 测试
//                uint16x8_t test_0 = vreinterpretq_u16_u8(pixel_argb.val[2]);
//                uint16x8_t test_1 = vandq_u16(test_0, mask);
//                int16x8_t  test_2 = vreinterpretq_s16_u16(test_1);
 
                int16x8_t g = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[1]), mask));
                int16x8_t b = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[0]), mask));
#else // 两者效果是一样的,但是没第一种快
                int16x8_t r = vreinterpretq_s16_u16(vmovl_u8(
                        vqshrn_n_u16(vshlq_n_u16(vreinterpretq_u16_u8(pixel_argb.val[2]), 8), 8)));
                int16x8_t g = vreinterpretq_s16_u16(vmovl_u8(
                        vqshrn_n_u16(vshlq_n_u16(vreinterpretq_u16_u8(pixel_argb.val[1]), 8), 8)));
                int16x8_t b = vreinterpretq_s16_u16(vmovl_u8(
                        vqshrn_n_u16(vshlq_n_u16(vreinterpretq_u16_u8(pixel_argb.val[0]), 8), 8)));
 
                // vshlq_n_u16,结果是 uint16x8_t
                // vqshrn_n_u16,结果是 uint8x8_t
                // vmovl_u8,结果是 uint16x8_t
#endif
                int16x8_t u;
                int16x8_t v;
                uint8x8x2_t uv;
 
                u = vmulq_s16(r, u_scalar);
                v = vmulq_s16(r, v_scalar);
 
                u_scalar = vdupq_n_s16(-74);
                v_scalar = vdupq_n_s16(-94);
                u = vmlaq_s16(u, g, u_scalar);
                v = vmlaq_s16(v, g, v_scalar);
 
                u_scalar = vdupq_n_s16(112);
                v_scalar = vdupq_n_s16(-18);
                u = vmlaq_s16(u, b, u_scalar);
                v = vmlaq_s16(v, b, v_scalar);
 
                u = vaddq_s16(u, s16_rounding);
                v = vaddq_s16(v, s16_rounding);
 
                uv.val[1] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(u, 8), s8_rounding));
//                 测试
//                int8x8_t test_3 = vqshrn_n_s16(u, 8);
//                int8x8_t test_4 = vadd_s8(test_3, s8_rounding); //
//                uint8x8_t test_5 = vreinterpret_u8_s8(test_4);
 
                uv.val[0] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(v, 8), s8_rounding));
 
                vst2_u8(yuv420sp + uvIndex, uv);
 
                uvIndex += 2 * 8;
            }
        }
 
        // 处理余数的好办法
        for (i = ((width >> 4) << 4); i < width; i++) {
            uint8_t R = argb[2];
            uint8_t G = argb[1];
            uint8_t B = argb[0];
            argb += 4;
 
            // well known RGB to YUV algorithm
            uint8_t Y = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
            uint8_t U = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
            uint8_t V = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;
 
            // NV21有一个 Y 平面和 V-U 交叉平面,每一个平面的采样值都是 2
            // 意思是每4个 Y 像素(上下左右,不是横向连续的四个)对应1个 V 和1个 U
            // 像素和其他扫描线。
            yuv420sp[yIndex++] = Y;
            if (j % 2 == 0 && i % 2 == 0) {
                yuv420sp[uvIndex++] = V;
                yuv420sp[uvIndex++] = U;
            }
        }
    }
}      

通过 objdump 生成 so 库的反汇编

 命令行如下:

>J:\Programs\Android\sdk\ndk-bundle\toolchains\aarch64-linux-android-4.9\prebuilt\windows-x86_64\bin\aarch64-linux-android-objdump.exe -d libnative-lib.so > objdump_d.txt      

一.   arm-linux-objdump

常用来显示二进制文件信息,常用来查看反汇编代码

二.   常用选项:

1.-b bfdname 指定目标码格式

2. --disassemble 或者 -d 反汇编可执行段

3. --disassemble-all 或者 -D 反汇编所有段

4.-EB,-EL指定字节序

5. --file-headers 或者 -f 显示文件的整体头部摘要信息

6. --section-headers、--headers 或者 -h 显示目标文件中各个段的头部摘要信息

7. --info 或者 -i 显示支持的目标文件格式和CPU架构

8. --section=name 或者 -j name 显示指定 section 的信息

9. --architecture=machine 或者 -m machine 指定反汇编目标文件时使用的架构

三.   示例

arm-linux-objdump -D elf_file > dis_file 或者

arm-linux-objdump -D -b binary -m arm bin_file > dis_file

  •  C/C++  实现版:
000410dc <_Z18encodeYUV420SP_CPUPhS_ii>:
   410dc:   b5b0        push    {r4, r5, r7, lr}
   410de:   af02        add r7, sp, #8
   410e0:   b08f        sub sp, #60 ; 0x3c
   410e2:   469c        mov ip, r3
   410e4:   4696        mov lr, r2
   410e6:   460c        mov r4, r1
   410e8:   4605        mov r5, r0
   410ea:   900e        str r0, [sp, #56]   ; 0x38
   410ec:   910d        str r1, [sp, #52]   ; 0x34
   410ee:   920c        str r2, [sp, #48]   ; 0x30
   410f0:   930b        str r3, [sp, #44]   ; 0x2c
   410f2:   980c        ldr r0, [sp, #48]   ; 0x30
   410f4:   990b        ldr r1, [sp, #44]   ; 0x2c
   410f6:   4348        muls    r0, r1
   410f8:   900a        str r0, [sp, #40]   ; 0x28
   410fa:   2000        movs    r0, #0
   410fc:   9009        str r0, [sp, #36]   ; 0x24
   410fe:   990a        ldr r1, [sp, #40]   ; 0x28
   41100:   9108        str r1, [sp, #32]
   41102:   9007        str r0, [sp, #28]
   41104:   f8cd c00c   str.w   ip, [sp, #12]
   41108:   f8cd e008   str.w   lr, [sp, #8]
   4110c:   9401        str r4, [sp, #4]
   4110e:   9500        str r5, [sp, #0]
   41110:   e7ff        b.n 41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36>
   41112:   9807        ldr r0, [sp, #28]
   41114:   990b        ldr r1, [sp, #44]   ; 0x2c
   41116:   4288        cmp r0, r1
   41118:   f280 808c   bge.w   41234 <_Z18encodeYUV420SP_CPUPhS_ii+0x158>
   4111c:   e7ff        b.n 4111e <_Z18encodeYUV420SP_CPUPhS_ii+0x42>
   4111e:   2000        movs    r0, #0
   41120:   9006        str r0, [sp, #24]
   41122:   e7ff        b.n 41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48>
   41124:   9806        ldr r0, [sp, #24]
   41126:   990c        ldr r1, [sp, #48]   ; 0x30
   41128:   4288        cmp r0, r1
   4112a:   da7e        bge.n   4122a <_Z18encodeYUV420SP_CPUPhS_ii+0x14e>
   4112c:   e7ff        b.n 4112e <_Z18encodeYUV420SP_CPUPhS_ii+0x52>
   4112e:   980d        ldr r0, [sp, #52]   ; 0x34
   41130:   7880        ldrb    r0, [r0, #2]
   41132:   f807 0c2d   strb.w  r0, [r7, #-45]
   41136:   980d        ldr r0, [sp, #52]   ; 0x34
   41138:   7840        ldrb    r0, [r0, #1]
   4113a:   f807 0c2e   strb.w  r0, [r7, #-46]
   4113e:   980d        ldr r0, [sp, #52]   ; 0x34
   41140:   7800        ldrb    r0, [r0, #0]
   41142:   f807 0c2f   strb.w  r0, [r7, #-47]
   41146:   980d        ldr r0, [sp, #52]   ; 0x34
   41148:   3004        adds    r0, #4
   4114a:   900d        str r0, [sp, #52]   ; 0x34
   4114c:   f817 0c2d   ldrb.w  r0, [r7, #-45]
   41150:   eb00 1040   add.w   r0, r0, r0, lsl #5
   41154:   f817 1c2e   ldrb.w  r1, [r7, #-46]
   41158:   eb01 11c1   add.w   r1, r1, r1, lsl #7
   4115c:   eb01 0040   add.w   r0, r1, r0, lsl #1
   41160:   f817 1c2f   ldrb.w  r1, [r7, #-47]
   41164:   2219        movs    r2, #25
   41166:   fb11 0002   smlabb  r0, r1, r2, r0
   4116a:   3080        adds    r0, #128    ; 0x80
   4116c:   2110        movs    r1, #16
   4116e:   eb01 2010   add.w   r0, r1, r0, lsr #8
   41172:   f88d 0014   strb.w  r0, [sp, #20]
   41176:   f817 0c2d   ldrb.w  r0, [r7, #-45]
   4117a:   f06f 0125   mvn.w   r1, #37 ; 0x25
   4117e:   fb10 f001   smulbb  r0, r0, r1
   41182:   f817 1c2e   ldrb.w  r1, [r7, #-46]
   41186:   224a        movs    r2, #74 ; 0x4a
   41188:   fb01 0012   mls r0, r1, r2, r0
   4118c:   f817 1c2f   ldrb.w  r1, [r7, #-47]
   41190:   ebc1 01c1   rsb r1, r1, r1, lsl #3
   41194:   eb00 1001   add.w   r0, r0, r1, lsl #4
   41198:   3080        adds    r0, #128    ; 0x80
   4119a:   2180        movs    r1, #128    ; 0x80
   4119c:   eb01 2010   add.w   r0, r1, r0, lsr #8
   411a0:   f807 0c31   strb.w  r0, [r7, #-49]
   411a4:   f817 0c2d   ldrb.w  r0, [r7, #-45]
   411a8:   ebc0 00c0   rsb r0, r0, r0, lsl #3
   411ac:   f817 2c2e   ldrb.w  r2, [r7, #-46]
   411b0:   235e        movs    r3, #94 ; 0x5e
   411b2:   fb12 f203   smulbb  r2, r2, r3
   411b6:   ebc2 1000   rsb r0, r2, r0, lsl #4
   411ba:   f817 2c2f   ldrb.w  r2, [r7, #-47]
   411be:   eb02 02c2   add.w   r2, r2, r2, lsl #3
   411c2:   eba0 0042   sub.w   r0, r0, r2, lsl #1
   411c6:   3080        adds    r0, #128    ; 0x80
   411c8:   eb01 2010   add.w   r0, r1, r0, lsr #8
   411cc:   f807 0c32   strb.w  r0, [r7, #-50]
   411d0:   f89d 0014   ldrb.w  r0, [sp, #20]
   411d4:   990e        ldr r1, [sp, #56]   ; 0x38
   411d6:   9a09        ldr r2, [sp, #36]   ; 0x24
   411d8:   1c53        adds    r3, r2, #1
   411da:   9309        str r3, [sp, #36]   ; 0x24
   411dc:   5488        strb    r0, [r1, r2]
   411de:   9807        ldr r0, [sp, #28]
   411e0:   eb00 71d0   add.w   r1, r0, r0, lsr #31
   411e4:   f021 0101   bic.w   r1, r1, #1
   411e8:   1a40        subs    r0, r0, r1
   411ea:   2800        cmp r0, #0
   411ec:   d118        bne.n   41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
   411ee:   e7ff        b.n 411f0 <_Z18encodeYUV420SP_CPUPhS_ii+0x114>
   411f0:   9806        ldr r0, [sp, #24]
   411f2:   eb00 71d0   add.w   r1, r0, r0, lsr #31
   411f6:   f021 0101   bic.w   r1, r1, #1
   411fa:   1a40        subs    r0, r0, r1
   411fc:   2800        cmp r0, #0
   411fe:   d10f        bne.n   41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
   41200:   e7ff        b.n 41202 <_Z18encodeYUV420SP_CPUPhS_ii+0x126>
   41202:   f817 0c32   ldrb.w  r0, [r7, #-50]
   41206:   990e        ldr r1, [sp, #56]   ; 0x38
   41208:   9a08        ldr r2, [sp, #32]
   4120a:   1c53        adds    r3, r2, #1
   4120c:   9308        str r3, [sp, #32]
   4120e:   5488        strb    r0, [r1, r2]
   41210:   f817 0c31   ldrb.w  r0, [r7, #-49]
   41214:   990e        ldr r1, [sp, #56]   ; 0x38
   41216:   9a08        ldr r2, [sp, #32]
   41218:   1c53        adds    r3, r2, #1
   4121a:   9308        str r3, [sp, #32]
   4121c:   5488        strb    r0, [r1, r2]
   4121e:   e7ff        b.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
   41220:   e7ff        b.n 41222 <_Z18encodeYUV420SP_CPUPhS_ii+0x146>
   41222:   9806        ldr r0, [sp, #24]
   41224:   3001        adds    r0, #1
   41226:   9006        str r0, [sp, #24]
   41228:   e77c        b.n 41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48>
   4122a:   e7ff        b.n 4122c <_Z18encodeYUV420SP_CPUPhS_ii+0x150>
   4122c:   9807        ldr r0, [sp, #28]
   4122e:   3001        adds    r0, #1
   41230:   9007        str r0, [sp, #28]
   41232:   e76e        b.n 41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36>
   41234:   b00f        add sp, #60 ; 0x3c
   41236:   bdb0        pop {r4, r5, r7, pc}      
  • Neon 实现版:
00041238 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii>:
   41238:   b5f0        push    {r4, r5, r6, r7, lr}
   4123a:   af03        add r7, sp, #12
   4123c:   e92d 0b00   stmdb   sp!, {r8, r9, fp}
   41240:   f5ad 6d35   sub.w   sp, sp, #2896   ; 0xb50
   41244:   466c        mov r4, sp
   41246:   f36f 0403   bfc r4, #0, #4
   4124a:   46a5        mov sp, r4
   4124c:   f50d 6cd6   add.w   ip, sp, #1712   ; 0x6b0
   41250:   f10d 0e30   add.w   lr, sp, #48 ; 0x30
   41254:   461c        mov r4, r3
   41256:   4615        mov r5, r2
   41258:   460e        mov r6, r1
   4125a:   4680        mov r8, r0
   4125c:   f8df 9c0c   ldr.w   r9, [pc, #3084] ; 41e6c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc34>
   41260:   44f9        add r9, pc
   41262:   f8d9 9000   ldr.w   r9, [r9]
   41266:   f8d9 9000   ldr.w   r9, [r9]
   4126a:   f8cd 9024   str.w   r9, [sp, #36]   ; 0x24
   4126e:   906f        str r0, [sp, #444]  ; 0x1bc
   41270:   916e        str r1, [sp, #440]  ; 0x1b8
   41272:   926d        str r2, [sp, #436]  ; 0x1b4
   41274:   936c        str r3, [sp, #432]  ; 0x1b0
   41276:   2080        movs    r0, #128    ; 0x80
   41278:   f8ad 01ee   strh.w  r0, [sp, #494]  ; 0x1ee
   4127c:   f50d 71f7   add.w   r1, sp, #494    ; 0x1ee
   41280:   f9e1 0c7f   vld1.16 {d16[]-d17[]}, [r1 :16]
   41284:   a970        add r1, sp, #448    ; 0x1c0
   41286:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   4128a:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   4128e:   a974        add r1, sp, #464    ; 0x1d0
   41290:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41294:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41298:   a968        add r1, sp, #416    ; 0x1a0
   4129a:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   4129e:   f8ad 0a4e   strh.w  r0, [sp, #2638] ; 0xa4e
   412a2:   f60d 214e   addw    r1, sp, #2638   ; 0xa4e
   412a6:   f9e1 0c7f   vld1.16 {d16[]-d17[]}, [r1 :16]
   412aa:   f50d 6122   add.w   r1, sp, #2592   ; 0xa20
   412ae:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   412b2:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   412b6:   f50d 6123   add.w   r1, sp, #2608   ; 0xa30
   412ba:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   412be:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   412c2:   a964        add r1, sp, #400    ; 0x190
   412c4:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   412c8:   f88d 0a1f   strb.w  r0, [sp, #2591] ; 0xa1f
   412cc:   f60d 201f   addw    r0, sp, #2591   ; 0xa1f
   412d0:   f9e0 2c0f   vld1.8  {d18[]}, [r0]
   412d4:   edcc 2bd6   vstr    d18, [ip, #856] ; 0x358
   412d8:   eddc 2bd6   vldr    d18, [ip, #856] ; 0x358
   412dc:   edcc 2bd8   vstr    d18, [ip, #864] ; 0x360
   412e0:   eddc 2bd8   vldr    d18, [ip, #864] ; 0x360
   412e4:   edce 2b56   vstr    d18, [lr, #344] ; 0x158
   412e8:   2010        movs    r0, #16
   412ea:   f88d 0a07   strb.w  r0, [sp, #2567] ; 0xa07
   412ee:   f60d 2007   addw    r0, sp, #2567   ; 0xa07
   412f2:   f9e0 0c2f   vld1.8  {d16[]-d17[]}, [r0]
   412f6:   f50d 601e   add.w   r0, sp, #2528   ; 0x9e0
   412fa:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   412fe:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41302:   f50d 601f   add.w   r0, sp, #2544   ; 0x9f0
   41306:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   4130a:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   4130e:   a85c        add r0, sp, #368    ; 0x170
   41310:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41314:   20ff        movs    r0, #255    ; 0xff
   41316:   f8ad 09de   strh.w  r0, [sp, #2526] ; 0x9de
   4131a:   f60d 10de   addw    r0, sp, #2526   ; 0x9de
   4131e:   f9e0 0c7f   vld1.16 {d16[]-d17[]}, [r0 :16]
   41322:   f50d 601b   add.w   r0, sp, #2480   ; 0x9b0
   41326:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   4132a:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   4132e:   f50d 601c   add.w   r0, sp, #2496   ; 0x9c0
   41332:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41336:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   4133a:   a858        add r0, sp, #352    ; 0x160
   4133c:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41340:   986d        ldr r0, [sp, #436]  ; 0x1b4
   41342:   996c        ldr r1, [sp, #432]  ; 0x1b0
   41344:   4348        muls    r0, r1
   41346:   9057        str r0, [sp, #348]  ; 0x15c
   41348:   2000        movs    r0, #0
   4134a:   9056        str r0, [sp, #344]  ; 0x158
   4134c:   9957        ldr r1, [sp, #348]  ; 0x15c
   4134e:   9155        str r1, [sp, #340]  ; 0x154
   41350:   9053        str r0, [sp, #332]  ; 0x14c
   41352:   f8cd c020   str.w   ip, [sp, #32]
   41356:   f8cd e01c   str.w   lr, [sp, #28]
   4135a:   9406        str r4, [sp, #24]
   4135c:   9505        str r5, [sp, #20]
   4135e:   9604        str r6, [sp, #16]
   41360:   f8cd 800c   str.w   r8, [sp, #12]
   41364:   e7ff        b.n 41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e>
   41366:   9853        ldr r0, [sp, #332]  ; 0x14c
   41368:   996c        ldr r1, [sp, #432]  ; 0x1b0
   4136a:   4288        cmp r0, r1
   4136c:   f280 856d   bge.w   41e4a <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc12>
   41370:   e7ff        b.n 41372 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x13a>
   41372:   2000        movs    r0, #0
   41374:   9054        str r0, [sp, #336]  ; 0x150
   41376:   e7ff        b.n 41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140>
   41378:   9854        ldr r0, [sp, #336]  ; 0x150
   4137a:   996d        ldr r1, [sp, #436]  ; 0x1b4
   4137c:   ebb0 1f21   cmp.w   r0, r1, asr #4
   41380:   f280 84d5   bge.w   41d2e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaf6>
   41384:   e7ff        b.n 41386 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x14e>
   41386:   986e        ldr r0, [sp, #440]  ; 0x1b8
   41388:   f960 010d   vld4.8  {d16,d18,d20,d22}, [r0]!
   4138c:   f960 110f   vld4.8  {d17,d19,d21,d23}, [r0]
   41390:   ef66 81f6   vorr    q12, q11, q11
   41394:   ef62 a1f2   vorr    q13, q9, q9
   41398:   ef64 c1f4   vorr    q14, q10, q10
   4139c:   ef60 e1f0   vorr    q15, q8, q8
   413a0:   f50d 602c   add.w   r0, sp, #2752   ; 0xac0
   413a4:   f940 eacf   vst1.64 {d30-d31}, [r0]
   413a8:   f100 0120   add.w   r1, r0, #32
   413ac:   f941 cacf   vst1.64 {d28-d29}, [r1]
   413b0:   4602        mov r2, r0
   413b2:   f962 caed   vld1.64 {d28-d29}, [r2 :128]!
   413b6:   f942 aacf   vst1.64 {d26-d27}, [r2]
   413ba:   3030        adds    r0, #48 ; 0x30
   413bc:   f940 8acf   vst1.64 {d24-d25}, [r0]
   413c0:   f50d 6330   add.w   r3, sp, #2816   ; 0xb00
   413c4:   f103 0c20   add.w   ip, r3, #32
   413c8:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   413cc:   f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   413d0:   4619        mov r1, r3
   413d2:   f941 caed   vst1.64 {d28-d29}, [r1 :128]!
   413d6:   f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   413da:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   413de:   f103 0230   add.w   r2, r3, #48 ; 0x30
   413e2:   f960 8aef   vld1.64 {d24-d25}, [r0 :128]
   413e6:   f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   413ea:   986e        ldr r0, [sp, #440]  ; 0x1b8
   413ec:   3040        adds    r0, #64 ; 0x40
   413ee:   906e        str r0, [sp, #440]  ; 0x1b8
   413f0:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   413f4:   f50d 601a   add.w   r0, sp, #2464   ; 0x9a0
   413f8:   f940 8aef   vst1.64 {d24-d25}, [r0 :128]
   413fc:   f960 8aef   vld1.64 {d24-d25}, [r0 :128]
   41400:   eeb0 0b68   vmov.f64    d0, d24
   41404:   9808        ldr r0, [sp, #32]
   41406:   ed80 0bba   vstr    d0, [r0, #744]  ; 0x2e8
   4140a:   ed90 0bba   vldr    d0, [r0, #744]  ; 0x2e8
   4140e:   f50d 622b   add.w   r2, sp, #2736   ; 0xab0
   41412:   f902 071d   vst1.8  {d0}, [r2 :64]!
   41416:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   4141a:   f50d 6c18   add.w   ip, sp, #2432   ; 0x980
   4141e:   f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41422:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   41426:   eeb0 0b69   vmov.f64    d0, d25
   4142a:   ed80 0bb2   vstr    d0, [r0, #712]  ; 0x2c8
   4142e:   ed90 0bb2   vldr    d0, [r0, #712]  ; 0x2c8
   41432:   ed82 0b00   vstr    d0, [r2]
   41436:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4143a:   f50d 6c16   add.w   ip, sp, #2400   ; 0x960
   4143e:   f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41442:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   41446:   eeb0 0b68   vmov.f64    d0, d24
   4144a:   ed80 0baa   vstr    d0, [r0, #680]  ; 0x2a8
   4144e:   ed90 0baa   vldr    d0, [r0, #680]  ; 0x2a8
   41452:   f50d 6c2a   add.w   ip, sp, #2720   ; 0xaa0
   41456:   f90c 071d   vst1.8  {d0}, [ip :64]!
   4145a:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4145e:   f50d 6114   add.w   r1, sp, #2368   ; 0x940
   41462:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41466:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4146a:   eeb0 0b69   vmov.f64    d0, d25
   4146e:   ed80 0ba2   vstr    d0, [r0, #648]  ; 0x288
   41472:   ed90 0ba2   vldr    d0, [r0, #648]  ; 0x288
   41476:   ed8c 0b00   vstr    d0, [ip]
   4147a:   f963 8aef   vld1.64 {d24-d25}, [r3 :128]
   4147e:   f50d 6112   add.w   r1, sp, #2336   ; 0x920
   41482:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41486:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4148a:   eeb0 0b68   vmov.f64    d0, d24
   4148e:   ed80 0b9a   vstr    d0, [r0, #616]  ; 0x268
   41492:   ed90 0b9a   vldr    d0, [r0, #616]  ; 0x268
   41496:   f50d 6129   add.w   r1, sp, #2704   ; 0xa90
   4149a:   f901 071d   vst1.8  {d0}, [r1 :64]!
   4149e:   f963 8aef   vld1.64 {d24-d25}, [r3 :128]
   414a2:   f50d 6310   add.w   r3, sp, #2304   ; 0x900
   414a6:   f943 8aef   vst1.64 {d24-d25}, [r3 :128]
   414aa:   f963 8aef   vld1.64 {d24-d25}, [r3 :128]
   414ae:   eeb0 0b69   vmov.f64    d0, d25
   414b2:   ed80 0b92   vstr    d0, [r0, #584]  ; 0x248
   414b6:   ed90 0b92   vldr    d0, [r0, #584]  ; 0x248
   414ba:   ed81 0b00   vstr    d0, [r1]
   414be:   2342        movs    r3, #66 ; 0x42
   414c0:   f88d 38f7   strb.w  r3, [sp, #2295] ; 0x8f7
   414c4:   f60d 03f7   addw    r3, sp, #2295   ; 0x8f7
   414c8:   f9a3 0c0f   vld1.8  {d0[]}, [r3]
   414cc:   ed80 0b8c   vstr    d0, [r0, #560]  ; 0x230
   414d0:   ed90 0b8c   vldr    d0, [r0, #560]  ; 0x230
   414d4:   ed80 0b8e   vstr    d0, [r0, #568]  ; 0x238
   414d8:   ed90 0b8e   vldr    d0, [r0, #568]  ; 0x238
   414dc:   9b07        ldr r3, [sp, #28]
   414de:   ed83 0b44   vstr    d0, [r3, #272]  ; 0x110
   414e2:   f50d 6400   add.w   r4, sp, #2048   ; 0x800
   414e6:   ed94 0bac   vldr    d0, [r4, #688]  ; 0x2b0
   414ea:   ed93 1b44   vldr    d1, [r3, #272]  ; 0x110
   414ee:   ed80 0b8a   vstr    d0, [r0, #552]  ; 0x228
   414f2:   ed80 1b88   vstr    d1, [r0, #544]  ; 0x220
   414f6:   ed90 0b8a   vldr    d0, [r0, #552]  ; 0x228
   414fa:   ed90 1b88   vldr    d1, [r0, #544]  ; 0x220
   414fe:   ffc0 8c01   vmull.u8    q12, d0, d1
   41502:   f50d 6e0c   add.w   lr, sp, #2240   ; 0x8c0
   41506:   f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   4150a:   f96e 8aef   vld1.64 {d24-d25}, [lr :128]
   4150e:   f50d 6e27   add.w   lr, sp, #2672   ; 0xa70
   41512:   f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   41516:   ed92 0b00   vldr    d0, [r2]
   4151a:   ed93 1b44   vldr    d1, [r3, #272]  ; 0x110
   4151e:   ed80 0b82   vstr    d0, [r0, #520]  ; 0x208
   41522:   ed80 1b80   vstr    d1, [r0, #512]  ; 0x200
   41526:   ed90 0b82   vldr    d0, [r0, #520]  ; 0x208
   4152a:   ed90 1b80   vldr    d1, [r0, #512]  ; 0x200
   4152e:   ffc0 8c01   vmull.u8    q12, d0, d1
   41532:   f50d 620a   add.w   r2, sp, #2208   ; 0x8a0
   41536:   f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   4153a:   f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   4153e:   2281        movs    r2, #129    ; 0x81
   41540:   f88d 289f   strb.w  r2, [sp, #2207] ; 0x89f
   41544:   f60d 029f   addw    r2, sp, #2207   ; 0x89f
   41548:   f9a2 0c0f   vld1.8  {d0[]}, [r2]
   4154c:   ed80 0b76   vstr    d0, [r0, #472]  ; 0x1d8
   41550:   ed90 0b76   vldr    d0, [r0, #472]  ; 0x1d8
   41554:   ed80 0b78   vstr    d0, [r0, #480]  ; 0x1e0
   41558:   ed90 0b78   vldr    d0, [r0, #480]  ; 0x1e0
   4155c:   ed83 0b44   vstr    d0, [r3, #272]  ; 0x110
   41560:   4672        mov r2, lr
   41562:   f962 aa6d   vld1.16 {d26-d27}, [r2 :128]!
   41566:   f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   4156a:   ed90 0bfc   vldr    d0, [r0, #1008] ; 0x3f0
   4156e:   ed93 1b44   vldr    d1, [r3, #272]  ; 0x110
   41572:   f50d 6405   add.w   r4, sp, #2128   ; 0x850
   41576:   f944 aaef   vst1.64 {d26-d27}, [r4 :128]
   4157a:   ed80 0b66   vstr    d0, [r0, #408]  ; 0x198
   4157e:   ed80 1b64   vstr    d1, [r0, #400]  ; 0x190
   41582:   f964 8aef   vld1.64 {d24-d25}, [r4 :128]
   41586:   ed90 0b66   vldr    d0, [r0, #408]  ; 0x198
   4158a:   ed90 1b64   vldr    d1, [r0, #400]  ; 0x190
   4158e:   ed80 0b74   vstr    d0, [r0, #464]  ; 0x1d0
   41592:   ed80 1b72   vstr    d1, [r0, #456]  ; 0x1c8
   41596:   ed90 0b74   vldr    d0, [r0, #464]  ; 0x1d0
   4159a:   ed90 1b72   vldr    d1, [r0, #456]  ; 0x1c8
   4159e:   ffc0 ac01   vmull.u8    q13, d0, d1
   415a2:   f50d 6406   add.w   r4, sp, #2144   ; 0x860
   415a6:   f944 aaef   vst1.64 {d26-d27}, [r4 :128]
   415aa:   f964 aaef   vld1.64 {d26-d27}, [r4 :128]
   415ae:   ef58 88ea   vadd.i16    q12, q12, q13
   415b2:   f50d 6403   add.w   r4, sp, #2096   ; 0x830
   415b6:   f944 8aef   vst1.64 {d24-d25}, [r4 :128]
   415ba:   f964 8aef   vld1.64 {d24-d25}, [r4 :128]
   415be:   f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   415c2:   f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   415c6:   ed9c 0b00   vldr    d0, [ip]
   415ca:   ed93 1b44   vldr    d1, [r3, #272]  ; 0x110
   415ce:   f50d 6c00   add.w   ip, sp, #2048   ; 0x800
   415d2:   f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   415d6:   ed80 0b52   vstr    d0, [r0, #328]  ; 0x148
   415da:   ed80 1b50   vstr    d1, [r0, #320]  ; 0x140
   415de:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   415e2:   ed90 0b52   vldr    d0, [r0, #328]  ; 0x148
   415e6:   ed90 1b50   vldr    d1, [r0, #320]  ; 0x140
   415ea:   ed80 0b5e   vstr    d0, [r0, #376]  ; 0x178
   415ee:   ed80 1b5c   vstr    d1, [r0, #368]  ; 0x170
   415f2:   ed90 0b5e   vldr    d0, [r0, #376]  ; 0x178
   415f6:   ed90 1b5c   vldr    d1, [r0, #368]  ; 0x170
   415fa:   ffc0 ac01   vmull.u8    q13, d0, d1
   415fe:   f50d 6c01   add.w   ip, sp, #2064   ; 0x810
   41602:   f94c aaef   vst1.64 {d26-d27}, [ip :128]
   41606:   f96c aaef   vld1.64 {d26-d27}, [ip :128]
   4160a:   ef58 88ea   vadd.i16    q12, q12, q13
   4160e:   f50d 6cfc   add.w   ip, sp, #2016   ; 0x7e0
   41612:   f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41616:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   4161a:   f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   4161e:   f04f 0c19   mov.w   ip, #25
   41622:   f88d c7df   strb.w  ip, [sp, #2015] ; 0x7df
   41626:   f20d 7cdf   addw    ip, sp, #2015   ; 0x7df
   4162a:   f9ac 0c0f   vld1.8  {d0[]}, [ip]
   4162e:   ed80 0b46   vstr    d0, [r0, #280]  ; 0x118
   41632:   ed90 0b46   vldr    d0, [r0, #280]  ; 0x118
   41636:   ed80 0b48   vstr    d0, [r0, #288]  ; 0x120
   4163a:   ed90 0b48   vldr    d0, [r0, #288]  ; 0x120
   4163e:   ed83 0b44   vstr    d0, [r3, #272]  ; 0x110
   41642:   f96e 8aef   vld1.64 {d24-d25}, [lr :128]
   41646:   ed90 0bf8   vldr    d0, [r0, #992]  ; 0x3e0
   4164a:   ed93 1b44   vldr    d1, [r3, #272]  ; 0x110
   4164e:   f50d 6cf2   add.w   ip, sp, #1936   ; 0x790
   41652:   f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41656:   ed80 0b36   vstr    d0, [r0, #216]  ; 0xd8
   4165a:   ed80 1b34   vstr    d1, [r0, #208]  ; 0xd0
   4165e:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   41662:   ed90 0b36   vldr    d0, [r0, #216]  ; 0xd8
   41666:   ed90 1b34   vldr    d1, [r0, #208]  ; 0xd0
   4166a:   ed80 0b44   vstr    d0, [r0, #272]  ; 0x110
   4166e:   ed80 1b42   vstr    d1, [r0, #264]  ; 0x108
   41672:   ed90 0b44   vldr    d0, [r0, #272]  ; 0x110
   41676:   ed90 1b42   vldr    d1, [r0, #264]  ; 0x108
   4167a:   ffc0 ac01   vmull.u8    q13, d0, d1
   4167e:   f50d 6cf4   add.w   ip, sp, #1952   ; 0x7a0
   41682:   f94c aaef   vst1.64 {d26-d27}, [ip :128]
   41686:   f96c aaef   vld1.64 {d26-d27}, [ip :128]
   4168a:   ef58 88ea   vadd.i16    q12, q12, q13
   4168e:   f50d 6cee   add.w   ip, sp, #1904   ; 0x770
   41692:   f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41696:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   4169a:   f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   4169e:   f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   416a2:   ed91 0b00   vldr    d0, [r1]
   416a6:   ed93 1b44   vldr    d1, [r3, #272]  ; 0x110
   416aa:   f50d 61e8   add.w   r1, sp, #1856   ; 0x740
   416ae:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   416b2:   ed80 0b22   vstr    d0, [r0, #136]  ; 0x88
   416b6:   ed80 1b20   vstr    d1, [r0, #128]  ; 0x80
   416ba:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   416be:   ed90 0b22   vldr    d0, [r0, #136]  ; 0x88
   416c2:   ed90 1b20   vldr    d1, [r0, #128]  ; 0x80
   416c6:   ed80 0b2e   vstr    d0, [r0, #184]  ; 0xb8
   416ca:   ed80 1b2c   vstr    d1, [r0, #176]  ; 0xb0
   416ce:   ed90 0b2e   vldr    d0, [r0, #184]  ; 0xb8
   416d2:   ed90 1b2c   vldr    d1, [r0, #176]  ; 0xb0
   416d6:   ffc0 ac01   vmull.u8    q13, d0, d1
   416da:   f50d 61ea   add.w   r1, sp, #1872   ; 0x750
   416de:   f941 aaef   vst1.64 {d26-d27}, [r1 :128]
   416e2:   f961 aaef   vld1.64 {d26-d27}, [r1 :128]
   416e6:   ef58 88ea   vadd.i16    q12, q12, q13
   416ea:   f50d 61e4   add.w   r1, sp, #1824   ; 0x720
   416ee:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   416f2:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   416f6:   f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   416fa:   f96e 8aef   vld1.64 {d24-d25}, [lr :128]
   416fe:   a968        add r1, sp, #416    ; 0x1a0
   41700:   f961 aaef   vld1.64 {d26-d27}, [r1 :128]
   41704:   f50d 6ce2   add.w   ip, sp, #1808   ; 0x710
   41708:   f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   4170c:   f50d 64e0   add.w   r4, sp, #1792   ; 0x700
   41710:   f944 aaef   vst1.64 {d26-d27}, [r4 :128]
   41714:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   41718:   f964 aaef   vld1.64 {d26-d27}, [r4 :128]
   4171c:   ef58 88ea   vadd.i16    q12, q12, q13
   41720:   f50d 6cde   add.w   ip, sp, #1776   ; 0x6f0
   41724:   f94c 8aef   vst1.64 {d24-d25}, [ip :128]
   41728:   f96c 8aef   vld1.64 {d24-d25}, [ip :128]
   4172c:   f94e 8aef   vst1.64 {d24-d25}, [lr :128]
   41730:   f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   41734:   f961 aaef   vld1.64 {d26-d27}, [r1 :128]
   41738:   f50d 61dc   add.w   r1, sp, #1760   ; 0x6e0
   4173c:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41740:   f50d 6cda   add.w   ip, sp, #1744   ; 0x6d0
   41744:   f94c aaef   vst1.64 {d26-d27}, [ip :128]
   41748:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4174c:   f96c aaef   vld1.64 {d26-d27}, [ip :128]
   41750:   ef58 88ea   vadd.i16    q12, q12, q13
   41754:   f50d 61d8   add.w   r1, sp, #1728   ; 0x6c0
   41758:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   4175c:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   41760:   f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   41764:   f96e 8aef   vld1.64 {d24-d25}, [lr :128]
   41768:   a948        add r1, sp, #288    ; 0x120
   4176a:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   4176e:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   41772:   ff88 0938   vqshrn.u16  d0, q12, #8
   41776:   ed83 0b3a   vstr    d0, [r3, #232]  ; 0xe8
   4177a:   ed93 0b3a   vldr    d0, [r3, #232]  ; 0xe8
   4177e:   ed83 0b38   vstr    d0, [r3, #224]  ; 0xe0
   41782:   ed93 0b38   vldr    d0, [r3, #224]  ; 0xe0
   41786:   f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   4178a:   a940        add r1, sp, #256    ; 0x100
   4178c:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41790:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   41794:   ff88 1938   vqshrn.u16  d1, q12, #8
   41798:   ed83 1b32   vstr    d1, [r3, #200]  ; 0xc8
   4179c:   ed93 1b32   vldr    d1, [r3, #200]  ; 0xc8
   417a0:   ed83 1b30   vstr    d1, [r3, #192]  ; 0xc0
   417a4:   ed93 1b30   vldr    d1, [r3, #192]  ; 0xc0
   417a8:   ed80 0b02   vstr    d0, [r0, #8]
   417ac:   ed80 1b00   vstr    d1, [r0]
   417b0:   ed90 0b02   vldr    d0, [r0, #8]
   417b4:   ed90 1b00   vldr    d1, [r0]
   417b8:   eef0 8b40   vmov.f64    d24, d0
   417bc:   eef0 9b41   vmov.f64    d25, d1
   417c0:   f50d 61d4   add.w   r1, sp, #1696   ; 0x6a0
   417c4:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   417c8:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   417cc:   a94c        add r1, sp, #304    ; 0x130
   417ce:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   417d2:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   417d6:   aa5c        add r2, sp, #368    ; 0x170
   417d8:   f962 aaef   vld1.64 {d26-d27}, [r2 :128]
   417dc:   f50d 62d2   add.w   r2, sp, #1680   ; 0x690
   417e0:   f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   417e4:   f50d 6cd0   add.w   ip, sp, #1664   ; 0x680
   417e8:   f94c aaef   vst1.64 {d26-d27}, [ip :128]
   417ec:   f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   417f0:   f96c aaef   vld1.64 {d26-d27}, [ip :128]
   417f4:   ef48 88ea   vadd.i8 q12, q12, q13
   417f8:   f50d 62ce   add.w   r2, sp, #1648   ; 0x670
   417fc:   f942 8aef   vst1.64 {d24-d25}, [r2 :128]
   41800:   f962 8aef   vld1.64 {d24-d25}, [r2 :128]
   41804:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41808:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4180c:   a938        add r1, sp, #224    ; 0xe0
   4180e:   f941 8aef   vst1.64 {d24-d25}, [r1 :128]
   41812:   9a6f        ldr r2, [sp, #444]  ; 0x1bc
   41814:   f8dd c158   ldr.w   ip, [sp, #344]  ; 0x158
   41818:   4462        add r2, ip
   4181a:   f961 8aef   vld1.64 {d24-d25}, [r1 :128]
   4181e:   f942 8a0f   vst1.8  {d24-d25}, [r2]
   41822:   9956        ldr r1, [sp, #344]  ; 0x158
   41824:   3110        adds    r1, #16
   41826:   9156        str r1, [sp, #344]  ; 0x158
   41828:   9953        ldr r1, [sp, #332]  ; 0x14c
   4182a:   eb01 72d1   add.w   r2, r1, r1, lsr #31
   4182e:   f022 0201   bic.w   r2, r2, #1
   41832:   1a89        subs    r1, r1, r2
   41834:   2900        cmp r1, #0
   41836:   f040 8274   bne.w   41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea>
   4183a:   e7ff        b.n 4183c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x604>
   4183c:   f64f 70da   movw    r0, #65498  ; 0xffda
   41840:   f8ad 066e   strh.w  r0, [sp, #1646] ; 0x66e
   41844:   f20d 606e   addw    r0, sp, #1646   ; 0x66e
   41848:   f9e0 0c7f   vld1.16 {d16[]-d17[]}, [r0 :16]
   4184c:   f50d 60c8   add.w   r0, sp, #1600   ; 0x640
   41850:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41854:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41858:   f50d 60ca   add.w   r0, sp, #1616   ; 0x650
   4185c:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41860:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41864:   a834        add r0, sp, #208    ; 0xd0
   41866:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   4186a:   2170        movs    r1, #112    ; 0x70
   4186c:   f8ad 163e   strh.w  r1, [sp, #1598] ; 0x63e
   41870:   f20d 623e   addw    r2, sp, #1598   ; 0x63e
   41874:   f9e2 0c7f   vld1.16 {d16[]-d17[]}, [r2 :16]
   41878:   f50d 62c2   add.w   r2, sp, #1552   ; 0x610
   4187c:   f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41880:   f962 0aef   vld1.64 {d16-d17}, [r2 :128]
   41884:   f50d 62c4   add.w   r2, sp, #1568   ; 0x620
   41888:   f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   4188c:   f962 0aef   vld1.64 {d16-d17}, [r2 :128]
   41890:   aa30        add r2, sp, #192    ; 0xc0
   41892:   f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41896:   f50d 6330   add.w   r3, sp, #2816   ; 0xb00
   4189a:   f103 0c20   add.w   ip, r3, #32
   4189e:   f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   418a2:   f50d 6cc0   add.w   ip, sp, #1536   ; 0x600
   418a6:   f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   418aa:   f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   418ae:   f50d 6cbe   add.w   ip, sp, #1520   ; 0x5f0
   418b2:   f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   418b6:   f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   418ba:   f50d 7cb0   add.w   ip, sp, #352    ; 0x160
   418be:   f96c 2aef   vld1.64 {d18-d19}, [ip :128]
   418c2:   f50d 6ebc   add.w   lr, sp, #1504   ; 0x5e0
   418c6:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   418ca:   f50d 64ba   add.w   r4, sp, #1488   ; 0x5d0
   418ce:   f944 2aef   vst1.64 {d18-d19}, [r4 :128]
   418d2:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   418d6:   f964 2aef   vld1.64 {d18-d19}, [r4 :128]
   418da:   ef40 01f2   vand    q8, q8, q9
   418de:   f50d 6eb8   add.w   lr, sp, #1472   ; 0x5c0
   418e2:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   418e6:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   418ea:   f50d 6eb6   add.w   lr, sp, #1456   ; 0x5b0
   418ee:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   418f2:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   418f6:   f50d 6eb4   add.w   lr, sp, #1440   ; 0x5a0
   418fa:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   418fe:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41902:   f10d 0eb0   add.w   lr, sp, #176    ; 0xb0
   41906:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   4190a:   f103 0410   add.w   r4, r3, #16
   4190e:   f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41912:   f50d 64b2   add.w   r4, sp, #1424   ; 0x590
   41916:   f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   4191a:   f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   4191e:   f50d 64b0   add.w   r4, sp, #1408   ; 0x580
   41922:   f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41926:   f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   4192a:   f96c 2aef   vld1.64 {d18-d19}, [ip :128]
   4192e:   f50d 64ae   add.w   r4, sp, #1392   ; 0x570
   41932:   f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41936:   f50d 65ac   add.w   r5, sp, #1376   ; 0x560
   4193a:   f945 2aef   vst1.64 {d18-d19}, [r5 :128]
   4193e:   f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41942:   f965 2aef   vld1.64 {d18-d19}, [r5 :128]
   41946:   ef40 01f2   vand    q8, q8, q9
   4194a:   f50d 64aa   add.w   r4, sp, #1360   ; 0x550
   4194e:   f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41952:   f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41956:   f50d 64a8   add.w   r4, sp, #1344   ; 0x540
   4195a:   f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   4195e:   f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41962:   f50d 64a6   add.w   r4, sp, #1328   ; 0x530
   41966:   f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   4196a:   f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   4196e:   ac28        add r4, sp, #160    ; 0xa0
   41970:   f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41974:   f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   41978:   f50d 63a4   add.w   r3, sp, #1312   ; 0x520
   4197c:   f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   41980:   f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   41984:   f50d 63a2   add.w   r3, sp, #1296   ; 0x510
   41988:   f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   4198c:   f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   41990:   f96c 2aef   vld1.64 {d18-d19}, [ip :128]
   41994:   f50d 63a0   add.w   r3, sp, #1280   ; 0x500
   41998:   f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   4199c:   f50d 6c9e   add.w   ip, sp, #1264   ; 0x4f0
   419a0:   f94c 2aef   vst1.64 {d18-d19}, [ip :128]
   419a4:   f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   419a8:   f96c 2aef   vld1.64 {d18-d19}, [ip :128]
   419ac:   ef40 01f2   vand    q8, q8, q9
   419b0:   f50d 639c   add.w   r3, sp, #1248   ; 0x4e0
   419b4:   f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   419b8:   f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   419bc:   f50d 639a   add.w   r3, sp, #1232   ; 0x4d0
   419c0:   f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   419c4:   f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   419c8:   f50d 6398   add.w   r3, sp, #1216   ; 0x4c0
   419cc:   f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   419d0:   f963 0aef   vld1.64 {d16-d17}, [r3 :128]
   419d4:   ab24        add r3, sp, #144    ; 0x90
   419d6:   f943 0aef   vst1.64 {d16-d17}, [r3 :128]
   419da:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   419de:   f960 2aef   vld1.64 {d18-d19}, [r0 :128]
   419e2:   f50d 6c96   add.w   ip, sp, #1200   ; 0x4b0
   419e6:   f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   419ea:   f50d 6594   add.w   r5, sp, #1184   ; 0x4a0
   419ee:   f945 2aef   vst1.64 {d18-d19}, [r5 :128]
   419f2:   f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   419f6:   f965 2aef   vld1.64 {d18-d19}, [r5 :128]
   419fa:   ef50 09f2   vmul.i16    q8, q8, q9
   419fe:   f50d 6c92   add.w   ip, sp, #1168   ; 0x490
   41a02:   f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41a06:   f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41a0a:   f10d 0c80   add.w   ip, sp, #128    ; 0x80
   41a0e:   f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41a12:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41a16:   f962 2aef   vld1.64 {d18-d19}, [r2 :128]
   41a1a:   f50d 6e90   add.w   lr, sp, #1152   ; 0x480
   41a1e:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41a22:   f50d 658e   add.w   r5, sp, #1136   ; 0x470
   41a26:   f945 2aef   vst1.64 {d18-d19}, [r5 :128]
   41a2a:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41a2e:   f965 2aef   vld1.64 {d18-d19}, [r5 :128]
   41a32:   ef50 09f2   vmul.i16    q8, q8, q9
   41a36:   f50d 6e8c   add.w   lr, sp, #1120   ; 0x460
   41a3a:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41a3e:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41a42:   f10d 0e70   add.w   lr, sp, #112    ; 0x70
   41a46:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41a4a:   f64f 75b6   movw    r5, #65462  ; 0xffb6
   41a4e:   f8ad 545e   strh.w  r5, [sp, #1118] ; 0x45e
   41a52:   f20d 455e   addw    r5, sp, #1118   ; 0x45e
   41a56:   f9e5 0c7f   vld1.16 {d16[]-d17[]}, [r5 :16]
   41a5a:   f50d 6586   add.w   r5, sp, #1072   ; 0x430
   41a5e:   f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41a62:   f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41a66:   f50d 6588   add.w   r5, sp, #1088   ; 0x440
   41a6a:   f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41a6e:   f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41a72:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41a76:   f64f 75a2   movw    r5, #65442  ; 0xffa2
   41a7a:   f8ad 542e   strh.w  r5, [sp, #1070] ; 0x42e
   41a7e:   f20d 452e   addw    r5, sp, #1070   ; 0x42e
   41a82:   f9e5 0c7f   vld1.16 {d16[]-d17[]}, [r5 :16]
   41a86:   f50d 6580   add.w   r5, sp, #1024   ; 0x400
   41a8a:   f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41a8e:   f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41a92:   f50d 6582   add.w   r5, sp, #1040   ; 0x410
   41a96:   f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41a9a:   f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41a9e:   f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41aa2:   f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41aa6:   f964 2aef   vld1.64 {d18-d19}, [r4 :128]
   41aaa:   f960 4aef   vld1.64 {d20-d21}, [r0 :128]
   41aae:   adfc        add r5, sp, #1008   ; 0x3f0
   41ab0:   f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41ab4:   aef8        add r6, sp, #992    ; 0x3e0
   41ab6:   f946 2aef   vst1.64 {d18-d19}, [r6 :128]
   41aba:   f50d 7874   add.w   r8, sp, #976    ; 0x3d0
   41abe:   f948 4aef   vst1.64 {d20-d21}, [r8 :128]
   41ac2:   f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41ac6:   f966 2aef   vld1.64 {d18-d19}, [r6 :128]
   41aca:   f968 4aef   vld1.64 {d20-d21}, [r8 :128]
   41ace:   ef52 09e4   vmla.i16    q8, q9, q10
   41ad2:   adf0        add r5, sp, #960    ; 0x3c0
   41ad4:   f945 0aef   vst1.64 {d16-d17}, [r5 :128]
   41ad8:   f965 0aef   vld1.64 {d16-d17}, [r5 :128]
   41adc:   f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41ae0:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41ae4:   f964 2aef   vld1.64 {d18-d19}, [r4 :128]
   41ae8:   f962 4aef   vld1.64 {d20-d21}, [r2 :128]
   41aec:   acec        add r4, sp, #944    ; 0x3b0
   41aee:   f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41af2:   ade8        add r5, sp, #928    ; 0x3a0
   41af4:   f945 2aef   vst1.64 {d18-d19}, [r5 :128]
   41af8:   aee4        add r6, sp, #912    ; 0x390
   41afa:   f946 4aef   vst1.64 {d20-d21}, [r6 :128]
   41afe:   f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41b02:   f965 2aef   vld1.64 {d18-d19}, [r5 :128]
   41b06:   f966 4aef   vld1.64 {d20-d21}, [r6 :128]
   41b0a:   ef52 09e4   vmla.i16    q8, q9, q10
   41b0e:   ace0        add r4, sp, #896    ; 0x380
   41b10:   f944 0aef   vst1.64 {d16-d17}, [r4 :128]
   41b14:   f964 0aef   vld1.64 {d16-d17}, [r4 :128]
   41b18:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41b1c:   f8ad 137e   strh.w  r1, [sp, #894]  ; 0x37e
   41b20:   f20d 317e   addw    r1, sp, #894    ; 0x37e
   41b24:   f9e1 0c7f   vld1.16 {d16[]-d17[]}, [r1 :16]
   41b28:   a9d4        add r1, sp, #848    ; 0x350
   41b2a:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41b2e:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41b32:   a9d8        add r1, sp, #864    ; 0x360
   41b34:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41b38:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41b3c:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41b40:   f64f 71ee   movw    r1, #65518  ; 0xffee
   41b44:   f8ad 134e   strh.w  r1, [sp, #846]  ; 0x34e
   41b48:   f20d 314e   addw    r1, sp, #846    ; 0x34e
   41b4c:   f9e1 0c7f   vld1.16 {d16[]-d17[]}, [r1 :16]
   41b50:   a9c8        add r1, sp, #800    ; 0x320
   41b52:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41b56:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41b5a:   a9cc        add r1, sp, #816    ; 0x330
   41b5c:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41b60:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41b64:   f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41b68:   f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41b6c:   f963 2aef   vld1.64 {d18-d19}, [r3 :128]
   41b70:   f960 4aef   vld1.64 {d20-d21}, [r0 :128]
   41b74:   a8c4        add r0, sp, #784    ; 0x310
   41b76:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41b7a:   a9c0        add r1, sp, #768    ; 0x300
   41b7c:   f941 2aef   vst1.64 {d18-d19}, [r1 :128]
   41b80:   acbc        add r4, sp, #752    ; 0x2f0
   41b82:   f944 4aef   vst1.64 {d20-d21}, [r4 :128]
   41b86:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41b8a:   f961 2aef   vld1.64 {d18-d19}, [r1 :128]
   41b8e:   f964 4aef   vld1.64 {d20-d21}, [r4 :128]
   41b92:   ef52 09e4   vmla.i16    q8, q9, q10
   41b96:   a8b8        add r0, sp, #736    ; 0x2e0
   41b98:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41b9c:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41ba0:   f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41ba4:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41ba8:   f963 2aef   vld1.64 {d18-d19}, [r3 :128]
   41bac:   f962 4aef   vld1.64 {d20-d21}, [r2 :128]
   41bb0:   a8b4        add r0, sp, #720    ; 0x2d0
   41bb2:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41bb6:   a9b0        add r1, sp, #704    ; 0x2c0
   41bb8:   f941 2aef   vst1.64 {d18-d19}, [r1 :128]
   41bbc:   aaac        add r2, sp, #688    ; 0x2b0
   41bbe:   f942 4aef   vst1.64 {d20-d21}, [r2 :128]
   41bc2:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41bc6:   f961 2aef   vld1.64 {d18-d19}, [r1 :128]
   41bca:   f962 4aef   vld1.64 {d20-d21}, [r2 :128]
   41bce:   ef52 09e4   vmla.i16    q8, q9, q10
   41bd2:   a8a8        add r0, sp, #672    ; 0x2a0
   41bd4:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41bd8:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41bdc:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41be0:   f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41be4:   a864        add r0, sp, #400    ; 0x190
   41be6:   f960 2aef   vld1.64 {d18-d19}, [r0 :128]
   41bea:   a9a4        add r1, sp, #656    ; 0x290
   41bec:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41bf0:   aaa0        add r2, sp, #640    ; 0x280
   41bf2:   f942 2aef   vst1.64 {d18-d19}, [r2 :128]
   41bf6:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41bfa:   f962 2aef   vld1.64 {d18-d19}, [r2 :128]
   41bfe:   ef50 08e2   vadd.i16    q8, q8, q9
   41c02:   a99c        add r1, sp, #624    ; 0x270
   41c04:   f941 0aef   vst1.64 {d16-d17}, [r1 :128]
   41c08:   f961 0aef   vld1.64 {d16-d17}, [r1 :128]
   41c0c:   f94c 0aef   vst1.64 {d16-d17}, [ip :128]
   41c10:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41c14:   f960 2aef   vld1.64 {d18-d19}, [r0 :128]
   41c18:   a898        add r0, sp, #608    ; 0x260
   41c1a:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41c1e:   a994        add r1, sp, #592    ; 0x250
   41c20:   f941 2aef   vst1.64 {d18-d19}, [r1 :128]
   41c24:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41c28:   f961 2aef   vld1.64 {d18-d19}, [r1 :128]
   41c2c:   ef50 08e2   vadd.i16    q8, q8, q9
   41c30:   a890        add r0, sp, #576    ; 0x240
   41c32:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41c36:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41c3a:   f94e 0aef   vst1.64 {d16-d17}, [lr :128]
   41c3e:   f96c 0aef   vld1.64 {d16-d17}, [ip :128]
   41c42:   a818        add r0, sp, #96 ; 0x60
   41c44:   f940 0aef   vst1.64 {d16-d17}, [r0 :128]
   41c48:   f960 0aef   vld1.64 {d16-d17}, [r0 :128]
   41c4c:   efc8 6930   vqshrn.s16  d22, q8, #8
   41c50:   9807        ldr r0, [sp, #28]
   41c52:   edc0 6b0a   vstr    d22, [r0, #40]  ; 0x28
   41c56:   edd0 6b0a   vldr    d22, [r0, #40]  ; 0x28
   41c5a:   edc0 6b08   vstr    d22, [r0, #32]
   41c5e:   edd0 6b08   vldr    d22, [r0, #32]
   41c62:   edd0 7b56   vldr    d23, [r0, #344] ; 0x158
   41c66:   edc0 6b82   vstr    d22, [r0, #520] ; 0x208
   41c6a:   edc0 7b80   vstr    d23, [r0, #512] ; 0x200
   41c6e:   edd0 6b82   vldr    d22, [r0, #520] ; 0x208
   41c72:   edd0 7b80   vldr    d23, [r0, #512] ; 0x200
   41c76:   ef46 68a7   vadd.i8 d22, d22, d23
   41c7a:   edc0 6b7e   vstr    d22, [r0, #504] ; 0x1f8
   41c7e:   edd0 6b7e   vldr    d22, [r0, #504] ; 0x1f8
   41c82:   edc0 6b7c   vstr    d22, [r0, #496] ; 0x1f0
   41c86:   edd0 6b7c   vldr    d22, [r0, #496] ; 0x1f0
   41c8a:   edc0 6b7a   vstr    d22, [r0, #488] ; 0x1e8
   41c8e:   edd0 6b7a   vldr    d22, [r0, #488] ; 0x1e8
   41c92:   9908        ldr r1, [sp, #32]
   41c94:   edc1 6bee   vstr    d22, [r1, #952] ; 0x3b8
   41c98:   f96e 0aef   vld1.64 {d16-d17}, [lr :128]
   41c9c:   aa10        add r2, sp, #64 ; 0x40
   41c9e:   f942 0aef   vst1.64 {d16-d17}, [r2 :128]
   41ca2:   f962 0aef   vld1.64 {d16-d17}, [r2 :128]
   41ca6:   efc8 6930   vqshrn.s16  d22, q8, #8
   41caa:   edc0 6b02   vstr    d22, [r0, #8]
   41cae:   edd0 6b02   vldr    d22, [r0, #8]
   41cb2:   edc0 6b00   vstr    d22, [r0]
   41cb6:   edd0 6b00   vldr    d22, [r0]
   41cba:   edd0 7b56   vldr    d23, [r0, #344] ; 0x158
   41cbe:   edc0 6b78   vstr    d22, [r0, #480] ; 0x1e0
   41cc2:   edc0 7b76   vstr    d23, [r0, #472] ; 0x1d8
   41cc6:   edd0 6b78   vldr    d22, [r0, #480] ; 0x1e0
   41cca:   edd0 7b76   vldr    d23, [r0, #472] ; 0x1d8
   41cce:   ef46 68a7   vadd.i8 d22, d22, d23
   41cd2:   edc0 6b74   vstr    d22, [r0, #464] ; 0x1d0
   41cd6:   edd0 6b74   vldr    d22, [r0, #464] ; 0x1d0
   41cda:   edc0 6b72   vstr    d22, [r0, #456] ; 0x1c8
   41cde:   edd0 6b72   vldr    d22, [r0, #456] ; 0x1c8
   41ce2:   edc0 6b70   vstr    d22, [r0, #448] ; 0x1c0
   41ce6:   edd0 6b70   vldr    d22, [r0, #448] ; 0x1c0
   41cea:   edc1 6bec   vstr    d22, [r1, #944] ; 0x3b0
   41cee:   f50d 6226   add.w   r2, sp, #2656   ; 0xa60
   41cf2:   f962 0acf   vld1.64 {d16-d17}, [r2]
   41cf6:   f50d 6225   add.w   r2, sp, #2640   ; 0xa50
   41cfa:   f942 0acf   vst1.64 {d16-d17}, [r2]
   41cfe:   9b6f        ldr r3, [sp, #444]  ; 0x1bc
   41d00:   f8dd c154   ldr.w   ip, [sp, #340]  ; 0x154
   41d04:   4463        add r3, ip
   41d06:   f962 671d   vld1.8  {d22}, [r2 :64]!
   41d0a:   edd2 7b00   vldr    d23, [r2]
   41d0e:   eef0 0b66   vmov.f64    d16, d22
   41d12:   eef0 1b67   vmov.f64    d17, d23
   41d16:   f943 080f   vst2.8  {d16-d17}, [r3]
   41d1a:   9a55        ldr r2, [sp, #340]  ; 0x154
   41d1c:   3210        adds    r2, #16
   41d1e:   9255        str r2, [sp, #340]  ; 0x154
   41d20:   e7ff        b.n 41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea>
   41d22:   e7ff        b.n 41d24 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaec>
   41d24:   9854        ldr r0, [sp, #336]  ; 0x150
   41d26:   3001        adds    r0, #1
   41d28:   9054        str r0, [sp, #336]  ; 0x150
   41d2a:   f7ff bb25   b.w 41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140>
   41d2e:   986d        ldr r0, [sp, #436]  ; 0x1b4
   41d30:   f020 000f   bic.w   r0, r0, #15
   41d34:   9054        str r0, [sp, #336]  ; 0x150
   41d36:   e7ff        b.n 41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00>
   41d38:   9854        ldr r0, [sp, #336]  ; 0x150
   41d3a:   996d        ldr r1, [sp, #436]  ; 0x1b4
   41d3c:   4288        cmp r0, r1
   41d3e:   da7e        bge.n   41e3e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc06>
   41d40:   e7ff        b.n 41d42 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb0a>
   41d42:   986e        ldr r0, [sp, #440]  ; 0x1b8
   41d44:   7880        ldrb    r0, [r0, #2]
   41d46:   f88d 002f   strb.w  r0, [sp, #47]   ; 0x2f
   41d4a:   986e        ldr r0, [sp, #440]  ; 0x1b8
   41d4c:   7840        ldrb    r0, [r0, #1]
   41d4e:   f88d 002e   strb.w  r0, [sp, #46]   ; 0x2e
   41d52:   986e        ldr r0, [sp, #440]  ; 0x1b8
   41d54:   7800        ldrb    r0, [r0, #0]
   41d56:   f88d 002d   strb.w  r0, [sp, #45]   ; 0x2d
   41d5a:   986e        ldr r0, [sp, #440]  ; 0x1b8
   41d5c:   3004        adds    r0, #4
   41d5e:   906e        str r0, [sp, #440]  ; 0x1b8
   41d60:   f89d 002f   ldrb.w  r0, [sp, #47]   ; 0x2f
   41d64:   eb00 1040   add.w   r0, r0, r0, lsl #5
   41d68:   f89d 102e   ldrb.w  r1, [sp, #46]   ; 0x2e
   41d6c:   eb01 11c1   add.w   r1, r1, r1, lsl #7
   41d70:   eb01 0040   add.w   r0, r1, r0, lsl #1
   41d74:   f89d 102d   ldrb.w  r1, [sp, #45]   ; 0x2d
   41d78:   2219        movs    r2, #25
   41d7a:   fb11 0002   smlabb  r0, r1, r2, r0
   41d7e:   3080        adds    r0, #128    ; 0x80
   41d80:   2110        movs    r1, #16
   41d82:   eb01 2010   add.w   r0, r1, r0, lsr #8
   41d86:   f88d 002c   strb.w  r0, [sp, #44]   ; 0x2c
   41d8a:   f89d 002f   ldrb.w  r0, [sp, #47]   ; 0x2f
   41d8e:   f06f 0125   mvn.w   r1, #37 ; 0x25
   41d92:   fb10 f001   smulbb  r0, r0, r1
   41d96:   f89d 102e   ldrb.w  r1, [sp, #46]   ; 0x2e
   41d9a:   224a        movs    r2, #74 ; 0x4a
   41d9c:   fb01 0012   mls r0, r1, r2, r0
   41da0:   f89d 102d   ldrb.w  r1, [sp, #45]   ; 0x2d
   41da4:   ebc1 01c1   rsb r1, r1, r1, lsl #3
   41da8:   eb00 1001   add.w   r0, r0, r1, lsl #4
   41dac:   3080        adds    r0, #128    ; 0x80
   41dae:   2180        movs    r1, #128    ; 0x80
   41db0:   eb01 2010   add.w   r0, r1, r0, lsr #8
   41db4:   f88d 002b   strb.w  r0, [sp, #43]   ; 0x2b
   41db8:   f89d 002f   ldrb.w  r0, [sp, #47]   ; 0x2f
   41dbc:   ebc0 00c0   rsb r0, r0, r0, lsl #3
   41dc0:   f89d 202e   ldrb.w  r2, [sp, #46]   ; 0x2e
   41dc4:   235e        movs    r3, #94 ; 0x5e
   41dc6:   fb12 f203   smulbb  r2, r2, r3
   41dca:   ebc2 1000   rsb r0, r2, r0, lsl #4
   41dce:   f89d 202d   ldrb.w  r2, [sp, #45]   ; 0x2d
   41dd2:   eb02 02c2   add.w   r2, r2, r2, lsl #3
   41dd6:   eba0 0042   sub.w   r0, r0, r2, lsl #1
   41dda:   3080        adds    r0, #128    ; 0x80
   41ddc:   eb01 2010   add.w   r0, r1, r0, lsr #8
   41de0:   f88d 002a   strb.w  r0, [sp, #42]   ; 0x2a
   41de4:   f89d 002c   ldrb.w  r0, [sp, #44]   ; 0x2c
   41de8:   996f        ldr r1, [sp, #444]  ; 0x1bc
   41dea:   9a56        ldr r2, [sp, #344]  ; 0x158
   41dec:   1c53        adds    r3, r2, #1
   41dee:   9356        str r3, [sp, #344]  ; 0x158
   41df0:   5488        strb    r0, [r1, r2]
   41df2:   9853        ldr r0, [sp, #332]  ; 0x14c
   41df4:   eb00 71d0   add.w   r1, r0, r0, lsr #31
   41df8:   f021 0101   bic.w   r1, r1, #1
   41dfc:   1a40        subs    r0, r0, r1
   41dfe:   2800        cmp r0, #0
   41e00:   d118        bne.n   41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
   41e02:   e7ff        b.n 41e04 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbcc>
   41e04:   9854        ldr r0, [sp, #336]  ; 0x150
   41e06:   eb00 71d0   add.w   r1, r0, r0, lsr #31
   41e0a:   f021 0101   bic.w   r1, r1, #1
   41e0e:   1a40        subs    r0, r0, r1
   41e10:   2800        cmp r0, #0
   41e12:   d10f        bne.n   41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
   41e14:   e7ff        b.n 41e16 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbde>
   41e16:   f89d 002a   ldrb.w  r0, [sp, #42]   ; 0x2a
   41e1a:   996f        ldr r1, [sp, #444]  ; 0x1bc
   41e1c:   9a55        ldr r2, [sp, #340]  ; 0x154
   41e1e:   1c53        adds    r3, r2, #1
   41e20:   9355        str r3, [sp, #340]  ; 0x154
   41e22:   5488        strb    r0, [r1, r2]
   41e24:   f89d 002b   ldrb.w  r0, [sp, #43]   ; 0x2b
   41e28:   996f        ldr r1, [sp, #444]  ; 0x1bc
   41e2a:   9a55        ldr r2, [sp, #340]  ; 0x154
   41e2c:   1c53        adds    r3, r2, #1
   41e2e:   9355        str r3, [sp, #340]  ; 0x154
   41e30:   5488        strb    r0, [r1, r2]
   41e32:   e7ff        b.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
   41e34:   e7ff        b.n 41e36 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfe>
   41e36:   9854        ldr r0, [sp, #336]  ; 0x150
   41e38:   3001        adds    r0, #1
   41e3a:   9054        str r0, [sp, #336]  ; 0x150
   41e3c:   e77c        b.n 41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00>
   41e3e:   e7ff        b.n 41e40 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc08>
   41e40:   9853        ldr r0, [sp, #332]  ; 0x14c
   41e42:   3001        adds    r0, #1
   41e44:   9053        str r0, [sp, #332]  ; 0x14c
   41e46:   f7ff ba8e   b.w 41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e>
   41e4a:   4809        ldr r0, [pc, #36]   ; (41e70 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc38>)
   41e4c:   4478        add r0, pc
   41e4e:   6800        ldr r0, [r0, #0]
   41e50:   6800        ldr r0, [r0, #0]
   41e52:   9909        ldr r1, [sp, #36]   ; 0x24
   41e54:   4288        cmp r0, r1
   41e56:   d106        bne.n   41e66 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc2e>
   41e58:   e7ff        b.n 41e5a <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc22>
   41e5a:   f1a7 0418   sub.w   r4, r7, #24
   41e5e:   46a5        mov sp, r4
   41e60:   e8bd 0b00   ldmia.w sp!, {r8, r9, fp}
   41e64:   bdf0        pop {r4, r5, r6, r7, pc}
   41e66:   f7f8 ec6e   blx 3a744 <__stack_chk_fail@plt>
   41e6a:   bf00        nop
   41e6c:   001ef0f8    .word   0x001ef0f8
   41e70:   001ee50c    .word   0x001ee50c      

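NEON 版在我的手机上是“负优化”……跑得比朴素的 CPU 版还慢。看来网上给的 NEON 代码也未必靠谱,还是得亲自实践对比!(补充:从上面的反汇编可以看到,几乎每条向量指令的结果都被立即写回栈、随后又原样读出,并且存在跳转到下一条指令的空分支,这很像是未开启编译优化(例如 -O0)时的产物;因此变慢的原因可能主要在编译选项,而不一定是 NEON 代码本身,建议在 -O2 及以上重新对比。)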

RGBA 编码为 YUV420SP【NEON】

经过我的优化后,NEON 版耗时约 53 ms,循环展开(一次)版约 51 ms;原图大小为 1600×1873。

RGBA 编码为 YUV420SP【NEON】

继续阅读