天天看點

堆記憶體double free問題分析報告

【NE現場】

pid: 1044, tid: 1073, name: Binder_1  >>> com.android.systemui <<<
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x543dc400
    r0 543dc3f4  r1 20ed7351  r2 00011ff8  r3 aaaaaaab
    r4 b5008680  r5 b0600000  r6 000000d9  r7 56752275
    r8 b6f526b4  r9 b6f52fbc  sl 00000000  fp b5008760
    ip 6becebf1  sp a5eb52f8  lr 035f675f  pc b6f1f546  cpsr 60030030
backtrace:
    #00 pc 0003d546  /system/lib/libc.so (arena_run_dalloc+97)
    #01 pc 0003e5e9  /system/lib/libc.so (je_arena_dalloc_large+24)
    #02 pc 000463d9  /system/lib/libc.so (ifree+700)
    #03 pc 0000fa13  /system/lib/libc.so (free+10)
    #04 pc 00018f15  /system/lib/libandroidfw.so (android::StreamingZipInflater::~StreamingZipInflater()+44)
    #05 pc 0000dc3f  /system/lib/libandroidfw.so (android::_CompressedAsset::getBuffer(bool)+62)
    #06 pc 000177c3  /system/lib/libandroidfw.so (android::ResTable::add(android::Asset*, android::Asset*, int, bool)+22)
    #07 pc 00010a33  /system/lib/libandroidfw.so (android::AssetManager::appendPathToResTable(android::AssetManager::asset_path const&, unsigned int*) const+270)
    #08 pc 00010e5f  /system/lib/libandroidfw.so (android::AssetManager::getResTable(bool) const+102)
    #09 pc 00080935  /system/lib/libandroid_runtime.so
    #10 pc 0001a963  /data/dalvik-cache/arm/system@framework@boot.oat

【分析問題】

凡是調用棧裡有malloc和free的,基本上都是堆記憶體問題。對于這類問題我們必須有core檔案才能分析。

(gdb) bt
#0  0xb6f6f546 in arena_run_dalloc (arena@entry=0xb5127040, run=<optimized out>, dirty@entry=true, cleaned@entry=false) at external/jemalloc/src/arena.c:1270
#1  0xb6f705c2 in je_arena_dalloc_large_locked (arena@entry=0xb5127040, chunk@entry=0x8a900000, ptr@entry=0x8a90a000) at external/jemalloc/src/arena.c:2063
#2  0xb6f705ec in je_arena_dalloc_large (arena=0xb5127040, chunk@entry=0x8a900000, ptr@entry=0x8a90a000) at external/jemalloc/src/arena.c:2071
#3  0xb6f783dc in je_arena_dalloc (try_tcache=true, ptr=0x8a90a000, chunk=0x8a900000) at external/jemalloc/include/jemalloc/internal/arena.h:1168
#4  je_idalloct (try_tcache=true, ptr=0x8a90a000) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:774
#5  je_iqalloct (try_tcache=true, ptr=0x8a90a000) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:793
#6  je_iqalloc (ptr=0x8a90a000) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:800
#7  ifree (ptr=0x8a90a000) at external/jemalloc/src/jemalloc.c:1228
#8  0xb6f41a14 in free (mem=<optimized out>) at bionic/libc/bionic/malloc_debug_common.cpp:251
#9  0xb6c74f06 in android::StreamingZipInflater::~StreamingZipInflater (this=0x8eb24280, __in_chrg=<optimized out>) at frameworks/base/libs/androidfw/StreamingZipInflater.cpp:93
#10 0xb6c69c42 in android::_CompressedAsset::getBuffer (this=0x8eb32330) at frameworks/base/libs/androidfw/Asset.cpp:887
#11 0xb6c737c4 in android::ResTable::add (this=0x8e870c00, asset@entry=0x8eb32330, idmapAsset@entry=0x0, cookie@entry=5, copyData@entry=false)
    at frameworks/base/libs/androidfw/ResourceTypes.cpp:3371
#12 0xb6c6ca36 in android::AssetManager::appendPathToResTable (this@entry=0x8e838390, ap=..., entryIdx@entry=0x89de3444) at frameworks/base/libs/androidfw/AssetManager.cpp:678
#13 0xb6c6ce62 in android::AssetManager::getResTable (this=0x8e838390, required=<optimized out>) at frameworks/base/libs/androidfw/AssetManager.cpp:727
#14 0xb6c6cebc in android::AssetManager::getResources (this=<optimized out>, required@entry=true) at frameworks/base/libs/androidfw/AssetManager.cpp:813
#15 0xb6ea5b20 in android::android_content_AssetManager_getStringBlockCount (env=<optimized out>, clazz=<optimized out>) at frameworks/base/core/jni/android_util_AssetManager.cpp:886       

從#9,#10層可以看到,是android::_CompressedAsset::getBuffer()中釋放StreamingZipInflater對象時挂掉的。對應代碼為:

/*
 * Returns a pointer to the fully uncompressed asset data. The first
 * successful call inflates the data into a newly allocated buffer and
 * caches it in mBuf; later calls return the cached mBuf directly.
 *
 * On allocation, seek, or inflate failure the temporary buffer is freed
 * and the current value of mBuf is returned unchanged.
 *
 * NOTE(review): nothing visible in this function serializes concurrent
 * callers. Two threads that both see mBuf == NULL can each reach
 * "delete mZipInflater" below.
 */
const void* _CompressedAsset::getBuffer(bool)
{
    unsigned char* buf = NULL;
 
    /* Fast path: the asset has already been inflated. */
    if (mBuf != NULL)
        return mBuf;
 
    /*
     * Allocate a buffer and read the file into it.
     */
    buf = new unsigned char[mUncompressedLen];
    if (buf == NULL) {
        ALOGW("alloc %ld bytes failed\n", (long) mUncompressedLen);
        goto bail;
    }
 
    if (mMap != NULL) {
        /* Compressed data is available through mMap; inflate from it. */
        if (!ZipUtils::inflateToBuffer(mMap->getDataPtr(), buf,
                mUncompressedLen, mCompressedLen))
            goto bail;
    } else {
        assert(mFd >= 0);
 
        /*
         * Seek to the start of the compressed data.
         */
        if (lseek(mFd, mStart, SEEK_SET) != mStart)
            goto bail;
 
        /*
         * Expand the data into it.
         */
        if (!ZipUtils::inflateToBuffer(mFd, buf, mUncompressedLen,
                mCompressedLen))
            goto bail;
    }
 
    /*
     * Success - now that we have the full asset in RAM we
     * no longer need the streaming inflater
     */
    delete mZipInflater;        // <<<<<<< the crash happens here
    mZipInflater = NULL;
    
    /* Publish the buffer; clear buf so the cleanup below does not free it. */
    mBuf = buf;
    buf = NULL;
 
 bail:
    /* Shared exit: on success buf is NULL here; on failure it is freed. */
    delete[] buf;
    return mBuf;
}      

 #0層對應的代碼如下:

@external/jemalloc/src/arena.c

/*
 * Excerpt only: the quoted function continues past the last line shown.
 *
 * The visible part locates the chunk that contains `run`, computes the
 * run's page index inside that chunk, and determines the run size either
 * from the large-size map bits or from the bin info of run->bin.
 */
static void
arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
{
        arena_chunk_t *chunk;
        size_t size, run_ind, run_pages, flag_dirty;
 
        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
        /* Page index of the run, relative to the start of its chunk. */
        run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
        assert(run_ind >= map_bias);
        assert(run_ind < chunk_npages);
        if (arena_mapbits_large_get(chunk, run_ind) != 0) {
                /* Large bit set: the size is read from the map bits. */
                size = arena_mapbits_large_size_get(chunk, run_ind);
                assert(size == PAGE ||
                    arena_mapbits_large_size_get(chunk,
                    run_ind+(size>>LG_PAGE)-1) == 0);
        } else {
                /* Large bit clear: the size comes from the bin of run->bin. */
                size_t binind = arena_bin_index(arena, run->bin);
                arena_bin_info_t *bin_info = &arena_bin_info[binind]; 
                size = bin_info->run_size;     // <<<<<<<<<<<<<<<<<<<< frame #0
        }      

NE的點是size = bin_info->run_size,

arena_mapbits_large_get(chunk, run_ind)的判斷表示目前記憶體塊是否是large塊。這裡走了else,表示目前塊是small塊。

而調用棧裡#2中je_arena_dalloc_large表示這裡是large塊。其對應的判斷在#3層:

@external/jemalloc/include/jemalloc/internal/arena.h

/*
 * Frees `ptr`, which lies inside `chunk`, choosing the path from the
 * CHUNK_MAP_LARGE bit of the map bits of the page containing `ptr`.
 *
 * chunk:      chunk that contains ptr.
 * ptr:        allocation to free; must be non-NULL and must not be the
 *             chunk base address (both asserted).
 * try_tcache: when true, the thread cache is used if tcache_get() returns
 *             one (and, for large allocations, if size <= tcache_maxclass).
 */
JEMALLOC_ALWAYS_INLINE void
arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache)
{
        size_t pageind, mapbits;
        tcache_t *tcache;
 
        assert(ptr != NULL);
        assert(CHUNK_ADDR2BASE(ptr) != ptr);
 
        /* Index of the page holding ptr, relative to the chunk start. */
        pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
        mapbits = arena_mapbits_get(chunk, pageind);
        assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
        if ((mapbits & CHUNK_MAP_LARGE) == 0) {
                /* Small allocation. */
                if (try_tcache && (tcache = tcache_get(false)) != NULL) {
                        size_t binind;
 
                        binind = arena_ptr_small_binind_get(ptr, mapbits);
                        tcache_dalloc_small(tcache, ptr, binind);
                } else
                        arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
        } else {
                /* Large allocation: the size is read from the map bits. */
                size_t size = arena_mapbits_large_size_get(chunk, pageind);
 
                assert(((uintptr_t)ptr & PAGE_MASK) == 0);
 
                if (try_tcache && size <= tcache_maxclass && (tcache =
                    tcache_get(false)) != NULL) {
                        tcache_dalloc_large(tcache, ptr, size);
                } else
                        arena_dalloc_large(chunk->arena, chunk, ptr);    // <<<<<<<<<<<<<<<<<<<< frame #3
        }
}      

arena.h中的(mapbits & CHUNK_MAP_LARGE)和arena.c中的arena_mapbits_large_get(chunk, run_ind)是同樣的判斷。

如下面代碼:

/*
 * Returns the CHUNK_MAP_LARGE bit of the map bits for page `pageind` of
 * `chunk`: nonzero when the bit is set, 0 otherwise.
 */
JEMALLOC_ALWAYS_INLINE size_t
arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
{
        size_t mapbits;
 
        mapbits = arena_mapbits_get(chunk, pageind);
        return (mapbits & CHUNK_MAP_LARGE);
}      

為什麼同樣的判斷在不同時刻得到的值不一樣呢?很可能是同一時刻有别的執行流在操作這個值!

查找其他線程的目前調用棧,發現确實存在可疑線程:

(gdb) t 24
[Switching to thread 24 (LWP 4234)]
#0  syscall () at bionic/libc/arch-arm/bionic/syscall.S:44

(gdb) bt
#0  syscall () at bionic/libc/arch-arm/bionic/syscall.S:44
#1  0xb6f46016 in __futex (timeout=0x0, value=2, op=128, ftx=0xb5127048) at bionic/libc/private/bionic_futex.h:45
#2  __futex_wait_ex (ftx@entry=0xb5127048, shared@entry=false, value@entry=2, timeout=0x0) at bionic/libc/private/bionic_futex.h:66
#3  0xb6f463ac in _normal_lock (shared=0, mutex=0xb5127048) at bionic/libc/bionic/pthread_mutex.cpp:337
#4  pthread_mutex_lock (mutex@entry=0xb5127048) at bionic/libc/bionic/pthread_mutex.cpp:457
#5  0xb6f6fb78 in je_malloc_mutex_lock (mutex=0xb5127048) at external/jemalloc/include/jemalloc/internal/mutex.h:77
#6  arena_bin_nonfull_run_get (bin=0xb5127300, arena=0xb5127040) at external/jemalloc/src/arena.c:1468
#7  arena_bin_malloc_hard (arena@entry=0xb5127040, bin@entry=0xb5127300) at external/jemalloc/src/arena.c:1515
#8  0xb6f6fdf6 in je_arena_tcache_fill_small (arena=0xb5127040, tbin@entry=0x8eb0e098, binind@entry=5, prof_accumbytes@entry=0) at external/jemalloc/src/arena.c:1573
#9  0xb6f7d97a in je_tcache_alloc_small_hard (tcache@entry=0x8eb0e000, tbin@entry=0x8eb0e098, binind@entry=5) at external/jemalloc/src/tcache.c:72
#10 0xb6f79552 in je_tcache_alloc_small (zero=false, size=<optimized out>, tcache=0x8eb0e000) at external/jemalloc/include/jemalloc/internal/tcache.h:272
#11 je_arena_malloc (try_tcache=true, zero=false, size=<optimized out>, arena=0x0) at external/jemalloc/include/jemalloc/internal/arena.h:1074
#12 je_imalloct (arena=0x0, try_tcache=true, size=<optimized out>) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:647
#13 je_imalloc (size=<optimized out>) at external/jemalloc/include/jemalloc/internal/jemalloc_internal.h:656
#14 imalloc_body (usize=<synthetic pointer>, size=<optimized out>) at external/jemalloc/src/jemalloc.c:920
#15 je_malloc (size=<optimized out>) at external/jemalloc/src/jemalloc.c:932
#16 0xb6f41a40 in malloc (bytes=<optimized out>) at bionic/libc/bionic/malloc_debug_common.cpp:259
#17 0xb6f0eb0c in operator new (size@entry=44) at bionic/libc/bionic/new.cpp:26
#18 0xb6c73230 in android::ResTable::parsePackage (this@entry=0x8eb85180, pkg@entry=0x8a923b88, header=0x8eb58040) at frameworks/base/libs/androidfw/ResourceTypes.cpp:5806
#19 0xb6c73600 in android::ResTable::addInternal (this@entry=0x8eb85180, data=<optimized out>, data@entry=0x8a91a000, dataSize=<optimized out>, idmapData@entry=0x0,
    idmapDataSize@entry=0, cookie@entry=5, copyData@entry=false) at frameworks/base/libs/androidfw/ResourceTypes.cpp:3539
#20 0xb6c73824 in android::ResTable::add (this=0x8eb85180, asset@entry=0x8eb32330, idmapAsset@entry=0x0, cookie@entry=5, copyData@entry=false)
    at frameworks/base/libs/androidfw/ResourceTypes.cpp:3389
#21 0xb6c6ca36 in android::AssetManager::appendPathToResTable (this@entry=0x8eb2c160, ap=..., entryIdx@entry=0x8b8d4444) at frameworks/base/libs/androidfw/AssetManager.cpp:678
#22 0xb6c6ce62 in android::AssetManager::getResTable (this=0x8eb2c160, required=<optimized out>) at frameworks/base/libs/androidfw/AssetManager.cpp:727
#23 0xb6c6cebc in android::AssetManager::getResources (this=<optimized out>, required@entry=true) at frameworks/base/libs/androidfw/AssetManager.cpp:813
#24 0xb6ea5b20 in android::android_content_AssetManager_getStringBlockCount (env=<optimized out>, clazz=<optimized out>) at frameworks/base/core/jni/android_util_AssetManager.cpp:886      

對比兩個線程的調用棧:

1073線程:

#11 0xb6c737c4 in android::ResTable::add (this=0x8e870c00, asset@entry=0x8eb32330, idmapAsset@entry=0x0, cookie@entry=5, copyData@entry=false)
    at frameworks/base/libs/androidfw/ResourceTypes.cpp:3371

4234線程:

#20 0xb6c73824 in android::ResTable::add (this=0x8eb85180, asset@entry=0x8eb32330, idmapAsset@entry=0x0, cookie@entry=5, copyData@entry=false)
    at frameworks/base/libs/androidfw/ResourceTypes.cpp:3389

就是說兩個線程都在執行相同的代碼,只是4234線程比1073線程快一點。

@frameworks/base/libs/androidfw/ResourceTypes.cpp

/*
 * Adds the resource data held by `asset` to this table, optionally with
 * idmap data from `idmapAsset`.
 *
 * asset:      asset whose buffer is obtained with getBuffer(true).
 * idmapAsset: optional; when non-NULL its buffer and length are passed
 *             along as the idmap data.
 * cookie, copyData: forwarded unchanged to addInternal().
 *
 * Returns UNKNOWN_ERROR if either getBuffer() call yields NULL, otherwise
 * the result of addInternal().
 */
status_t ResTable::add(Asset* asset, Asset* idmapAsset, const int32_t cookie, bool copyData) {
    /* Called unconditionally: no check precedes this getBuffer() call. */
    const void* data = asset->getBuffer(true);
    if (data == NULL) {
        ALOGW("Unable to get buffer of resource asset file");
        return UNKNOWN_ERROR;
    }
 
    size_t idmapSize = 0;
    const void* idmapData = NULL;
    if (idmapAsset != NULL) {
        idmapData = idmapAsset->getBuffer(true);
        if (idmapData == NULL) {
            ALOGW("Unable to get buffer of idmap asset file");
            return UNKNOWN_ERROR;
        }
        idmapSize = static_cast<size_t>(idmapAsset->getLength());
    }
 
    return addInternal(data, static_cast<size_t>(asset->getLength()),
            idmapData, idmapSize, cookie, copyData);
}      

asset->getBuffer(true)前是沒有任何判斷的,是以4234必定調用過 asset->getBuffer(),且兩個執行流操作的都是同一個asset對象(位址都是0x8eb32330)。

至此确定為重複釋放。

【解決方案】

  是以解決方案是:

_CompressedAsset::getBuffer()中做加鎖保護。      

轉載于:https://www.cnblogs.com/YYPapa/p/6838109.html