天天看点

常用的几种内存池技术

   几乎所有应用程序中都会有内存的分配和释放,而频繁的分配和释放内存无疑会产生内存碎片,降低系统性能,尤其对性能要求较高的程序比较明显。下面介绍几种常见的内存池技术。

    一  环形缓存

    环形缓存的基本原理如图:

    初始化状态(wpos_ = rpos_):

常用的几种内存池技术

    写了部分数据,同时读了一部分数据(wpos_ > rpos_):

常用的几种内存池技术

    wpos_写数据到尾部后,又从头开始,rpos_还没读到尾部(wpos_ < rpos_):

常用的几种内存池技术

    rpos_读了N(N>= 1)圈后,赶上了wpos_,也就是说没有数据可读了(wpos_ == rpos_):

常用的几种内存池技术

   综合起来,看起来像这样子:

常用的几种内存池技术

 需要注意的是:

    #1    wpos_ == rpos_ 时表示缓冲区中没有可读数据(初始状态或已读空)

    #2    如果 | wpos_ - rpos_ | 对应的剩余空间小于要写入的数据长度,需要重新分配更大的缓冲区(见 append 的扩容分支)

    部分实现代码如下:

点击(此处)折叠或打开

#define EXTRA_BUFFER_SIZE        64

namespace easy

{

    templateclass _Type,class _Alloc >

    class EasyRingbuffer

    {

    public:

        typedef _Alloc allocator_type;

        explicit EasyRingbuffer(size_t size):

        size_(size),

            wpos_(0),

            rpos_(0)

        {

            buffer_ = _allocate(size_);

        }

        ~EasyRingbuffer() { _deallocate(buffer_,size_); }

        templatetypename T> void append(T val)

            append((easy_uint8*)&val,sizeof(val));

        void append(const easy_uint8* src, size_t cnt)

            if (!cnt)

            {

                return;

            }

            //    case 1: rpos_ = wpos_

            if (rpos_ = wpos_)

                if (size_ - wpos_ >= cnt)

                {

                    memmove(buffer_ + wpos_,src,cnt);

                    wpos_ += cnt;

                    return;

                }

                else

                    if (size_ - wpos_ + rpos_ > cnt)    // >= is ok>

                    {

                        memmove(buffer_ + wpos_, src, size_ - wpos_);

                        memmove(buffer_, src + size_ - wpos_, cnt - (size_ - wpos_));

                        wpos_ = cnt - (size_ - wpos_);

                        return;

                    }

                    else

                        _Type* new_buffer = _allocate(size_ + cnt - (size_ - wpos_));

                        memmove(new_buffer,buffer_,wpos_);

                        memmove(new_buffer + wpos_, src, cnt);

                        _deallocate(buffer_,size_);

                        size_ = size_ + cnt - (size_ - wpos_);

                        wpos_ += cnt;

                        buffer_ = new_buffer;

            //    case 2: rpos_ > wpos_

            else if(rpos_ > wpos_)

                if (rpos_ - wpos_ > cnt)    // >= is ok ?

                    if (rpos_ - wpos_ > cnt)

                        memmove(buffer_ + wpos_,src,cnt);

                        _Type* new_buffer = _allocate(size_ + cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE);

                        memmove(new_buffer + wpos_,src,cnt);

                        memmove(new_buffer + wpos_ + cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE,buffer_ + rpos_,size_ - rpos_);

                        rpos_ += cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE;

                        size_ = size_ + cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE;

        EasyRingbuffer& operator (easy_bool val)

            appendeasy_bool>(val);

            return *this;

        EasyRingbuffer& operator (easy_uint8 val)

            appendeasy_uint8>(val);

        EasyRingbuffer& operator (easy_uint16 val)

            appendeasy_uint16>(val);

        EasyRingbuffer& operator (easy_uint32 val)

            appendeasy_uint32>(val);

        EasyRingbuffer& operator (easy_uint64 val)

            appendeasy_uint64>(val);

        EasyRingbuffer& operator (easy_int8 val)

            appendeasy_int8>(val);

        EasyRingbuffer& operator (easy_int16 val)

            appendeasy_int16>(val);

        EasyRingbuffer& operator (easy_int32 val)

            appendeasy_int32>(val);

        EasyRingbuffer& operator (easy_int64 val)

            appendeasy_int64>(val);

        EasyRingbuffer& operator (easy_float val)

            appendeasy_float>(val);

        EasyRingbuffer& operator (easy_double val)

            appendeasy_double>(val);

        EasyRingbuffer& operator (const std::string& val)

            append((easy_uint8 const*)val.c_str(),val.length());

        EasyRingbuffer& operator (const char* val)

            append((easy_uint8 const *)val, val ? strlen(val) : 0);

        templatetypename T> T read()

            T r;

            read((easy_uint8*)&r,sizeof(T));

            return r;

        void read(easy_uint8* des,size_t len)

            if (_read_finish())

            if (rpos_ wpos_)

                if (wpos_ - rpos_ >= len)

                    memmove(des,buffer_ + rpos_,len);

                    rpos_ += len;

                //    else just skip

            else if (rpos_ > wpos_)

                if (size_ - rpos_ >= len)

                    memmove(des,buffer_ + rpos_, size_ - rpos_);

                    memmove(des + size_ - rpos_, buffer_, len - (size_ - rpos_));

                    rpos_ = len - (size_ - rpos_);

        EasyRingbuffer& operator >> (easy_bool& val)

            val = readeasy_bool>();

        EasyRingbuffer& operator >> (easy_uint8& val)

            val = readeasy_uint8>();

        EasyRingbuffer& operator >> (easy_uint16& val)

            val = readeasy_uint16>();

        EasyRingbuffer& operator >> (easy_uint32& val)

            val = readeasy_uint32>();

        EasyRingbuffer& operator >> (easy_uint64& val)

            val = readeasy_uint64>();

        EasyRingbuffer& operator >> (easy_int8& val)

            val = readeasy_int8>();

        EasyRingbuffer& operator >> (easy_int16& val)

            val = readeasy_int16>();

        EasyRingbuffer& operator >> (easy_int32& val)

            val = readeasy_int32>();

        EasyRingbuffer& operator >> (easy_int64& val)

            val = readeasy_int64>();

        EasyRingbuffer& operator >> (easy_float& val)

            val = readeasy_float>();

        EasyRingbuffer& operator >> (easy_double& val)

            val = readeasy_double>();

        size_t size() const { return size_; }

        size_t rpos() const { return rpos_; }

        size_t wpos() const { return wpos_; }

    private:

        _Type* _allocate(size_t size)

            _Type* res = 0;

            res = static_cast_Type*>(alloc_type_.allocate(size));

            return res;

        void _deallocate(void* p,size_t size)

            alloc_type_.deallocate(p,size);

        void _reallocate(void* p,size_t old_size,size_t new_size) { alloc_type_.reallocate(p,old_size,new_size); }

        easy_bool _read_finish() { return wpos_ == rpos_; }

        EasyRingbuffer ( const EasyRingbuffer& );

        EasyRingbuffer& operator = ( const EasyRingbuffer& );

        size_t            size_;

        _Type*            buffer_;

        size_t            wpos_;

        size_t            rpos_;

        allocator_type    alloc_type_;

    };

}

  二 空闲列表

    空闲列表的原理比较简单,一般用于比较大的对象,可预分配一定数量的对象,需要时直接空闲列表中取,使用完后收回,如果空闲列表中已空,则需要重新设置大小了;也可使用时分配,使用完后收回。实现代码如下:

// use stl
//
// Free-list object pool. allocate() hands out a pooled object (or news one
// when the pool is empty); deallcate() returns it to the pool (or deletes
// it when the pool is already full). _StorageType is an STL container of
// _Type* (e.g. std::list<_Type*>); _Lock provides acquire_lock() /
// release_lock().
//
// NOTE(review): the empty()/size() checks run outside the lock, so the
// class is racy under contention — kept as in the original listing, but
// worth moving inside the critical sections.
template <typename _Type, typename _Lock, typename _StorageType /*= std::list<_Type*>*/>
class lock_queue
{
public:
    typedef typename _Type::_Key                _Key;
    static const size_t MAX_POOL_SIZE = _Type::MAX_POOL_SIZE;

    // Returns an object for __key: reuses a pooled one when available,
    // otherwise constructs a fresh one.
    _Type* allocate(_Key __key)
    {
        _Type* __ret = 0;
        if (free_list_.empty())
        {
            __ret = new _Type(__key);
        }
        else
        {
            lock_.acquire_lock();
            __ret = free_list_.back();
            free_list_.pop_back();
            lock_.release_lock();
        }
        return __ret;
    }

    // Returns __val to the pool; deletes it when the pool is full.
    // (Name kept as in the original — sic "deallcate" — so existing
    // callers still compile.)
    void deallcate(_Type* __val)
    {
        if (!__val)
        {
            return;    // BUGFIX: original fell through with a null pointer
        }
        if (MAX_POOL_SIZE <= free_list_.size())
        {
            // Pool is full: free the object instead of keeping it.
            delete __val;
            return;    // BUGFIX: original pushed the just-deleted pointer
        }
        lock_.acquire_lock();
        free_list_.push_back(__val);
        lock_.release_lock();
    }

    // Number of pooled (idle) objects.
    size_t free_size() /*const*/
    {
        size_t __size = 0;
        __size = free_list_.size();
        return __size;
    }

    // Deletes every pooled object and releases the container's storage.
    void clear()
    {
        for (typename _StorageType::iterator __it = free_list_.begin(); __it != free_list_.end(); ++__it)
        {
            if ((*__it))
            {
                delete (*__it);
                (*__it) = NULL;
            }
        }
        free_list_.clear();
        // swap with a temporary to actually release the capacity
        _StorageType().swap(free_list_);
    }

    ~lock_queue()
    {
        clear();
    }

private:
    _Lock                        lock_;
    _StorageType                free_list_;
};

//anther way,use use stl

template typename T, int DEFAULT_BLOCK_NUM = 1024 >

class CMemoryPool

public:

    static VOID* operator new ( std::size_t nAllocLength )

        Assert( sizeof(T) == nAllocLength );

        Assert( sizeof(T) >= sizeof(UCHAR*) );

        if ( !m_sNewPointer )

            allocBlock();

        UCHAR* ucReturnPointer = m_sNewPointer;

        //the head of 4 bytes is explain the next pointer of memory force,

        //and m_NewPointer just point the next block of memory,when used the next allocation

        m_sNewPointer = *reinterpret_castUCHAR**>( ucReturnPointer);

        return ucReturnPointer;

    }

    static VOID operator delete( void* vpDeletePointer )

        *reinterpret_castUCHAR**>( vpDeletePointer) = m_sNewPointer;    

        m_sNewPointer = static_castUCHAR*>(vpDeletePointer);

    static VOID allocBlock()

        m_sNewPointer = new UCHAR[sizeof(T) * DEFAULT_BLOCK_NUM];

        //casting dual pointer force,that will change the meaning of the head of 4 byte memory

        UCHAR **ppCurent = reinterpret_castUCHAR**>( m_sNewPointer );

        UCHAR *ppNext = m_sNewPointer;

        for( int i = 0; i DEFAULT_BLOCK_NUM-1; i++ )

            ppNext += sizeof(T);

            *ppCurent = ppNext;

            //the head of 4 bytes is explain the next pointer of memory force,a memory list in form.

            ppCurent = reinterpret_castUCHAR**>( ppNext );

        //if the last memory bock, the head of 4 byte is null

        *ppCurent = 0;

protected:

    virtual ~CMemoryPool()

private:

    static UCHAR *m_sNewPointer;

};

templateclass T, int BLOCK_NUM >

UCHAR *CMemoryPoolT, BLOCK_NUM >::m_sNewPointer;

    三  stl的二级分配器

    stl内部实现的分配器分两种情况:一种是大于128byte的分配,直接使用系统的内存分配函数malloc/free;另外一种为小于128byte的,也就是上面说的二级分配器,它采用了某些技术来管理内存,避免频繁分配释放。简单的说,就是将内存按8字节对齐,分别建立固定值倍数大小的内存池,如8, 8*2 ,8*3..., 当需要分配内存时,根据分配内存的大小,算出所需内存大小的内存池索引,然后根据这个索引找到那块内存池,并从中取出一块返回;同样,内存使用完后,按类似的方法回收。这种方案一般适用于比较小的内存分配的情况,大的可以考虑其他的方案。其流程如下:

常用的几种内存池技术

下面是具体代码:

template bool threads, int inst >

    class __default_alloc_template

        enum {_ALIGN = 8};

        enum {_MAX_BYTES = 128};

        enum {_NFREELISTS = 16}; // _MAX_BYTES/_ALIGN

        static size_t _S_round_up(size_t __bytes) { return (((__bytes) + (size_t) _ALIGN-1) & ~((size_t) _ALIGN - 1)); }

        static size_t _S_freelist_index(size_t __bytes) { return (((__bytes) + (size_t)_ALIGN-1)/(size_t)_ALIGN - 1); }

        union _Obj

            union _Obj* _M_free_list_link;

            char _M_client_data[1]; /* The client sees this. */

        };

        static _Obj* volatile _S_free_list[_NFREELISTS];

        // Returns an object of size __n, and optionally adds to size __n free list.

        static void* _S_refill(size_t __n);

        // Allocates a chunk for nobjs of size size. nobjs may be reduced

        // if it is inconvenient to allocate the requested number.

        static char* _S_chunk_alloc(size_t __size, int& __nobjs);

        static void* reallocate(void* __p, size_t __old_sz, size_t __new_sz);

        // Chunk allocation state.

        static char*    _S_start_free;

        static char*    _S_end_free;

        static size_t    _S_heap_size;

        static void* allocate(size_t __n)

            void* __ret = 0;

            if (__n > (size_t) _MAX_BYTES)

                __ret = malloc_alloc::allocate(__n);

                mutex_lock    __lock;

                __lock.acquire_lock();

                _Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__n);

                _Obj* volatile __result = *__my_free_list;

                if (__result == 0)

                    __ret = _S_refill(_S_round_up(__n));

                    *__my_free_list = __result -> _M_free_list_link;

                    __ret = __result;

                __lock.release_lock();

        /* __p may not be 0 */

        static void deallocate(void* __p, size_t __n)

                 malloc_alloc::deallocate(__p, __n);

                 _Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__n);

                 _Obj* __q = (_Obj*)__p;

                 __q -> _M_free_list_link = *__my_free_list;

                 *__my_free_list = __q;

                 __lock.release_lock();

    template bool __threads, int __inst>

    inline bool operator==(const __default_alloc_template__threads, __inst>&,

        const __default_alloc_template__threads, __inst>&)

        return true;

    inline bool operator!=(const __default_alloc_template__threads, __inst>&,

        return false;

    /* We allocate memory in large chunks in order to avoid fragmenting */

    /* the malloc heap too much. */

    /* We assume that size is properly aligned. */

    /* We hold the allocation lock. */

    char*    __default_alloc_template__threads, __inst>::_S_chunk_alloc(size_t __size, int& __nobjs)

        //::_set_new_handler(_out_of_memory);

        char* __result;

        size_t __total_bytes = __size * __nobjs;

        size_t __bytes_left = _S_end_free - _S_start_free;

        //    enough memory to alloc

        if (__bytes_left >= __total_bytes)

            __result = _S_start_free;

            _S_start_free += __total_bytes;

            return(__result);

        //    only more than __size can be alloc

        else if (__bytes_left >= __size)

            __nobjs = (int)(__bytes_left/__size);

            __total_bytes = __size * __nobjs;

        else

            size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size >> 4);

            // Try to make use of the left-over piece.

            if (__bytes_left > 0)

                _Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__bytes_left);

                ((_Obj*)_S_start_free) -> _M_free_list_link = *__my_free_list;

                *__my_free_list = (_Obj*)_S_start_free;

            //    alloc __bytes_to_get again

            _S_start_free = (char*)malloc(__bytes_to_get);

            //    alloc failed

            if (0 == _S_start_free)

                size_t __i;

                _Obj* volatile* __my_free_list;

                _Obj* __p;

                // Try to make do with what we have. That can't

                // hurt. We do not try smaller requests, since that tends

                // to result in disaster on multi-process machines.

                for (__i = __size; __i = (size_t) _MAX_BYTES; __i += (size_t) _ALIGN)

                    __my_free_list = _S_free_list + _S_freelist_index(__i);

                    __p = *__my_free_list;

                    if (0 != __p)

                        *__my_free_list = __p -> _M_free_list_link;

                        _S_start_free = (char*)__p;

                        _S_end_free = _S_start_free + __i;

                        return(_S_chunk_alloc(__size, __nobjs));

                        // Any leftover piece will eventually make it to the

                        // right free list.

                _S_end_free = 0;    // In case of exception.

                _S_start_free = (char*) malloc(__bytes_to_get);

                // This should either throw an

                // exception or remedy the situation. Thus we assume it

                // succeeded.

            _S_heap_size += __bytes_to_get;

            _S_end_free = _S_start_free + __bytes_to_get;

            return(_S_chunk_alloc(__size, __nobjs));

    /* Returns an object of size __n, and optionally adds to size __n free list.*/

    /* We assume that __n is properly aligned. */

    void* __default_alloc_template__threads, __inst>::_S_refill(size_t __n)

        int __nobjs = 20;

        char* __chunk = _S_chunk_alloc(__n, __nobjs);

        _Obj* volatile* __my_free_list;

        _Obj* __result;

        _Obj* __current_obj;

        _Obj* __next_obj;

        int __i;

        if (1 == __nobjs)

            return(__chunk);

        __my_free_list = _S_free_list + _S_freelist_index(__n);

        /* Build free list in chunk */

        __result = (_Obj*)__chunk;

        *__my_free_list = __next_obj = (_Obj*)(__chunk + __n);

        for (__i = 1; ; __i++)

            __current_obj = __next_obj;

            __next_obj = (_Obj*)((char*)__next_obj + __n);

            if (__nobjs - 1 == __i)

                __current_obj -> _M_free_list_link = 0;

                break;

                __current_obj -> _M_free_list_link = __next_obj;

        return(__result);

    template bool threads, int inst>

    void* __default_alloc_templatethreads, inst>::reallocate(void* __p, size_t __old_sz, size_t __new_sz)

        mutex_lock    __lock;

        __lock.acquire_lock();

        void* __result;

        size_t __copy_sz;

        if (__old_sz > (size_t) _MAX_BYTES && __new_sz > (size_t) _MAX_BYTES)

            __lock.release_lock();

            return(realloc(__p, __new_sz));

        if (_S_round_up(__old_sz) == _S_round_up(__new_sz))

            return(__p);

        __result = allocate(__new_sz);

        __copy_sz = __new_sz > __old_sz? __old_sz : __new_sz;

        memcpy(__result, __p, __copy_sz);

        deallocate(__p, __old_sz);

        __lock.release_lock();

    template bool threads, int inst >

    char* __default_alloc_templatethreads, inst>::_S_start_free = 0;

    char* __default_alloc_templatethreads, inst>::_S_end_free = 0;

    size_t __default_alloc_templatethreads, inst>::_S_heap_size = 0;

    typename __default_alloc_template__threads, __inst>::_Obj* volatile

        __default_alloc_template__threads, __inst> ::_S_free_list[_NFREELISTS] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

 参考:

    sgi stl

继续阅读