常用的几种内存池技术

几乎所有应用程序中都会有内存的分配和释放，而频繁的分配和释放内存无疑会产生内存碎片，降低系统性能，尤其对性能要求较高的程序比较明显。下面介绍几种常见的内存池技术。

一环形缓存

环形缓存的基本原理如图：

初始化状态(wpos_ = rpos_)：

写了部分数据，同时读了一部分数据（wpos_ > rpos_）：

wpos_写数据到尾部后，又从头开始，rpos_还读到尾部（wpos_

rpos_读了N（N>= 1）圈后，赶上了wpos_,也就是说没有数据可读了(wpos_ )：

综合起来，看起来像这样子：

需要注意的是：

#1 wpos_

#2 如果 | wpos_ - rpos |

部分实现代码如下：

点击(此处)折叠或打开

#define EXTRA_BUFFER_SIZE 64

namespace easy

{

templateclass _Type,class _Alloc >

class EasyRingbuffer

{

public:

typedef _Alloc allocator_type;

explicit EasyRingbuffer(size_t size):

size_(size),

wpos_(0),

rpos_(0)

{

buffer_ = _allocate(size_);

}

~EasyRingbuffer() { _deallocate(buffer_,size_); }

templatetypename T> void append(T val)

append((easy_uint8*)&val,sizeof(val));

void append(const easy_uint8* src, size_t cnt)

if (!cnt)

{

return;

}

// case 1: rpos_ = wpos_

if (rpos_ = wpos_)

if (size_ - wpos_ >= cnt)

{

memmove(buffer_ + wpos_,src,cnt);

wpos_ += cnt;

return;

}

else

if (size_ - wpos_ + rpos_ > cnt) // >= is ok>

{

memmove(buffer_ + wpos_, src, size_ - wpos_);

memmove(buffer_, src + size_ - wpos_, cnt - (size_ - wpos_));

wpos_ = cnt - (size_ - wpos_);

return;

}

else

_Type* new_buffer = _allocate(size_ + cnt - (size_ - wpos_));

memmove(new_buffer,buffer_,wpos_);

memmove(new_buffer + wpos_, src, cnt);

_deallocate(buffer_,size_);

size_ = size_ + cnt - (size_ - wpos_);

wpos_ += cnt;

buffer_ = new_buffer;

// case 2: rpos_ > wpos_

else if(rpos_ > wpos_)

if (rpos_ - wpos_ > cnt) // >= is ok ?

if (rpos_ - wpos_ > cnt)

memmove(buffer_ + wpos_,src,cnt);

_Type* new_buffer = _allocate(size_ + cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE);

memmove(new_buffer + wpos_,src,cnt);

memmove(new_buffer + wpos_ + cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE,buffer_ + rpos_,size_ - rpos_);

rpos_ += cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE;

size_ = size_ + cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE;

EasyRingbuffer& operator (easy_bool val)

appendeasy_bool>(val);

return *this;

EasyRingbuffer& operator (easy_uint8 val)

appendeasy_uint8>(val);

EasyRingbuffer& operator (easy_uint16 val)

appendeasy_uint16>(val);

EasyRingbuffer& operator (easy_uint32 val)

appendeasy_uint32>(val);

EasyRingbuffer& operator (easy_uint64 val)

appendeasy_uint64>(val);

EasyRingbuffer& operator (easy_int8 val)

appendeasy_int8>(val);

EasyRingbuffer& operator (easy_int16 val)

appendeasy_int16>(val);

EasyRingbuffer& operator (easy_int32 val)

appendeasy_int32>(val);

EasyRingbuffer& operator (easy_int64 val)

appendeasy_int64>(val);

EasyRingbuffer& operator (easy_float val)

appendeasy_float>(val);

EasyRingbuffer& operator (easy_double val)

appendeasy_double>(val);

EasyRingbuffer& operator (const std::string& val)

append((easy_uint8 const*)val.c_str(),val.length());

EasyRingbuffer& operator (const char* val)

append((easy_uint8 const *)val, val ? strlen(val) : 0);

templatetypename T> T read()

T r;

read((easy_uint8*)&r,sizeof(T));

return r;

void read(easy_uint8* des,size_t len)

if (_read_finish())

if (rpos_ wpos_)

if (wpos_ - rpos_ >= len)

memmove(des,buffer_ + rpos_,len);

rpos_ += len;

// else just skip

else if (rpos_ > wpos_)

if (size_ - rpos_ >= len)

memmove(des,buffer_ + rpos_, size_ - rpos_);

memmove(des + size_ - rpos_, buffer_, len - (size_ - rpos_));

rpos_ = len - (size_ - rpos_);

EasyRingbuffer& operator >> (easy_bool& val)

val = readeasy_bool>();

EasyRingbuffer& operator >> (easy_uint8& val)

val = readeasy_uint8>();

EasyRingbuffer& operator >> (easy_uint16& val)

val = readeasy_uint16>();

EasyRingbuffer& operator >> (easy_uint32& val)

val = readeasy_uint32>();

EasyRingbuffer& operator >> (easy_uint64& val)

val = readeasy_uint64>();

EasyRingbuffer& operator >> (easy_int8& val)

val = readeasy_int8>();

EasyRingbuffer& operator >> (easy_int16& val)

val = readeasy_int16>();

EasyRingbuffer& operator >> (easy_int32& val)

val = readeasy_int32>();

EasyRingbuffer& operator >> (easy_int64& val)

val = readeasy_int64>();

EasyRingbuffer& operator >> (easy_float& val)

val = readeasy_float>();

EasyRingbuffer& operator >> (easy_double& val)

val = readeasy_double>();

size_t size() const { return size_; }

size_t rpos() const { return rpos_; }

size_t wpos() const { return wpos_; }

private:

_Type* _allocate(size_t size)

_Type* res = 0;

res = static_cast_Type*>(alloc_type_.allocate(size));

return res;

void _deallocate(void* p,size_t size)

alloc_type_.deallocate(p,size);

void _reallocate(void* p,size_t old_size,size_t new_size) { alloc_type_.reallocate(p,old_size,new_size); }

easy_bool _read_finish() { return wpos_ == rpos_; }

EasyRingbuffer ( const EasyRingbuffer& );

EasyRingbuffer& operator = ( const EasyRingbuffer& );

size_t size_;

_Type* buffer_;

size_t wpos_;

size_t rpos_;

allocator_type alloc_type_;

};

}

二空闲列表

空闲列表的原理比较简单，一般用于比较大的对象，可预分配一定数量的对象，需要时直接空闲列表中取，使用完后收回，如果空闲列表中已空，则需要重新设置大小了；也可使用时分配，使用完后收回。实现代码如下：

// use stl

templatetypename _Type, typename _Lock,typename _StorageType /*= std::list_Type*>*/>

class lock_queue

typedef typename _Type::_Key _Key;

static const size_t MAX_POOL_SIZE = _Type::MAX_POOL_SIZE;

_Type* allocate(_Key __key)

_Type* __ret = 0;

if (free_list_.empty())

__ret = new _Type(__key);

else

lock_.acquire_lock();

__ret = free_list_.back();

free_list_.pop_back();

lock_.release_lock();

return __ret;

void deallcate(_Type* __val)

if (!__val)

if (MAX_POOL_SIZE free_list_.size())

delete __val;

lock_.acquire_lock();

free_list_.push_back(__val);

lock_.release_lock();

size_t free_size() /*const*/

size_t __size = 0;

__size = free_list_.size();

return __size;

void clear()

for (typename _StorageType::iterator __it = free_list_.begin(); __it != free_list_.end(); ++__it)

if ((*__it))

delete (*__it);

(*__it) = NULL;

free_list_.clear();

_StorageType().swap(free_list_);

~lock_queue()

clear();

_Lock lock_;

_StorageType free_list_;

//anther way,use use stl

template typename T, int DEFAULT_BLOCK_NUM = 1024 >

class CMemoryPool

public:

static VOID* operator new ( std::size_t nAllocLength )

Assert( sizeof(T) == nAllocLength );

Assert( sizeof(T) >= sizeof(UCHAR*) );

if ( !m_sNewPointer )

allocBlock();

UCHAR* ucReturnPointer = m_sNewPointer;

//the head of 4 bytes is explain the next pointer of memory force,

//and m_NewPointer just point the next block of memory,when used the next allocation

m_sNewPointer = *reinterpret_castUCHAR**>( ucReturnPointer);

return ucReturnPointer;

}

static VOID operator delete( void* vpDeletePointer )

*reinterpret_castUCHAR**>( vpDeletePointer) = m_sNewPointer;

m_sNewPointer = static_castUCHAR*>(vpDeletePointer);

static VOID allocBlock()

m_sNewPointer = new UCHAR[sizeof(T) * DEFAULT_BLOCK_NUM];

//casting dual pointer force,that will change the meaning of the head of 4 byte memory

UCHAR **ppCurent = reinterpret_castUCHAR**>( m_sNewPointer );

UCHAR *ppNext = m_sNewPointer;

for( int i = 0; i DEFAULT_BLOCK_NUM-1; i++ )

ppNext += sizeof(T);

*ppCurent = ppNext;

//the head of 4 bytes is explain the next pointer of memory force,a memory list in form.

ppCurent = reinterpret_castUCHAR**>( ppNext );

//if the last memory bock, the head of 4 byte is null

*ppCurent = 0;

protected:

virtual ~CMemoryPool()

private:

static UCHAR *m_sNewPointer;

};

templateclass T, int BLOCK_NUM >

UCHAR *CMemoryPoolT, BLOCK_NUM >::m_sNewPointer;

三 stl的二级分配器

stl内部实现的分配器分两种情况：一种是大于128byte的分配，直接使用系统的内存分配函数malloc/free;另外一种为小于128byte的，也就是上面说的二级分配器，它采用了某些技术来管来内存，避免频繁分配释放。简单的说，就是将内存按8字节对齐，分别建立固定值倍数大小的内存池，如8, 8*2 ,8*3..., 当需要分配内存时，根据分配内存的大小，算出所需内存大小的内存池索引，然后根据这个索引找到那块内存池，并从中取出一块返回；同样，内存使用完后，按类似的方法回收。这种方案一般适用于比较小的内存分配的情况，大的可以考虑其他的方案。其流程如下：

下面是具体代码：

template bool threads, int inst >

class __default_alloc_template

enum {_ALIGN = 8};

enum {_MAX_BYTES = 128};

enum {_NFREELISTS = 16}; // _MAX_BYTES/_ALIGN

static size_t _S_round_up(size_t __bytes) { return (((__bytes) + (size_t) _ALIGN-1) & ~((size_t) _ALIGN - 1)); }

static size_t _S_freelist_index(size_t __bytes) { return (((__bytes) + (size_t)_ALIGN-1)/(size_t)_ALIGN - 1); }

union _Obj

union _Obj* _M_free_list_link;

char _M_client_data[1]; /* The client sees this. */

};

static _Obj* volatile _S_free_list[_NFREELISTS];

// Returns an object of size __n, and optionally adds to size __n free list.

static void* _S_refill(size_t __n);

// Allocates a chunk for nobjs of size size. nobjs may be reduced

// if it is inconvenient to allocate the requested number.

static char* _S_chunk_alloc(size_t __size, int& __nobjs);

static void* reallocate(void* __p, size_t __old_sz, size_t __new_sz);

// Chunk allocation state.

static char* _S_start_free;

static char* _S_end_free;

static size_t _S_heap_size;

static void* allocate(size_t __n)

void* __ret = 0;

if (__n > (size_t) _MAX_BYTES)

__ret = malloc_alloc::allocate(__n);

mutex_lock __lock;

__lock.acquire_lock();

_Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__n);

_Obj* volatile __result = *__my_free_list;

if (__result == 0)

__ret = _S_refill(_S_round_up(__n));

*__my_free_list = __result -> _M_free_list_link;

__ret = __result;

__lock.release_lock();

/* __p may not be 0 */

static void deallocate(void* __p, size_t __n)

malloc_alloc::deallocate(__p, __n);

_Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__n);

_Obj* __q = (_Obj*)__p;

__q -> _M_free_list_link = *__my_free_list;

*__my_free_list = __q;

__lock.release_lock();

template bool __threads, int __inst>

inline bool operator==(const __default_alloc_template__threads, __inst>&,

const __default_alloc_template__threads, __inst>&)

return true;

inline bool operator!=(const __default_alloc_template__threads, __inst>&,

return false;

/* We allocate memory in large chunks in order to avoid fragmenting */

/* the malloc heap too much. */

/* We assume that size is properly aligned. */

/* We hold the allocation lock. */

char* __default_alloc_template__threads, __inst>::_S_chunk_alloc(size_t __size, int& __nobjs)

//::_set_new_handler(_out_of_memory);

char* __result;

size_t __total_bytes = __size * __nobjs;

size_t __bytes_left = _S_end_free - _S_start_free;

// enough memory to alloc

if (__bytes_left >= __total_bytes)

__result = _S_start_free;

_S_start_free += __total_bytes;

return(__result);

// only more than __size can be alloc

else if (__bytes_left >= __size)

__nobjs = (int)(__bytes_left/__size);

__total_bytes = __size * __nobjs;

else

size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size >> 4);

// Try to make use of the left-over piece.

if (__bytes_left > 0)

_Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__bytes_left);

((_Obj*)_S_start_free) -> _M_free_list_link = *__my_free_list;

*__my_free_list = (_Obj*)_S_start_free;

// alloc __bytes_to_get again

_S_start_free = (char*)malloc(__bytes_to_get);

// alloc failed

if (0 == _S_start_free)

size_t __i;

_Obj* volatile* __my_free_list;

_Obj* __p;

// Try to make do with what we have. That can't

// hurt. We do not try smaller requests, since that tends

// to result in disaster on multi-process machines.

for (__i = __size; __i = (size_t) _MAX_BYTES; __i += (size_t) _ALIGN)

__my_free_list = _S_free_list + _S_freelist_index(__i);

__p = *__my_free_list;

if (0 != __p)

*__my_free_list = __p -> _M_free_list_link;

_S_start_free = (char*)__p;

_S_end_free = _S_start_free + __i;

return(_S_chunk_alloc(__size, __nobjs));

// Any leftover piece will eventually make it to the

// right free list.

_S_end_free = 0; // In case of exception.

_S_start_free = (char*) malloc(__bytes_to_get);

// This should either throw an

// exception or remedy the situation. Thus we assume it

// succeeded.

_S_heap_size += __bytes_to_get;

_S_end_free = _S_start_free + __bytes_to_get;

return(_S_chunk_alloc(__size, __nobjs));

/* Returns an object of size __n, and optionally adds to size __n free list.*/

/* We assume that __n is properly aligned. */

void* __default_alloc_template__threads, __inst>::_S_refill(size_t __n)

int __nobjs = 20;

char* __chunk = _S_chunk_alloc(__n, __nobjs);

_Obj* volatile* __my_free_list;

_Obj* __result;

_Obj* __current_obj;

_Obj* __next_obj;

int __i;

if (1 == __nobjs)

return(__chunk);

__my_free_list = _S_free_list + _S_freelist_index(__n);

/* Build free list in chunk */

__result = (_Obj*)__chunk;

*__my_free_list = __next_obj = (_Obj*)(__chunk + __n);

for (__i = 1; ; __i++)

__current_obj = __next_obj;

__next_obj = (_Obj*)((char*)__next_obj + __n);

if (__nobjs - 1 == __i)

__current_obj -> _M_free_list_link = 0;

break;

__current_obj -> _M_free_list_link = __next_obj;

return(__result);

template bool threads, int inst>

void* __default_alloc_templatethreads, inst>::reallocate(void* __p, size_t __old_sz, size_t __new_sz)

mutex_lock __lock;

__lock.acquire_lock();

void* __result;

size_t __copy_sz;

if (__old_sz > (size_t) _MAX_BYTES && __new_sz > (size_t) _MAX_BYTES)

__lock.release_lock();

return(realloc(__p, __new_sz));

if (_S_round_up(__old_sz) == _S_round_up(__new_sz))

return(__p);

__result = allocate(__new_sz);

__copy_sz = __new_sz > __old_sz? __old_sz : __new_sz;

memcpy(__result, __p, __copy_sz);

deallocate(__p, __old_sz);

__lock.release_lock();

template bool threads, int inst >

char* __default_alloc_templatethreads, inst>::_S_start_free = 0;

char* __default_alloc_templatethreads, inst>::_S_end_free = 0;

size_t __default_alloc_templatethreads, inst>::_S_heap_size = 0;

typename __default_alloc_template__threads, __inst>::_Obj* volatile

__default_alloc_template__threads, __inst> ::_S_free_list[_NFREELISTS] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

参考：

sqi stl

常用的几种内存池技术

继续阅读

C语言第四章自述2第四章选择结构程序设计

面试题:vector和map的区别，异同。空间分布，100万数据存哪个比较合适。一、迭代器区别二、vector三、Map、Set四、vector_map 为什么比map效率高五、如何选择六、容器选择原则七、效率对比

C++ 多线程用条件变量确定线程的执行顺序而不是使用 sleep(1)

POJ 1284 Primitive Roots (欧拉函数&原根定理)

CQ V1.0分词bates(基于双数组tire树)—应该是目前最快的中文分词算法

成员函数初始化列表

2021-08-13c++——类之操作符重载

swmm与lisflood-fp源码如何一起编译 CMake命令

Windows下VS开发环境环境安装工程项目设置关于Debug和Release的提示

一文看懂字符串的加减乘除

C++ 第十五周报告1--《冒泡法排序》

C++实现简单顺序表

C经典书籍笔记——C陷阱与缺陷②(语法陷阱之优先级)一、错误案列二、优先级规律

线性表之顺序表的实现

C++判断素数、求最大公约数代码判断一个数是否为素数求两个数的最大公约数

SequoiaDB巨杉数据库C++驱动概述