几乎所有应用程序中都会有内存的分配和释放,而频繁的分配和释放内存无疑会产生内存碎片,降低系统性能,尤其对性能要求较高的程序比较明显。下面介绍几种常见的内存池技术。
一 环形缓存
环形缓存的基本原理如图:
初始化状态(wpos_ = rpos_):
写了部分数据,同时读了一部分数据(wpos_ > rpos_):
wpos_写数据到尾部后,又从头开始,rpos_还读到尾部(wpos_
rpos_读了N(N>= 1)圈后,赶上了wpos_,也就是说没有数据可读了(wpos_ ):
综合起来,看起来像这样子:
需要注意的是:
#1 wpos_
#2 如果 | wpos_ - rpos |
部分实现代码如下:
点击(此处)折叠或打开
#define EXTRA_BUFFER_SIZE 64
namespace easy
{
templateclass _Type,class _Alloc >
class EasyRingbuffer
{
public:
typedef _Alloc allocator_type;
explicit EasyRingbuffer(size_t size):
size_(size),
wpos_(0),
rpos_(0)
{
buffer_ = _allocate(size_);
}
~EasyRingbuffer() { _deallocate(buffer_,size_); }
templatetypename T> void append(T val)
append((easy_uint8*)&val,sizeof(val));
void append(const easy_uint8* src, size_t cnt)
if (!cnt)
{
return;
}
// case 1: rpos_ = wpos_
if (rpos_ = wpos_)
if (size_ - wpos_ >= cnt)
{
memmove(buffer_ + wpos_,src,cnt);
wpos_ += cnt;
return;
}
else
if (size_ - wpos_ + rpos_ > cnt) // >= is ok>
{
memmove(buffer_ + wpos_, src, size_ - wpos_);
memmove(buffer_, src + size_ - wpos_, cnt - (size_ - wpos_));
wpos_ = cnt - (size_ - wpos_);
return;
}
else
_Type* new_buffer = _allocate(size_ + cnt - (size_ - wpos_));
memmove(new_buffer,buffer_,wpos_);
memmove(new_buffer + wpos_, src, cnt);
_deallocate(buffer_,size_);
size_ = size_ + cnt - (size_ - wpos_);
wpos_ += cnt;
buffer_ = new_buffer;
// case 2: rpos_ > wpos_
else if(rpos_ > wpos_)
if (rpos_ - wpos_ > cnt) // >= is ok ?
if (rpos_ - wpos_ > cnt)
memmove(buffer_ + wpos_,src,cnt);
_Type* new_buffer = _allocate(size_ + cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE);
memmove(new_buffer + wpos_,src,cnt);
memmove(new_buffer + wpos_ + cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE,buffer_ + rpos_,size_ - rpos_);
rpos_ += cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE;
size_ = size_ + cnt - (rpos_ - wpos_) + EXTRA_BUFFER_SIZE;
EasyRingbuffer& operator (easy_bool val)
appendeasy_bool>(val);
return *this;
EasyRingbuffer& operator (easy_uint8 val)
appendeasy_uint8>(val);
EasyRingbuffer& operator (easy_uint16 val)
appendeasy_uint16>(val);
EasyRingbuffer& operator (easy_uint32 val)
appendeasy_uint32>(val);
EasyRingbuffer& operator (easy_uint64 val)
appendeasy_uint64>(val);
EasyRingbuffer& operator (easy_int8 val)
appendeasy_int8>(val);
EasyRingbuffer& operator (easy_int16 val)
appendeasy_int16>(val);
EasyRingbuffer& operator (easy_int32 val)
appendeasy_int32>(val);
EasyRingbuffer& operator (easy_int64 val)
appendeasy_int64>(val);
EasyRingbuffer& operator (easy_float val)
appendeasy_float>(val);
EasyRingbuffer& operator (easy_double val)
appendeasy_double>(val);
EasyRingbuffer& operator (const std::string& val)
append((easy_uint8 const*)val.c_str(),val.length());
EasyRingbuffer& operator (const char* val)
append((easy_uint8 const *)val, val ? strlen(val) : 0);
templatetypename T> T read()
T r;
read((easy_uint8*)&r,sizeof(T));
return r;
void read(easy_uint8* des,size_t len)
if (_read_finish())
if (rpos_ wpos_)
if (wpos_ - rpos_ >= len)
memmove(des,buffer_ + rpos_,len);
rpos_ += len;
// else just skip
else if (rpos_ > wpos_)
if (size_ - rpos_ >= len)
memmove(des,buffer_ + rpos_, size_ - rpos_);
memmove(des + size_ - rpos_, buffer_, len - (size_ - rpos_));
rpos_ = len - (size_ - rpos_);
EasyRingbuffer& operator >> (easy_bool& val)
val = readeasy_bool>();
EasyRingbuffer& operator >> (easy_uint8& val)
val = readeasy_uint8>();
EasyRingbuffer& operator >> (easy_uint16& val)
val = readeasy_uint16>();
EasyRingbuffer& operator >> (easy_uint32& val)
val = readeasy_uint32>();
EasyRingbuffer& operator >> (easy_uint64& val)
val = readeasy_uint64>();
EasyRingbuffer& operator >> (easy_int8& val)
val = readeasy_int8>();
EasyRingbuffer& operator >> (easy_int16& val)
val = readeasy_int16>();
EasyRingbuffer& operator >> (easy_int32& val)
val = readeasy_int32>();
EasyRingbuffer& operator >> (easy_int64& val)
val = readeasy_int64>();
EasyRingbuffer& operator >> (easy_float& val)
val = readeasy_float>();
EasyRingbuffer& operator >> (easy_double& val)
val = readeasy_double>();
size_t size() const { return size_; }
size_t rpos() const { return rpos_; }
size_t wpos() const { return wpos_; }
private:
_Type* _allocate(size_t size)
_Type* res = 0;
res = static_cast_Type*>(alloc_type_.allocate(size));
return res;
void _deallocate(void* p,size_t size)
alloc_type_.deallocate(p,size);
void _reallocate(void* p,size_t old_size,size_t new_size) { alloc_type_.reallocate(p,old_size,new_size); }
easy_bool _read_finish() { return wpos_ == rpos_; }
EasyRingbuffer ( const EasyRingbuffer& );
EasyRingbuffer& operator = ( const EasyRingbuffer& );
size_t size_;
_Type* buffer_;
size_t wpos_;
size_t rpos_;
allocator_type alloc_type_;
};
}
二 空闲列表
空闲列表的原理比较简单,一般用于比较大的对象,可预分配一定数量的对象,需要时直接空闲列表中取,使用完后收回,如果空闲列表中已空,则需要重新设置大小了;也可使用时分配,使用完后收回。实现代码如下:
// use stl
templatetypename _Type, typename _Lock,typename _StorageType /*= std::list_Type*>*/>
class lock_queue
typedef typename _Type::_Key _Key;
static const size_t MAX_POOL_SIZE = _Type::MAX_POOL_SIZE;
_Type* allocate(_Key __key)
_Type* __ret = 0;
if (free_list_.empty())
__ret = new _Type(__key);
else
lock_.acquire_lock();
__ret = free_list_.back();
free_list_.pop_back();
lock_.release_lock();
return __ret;
void deallcate(_Type* __val)
if (!__val)
if (MAX_POOL_SIZE free_list_.size())
delete __val;
lock_.acquire_lock();
free_list_.push_back(__val);
lock_.release_lock();
size_t free_size() /*const*/
size_t __size = 0;
__size = free_list_.size();
return __size;
void clear()
for (typename _StorageType::iterator __it = free_list_.begin(); __it != free_list_.end(); ++__it)
if ((*__it))
delete (*__it);
(*__it) = NULL;
free_list_.clear();
_StorageType().swap(free_list_);
~lock_queue()
clear();
_Lock lock_;
_StorageType free_list_;
//anther way,use use stl
template typename T, int DEFAULT_BLOCK_NUM = 1024 >
class CMemoryPool
public:
static VOID* operator new ( std::size_t nAllocLength )
Assert( sizeof(T) == nAllocLength );
Assert( sizeof(T) >= sizeof(UCHAR*) );
if ( !m_sNewPointer )
allocBlock();
UCHAR* ucReturnPointer = m_sNewPointer;
//the head of 4 bytes is explain the next pointer of memory force,
//and m_NewPointer just point the next block of memory,when used the next allocation
m_sNewPointer = *reinterpret_castUCHAR**>( ucReturnPointer);
return ucReturnPointer;
}
static VOID operator delete( void* vpDeletePointer )
*reinterpret_castUCHAR**>( vpDeletePointer) = m_sNewPointer;
m_sNewPointer = static_castUCHAR*>(vpDeletePointer);
static VOID allocBlock()
m_sNewPointer = new UCHAR[sizeof(T) * DEFAULT_BLOCK_NUM];
//casting dual pointer force,that will change the meaning of the head of 4 byte memory
UCHAR **ppCurent = reinterpret_castUCHAR**>( m_sNewPointer );
UCHAR *ppNext = m_sNewPointer;
for( int i = 0; i DEFAULT_BLOCK_NUM-1; i++ )
ppNext += sizeof(T);
*ppCurent = ppNext;
//the head of 4 bytes is explain the next pointer of memory force,a memory list in form.
ppCurent = reinterpret_castUCHAR**>( ppNext );
//if the last memory bock, the head of 4 byte is null
*ppCurent = 0;
protected:
virtual ~CMemoryPool()
private:
static UCHAR *m_sNewPointer;
};
templateclass T, int BLOCK_NUM >
UCHAR *CMemoryPoolT, BLOCK_NUM >::m_sNewPointer;
三 stl的二级分配器
stl内部实现的分配器分两种情况:一种是大于128byte的分配,直接使用系统的内存分配函数malloc/free;另外一种为小于128byte的,也就是上面说的二级分配器,它采用了某些技术来管来内存,避免频繁分配释放。简单的说,就是将内存按8字节对齐,分别建立固定值倍数大小的内存池,如8, 8*2 ,8*3..., 当需要分配内存时,根据分配内存的大小,算出所需内存大小的内存池索引,然后根据这个索引找到那块内存池,并从中取出一块返回;同样,内存使用完后,按类似的方法回收。这种方案一般适用于比较小的内存分配的情况,大的可以考虑其他的方案。其流程如下:
下面是具体代码:
template bool threads, int inst >
class __default_alloc_template
enum {_ALIGN = 8};
enum {_MAX_BYTES = 128};
enum {_NFREELISTS = 16}; // _MAX_BYTES/_ALIGN
static size_t _S_round_up(size_t __bytes) { return (((__bytes) + (size_t) _ALIGN-1) & ~((size_t) _ALIGN - 1)); }
static size_t _S_freelist_index(size_t __bytes) { return (((__bytes) + (size_t)_ALIGN-1)/(size_t)_ALIGN - 1); }
union _Obj
union _Obj* _M_free_list_link;
char _M_client_data[1]; /* The client sees this. */
};
static _Obj* volatile _S_free_list[_NFREELISTS];
// Returns an object of size __n, and optionally adds to size __n free list.
static void* _S_refill(size_t __n);
// Allocates a chunk for nobjs of size size. nobjs may be reduced
// if it is inconvenient to allocate the requested number.
static char* _S_chunk_alloc(size_t __size, int& __nobjs);
static void* reallocate(void* __p, size_t __old_sz, size_t __new_sz);
// Chunk allocation state.
static char* _S_start_free;
static char* _S_end_free;
static size_t _S_heap_size;
static void* allocate(size_t __n)
void* __ret = 0;
if (__n > (size_t) _MAX_BYTES)
__ret = malloc_alloc::allocate(__n);
mutex_lock __lock;
__lock.acquire_lock();
_Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__n);
_Obj* volatile __result = *__my_free_list;
if (__result == 0)
__ret = _S_refill(_S_round_up(__n));
*__my_free_list = __result -> _M_free_list_link;
__ret = __result;
__lock.release_lock();
/* __p may not be 0 */
static void deallocate(void* __p, size_t __n)
malloc_alloc::deallocate(__p, __n);
_Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__n);
_Obj* __q = (_Obj*)__p;
__q -> _M_free_list_link = *__my_free_list;
*__my_free_list = __q;
__lock.release_lock();
template bool __threads, int __inst>
inline bool operator==(const __default_alloc_template__threads, __inst>&,
const __default_alloc_template__threads, __inst>&)
return true;
inline bool operator!=(const __default_alloc_template__threads, __inst>&,
return false;
/* We allocate memory in large chunks in order to avoid fragmenting */
/* the malloc heap too much. */
/* We assume that size is properly aligned. */
/* We hold the allocation lock. */
char* __default_alloc_template__threads, __inst>::_S_chunk_alloc(size_t __size, int& __nobjs)
//::_set_new_handler(_out_of_memory);
char* __result;
size_t __total_bytes = __size * __nobjs;
size_t __bytes_left = _S_end_free - _S_start_free;
// enough memory to alloc
if (__bytes_left >= __total_bytes)
__result = _S_start_free;
_S_start_free += __total_bytes;
return(__result);
// only more than __size can be alloc
else if (__bytes_left >= __size)
__nobjs = (int)(__bytes_left/__size);
__total_bytes = __size * __nobjs;
else
size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size >> 4);
// Try to make use of the left-over piece.
if (__bytes_left > 0)
_Obj* volatile* __my_free_list = _S_free_list + _S_freelist_index(__bytes_left);
((_Obj*)_S_start_free) -> _M_free_list_link = *__my_free_list;
*__my_free_list = (_Obj*)_S_start_free;
// alloc __bytes_to_get again
_S_start_free = (char*)malloc(__bytes_to_get);
// alloc failed
if (0 == _S_start_free)
size_t __i;
_Obj* volatile* __my_free_list;
_Obj* __p;
// Try to make do with what we have. That can't
// hurt. We do not try smaller requests, since that tends
// to result in disaster on multi-process machines.
for (__i = __size; __i = (size_t) _MAX_BYTES; __i += (size_t) _ALIGN)
__my_free_list = _S_free_list + _S_freelist_index(__i);
__p = *__my_free_list;
if (0 != __p)
*__my_free_list = __p -> _M_free_list_link;
_S_start_free = (char*)__p;
_S_end_free = _S_start_free + __i;
return(_S_chunk_alloc(__size, __nobjs));
// Any leftover piece will eventually make it to the
// right free list.
_S_end_free = 0; // In case of exception.
_S_start_free = (char*) malloc(__bytes_to_get);
// This should either throw an
// exception or remedy the situation. Thus we assume it
// succeeded.
_S_heap_size += __bytes_to_get;
_S_end_free = _S_start_free + __bytes_to_get;
return(_S_chunk_alloc(__size, __nobjs));
/* Returns an object of size __n, and optionally adds to size __n free list.*/
/* We assume that __n is properly aligned. */
void* __default_alloc_template__threads, __inst>::_S_refill(size_t __n)
int __nobjs = 20;
char* __chunk = _S_chunk_alloc(__n, __nobjs);
_Obj* volatile* __my_free_list;
_Obj* __result;
_Obj* __current_obj;
_Obj* __next_obj;
int __i;
if (1 == __nobjs)
return(__chunk);
__my_free_list = _S_free_list + _S_freelist_index(__n);
/* Build free list in chunk */
__result = (_Obj*)__chunk;
*__my_free_list = __next_obj = (_Obj*)(__chunk + __n);
for (__i = 1; ; __i++)
__current_obj = __next_obj;
__next_obj = (_Obj*)((char*)__next_obj + __n);
if (__nobjs - 1 == __i)
__current_obj -> _M_free_list_link = 0;
break;
__current_obj -> _M_free_list_link = __next_obj;
return(__result);
template bool threads, int inst>
void* __default_alloc_templatethreads, inst>::reallocate(void* __p, size_t __old_sz, size_t __new_sz)
mutex_lock __lock;
__lock.acquire_lock();
void* __result;
size_t __copy_sz;
if (__old_sz > (size_t) _MAX_BYTES && __new_sz > (size_t) _MAX_BYTES)
__lock.release_lock();
return(realloc(__p, __new_sz));
if (_S_round_up(__old_sz) == _S_round_up(__new_sz))
return(__p);
__result = allocate(__new_sz);
__copy_sz = __new_sz > __old_sz? __old_sz : __new_sz;
memcpy(__result, __p, __copy_sz);
deallocate(__p, __old_sz);
__lock.release_lock();
template bool threads, int inst >
char* __default_alloc_templatethreads, inst>::_S_start_free = 0;
char* __default_alloc_templatethreads, inst>::_S_end_free = 0;
size_t __default_alloc_templatethreads, inst>::_S_heap_size = 0;
typename __default_alloc_template__threads, __inst>::_Obj* volatile
__default_alloc_template__threads, __inst> ::_S_free_list[_NFREELISTS] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };
参考:
sqi stl