【資料結構】處理哈希沖突的開鍊法（哈希桶）算法實作

實作哈希表時，我們常見的方法是線性探測、二次探測，這兩個算法也很簡單。若有興趣，可以檢視我的部落格 http://10740184.blog.51cto.com/10730184/1771160。但是，這兩個算法有一個共同點就是：空間使用率低。為什麼這麼說呢？線性探測、二次探測的高效性很大程度上要取決于它的載荷因子，載荷因子即：存放關鍵字個數/空間大小。

通過查閱資料，我發現，使用素數做除數可以減少哈希沖突（具體原因不詳，大師專研的，發現很好用，就在這裡分享給大家）。見下：

----素數表

// 使用素數表對齊做哈希表的容量，降低哈希沖突

const int _PrimeSize = 28;

static const unsigned long _PrimeList [_PrimeSize] =

{

53ul, 97ul, 193ul, 389ul, 769ul,

1543ul, 3079ul, 6151ul, 12289ul, 24593ul,

49157ul, 98317ul, 196613ul, 393241ul, 786433ul,

1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,

50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,

1610612741ul, 3221225473ul, 4294967291ul

};

開鍊法（哈希桶）結構：

而哈希桶實作時，我們可以将載荷因子設成1.

#define _CRT_SECURE_NO_WARNINGS 1
#include<iostream>
using namespace std;

#include<vector>

template<class K,class V>
struct HashTableNode
{
    K _key;
    V _value;
    HashTableNode* _next;
    HashTableNode(const K& key,const V& value)
        :_key(key)
        , _value(value)
        , _next(NULL)
    {}
};

template<class K,class V>
class HashTable
{
public:
    typedef HashTableNode<K,V> Node;

    HashTable()
        :_table(NULL)
        , _size()
    {}


    size_t _HashFunc(const K& key)
    {
        //_table.size()表示哈希桶的空間大小
        return key % _table.size();
    }
    
    
    //拷貝構造
    HashTable(const HashTable& ht)
    {
        //将哈希表ht拷貝給this
        this->_table.resize(ht._table.size());
        for (int i = 0; i < ht._table.size(); i++)
        {
            Node* cur = ht._table[i];
            while (cur)
            {
                Node* tmp = new Node(cur->_key, cur->_value);
                tmp->_next = _table[i];
                _table[i] = tmp;
                this->_size++;

                cur = cur->_next;
            }
        }    
    }


    HashTable<K, V> operator=(const HashTable<K, V>& ht)
    {    
        if (&ht != this)
        {
            //删除哈希表this
            for (int i = 0; i < this->_table.size(); i++)
            {
                Node* cur = _table[i];
                while (cur)
                {
                    Node* del = cur;
                    cur = cur->_next;
                    /*delete del;
                    del = NULL;*/
                    Remove(del->_key);
                }
            }

            //将哈希表ht拷貝給this
            this->_table.resize(ht._table.size());
            for (int i = 0; i < ht._table.size(); i++)
            {
                Node* cur = ht._table[i];
                while (cur)
                {
                    Node* tmp = new Node(cur->_key, cur->_value);
                    tmp->_next = _table[i];
                    _table[i] = tmp;
                    this->_size++;

                    cur = cur->_next;
                }
            }        
        }
        return *this;
    }


    //指派運算符重載的現代寫法
    HashTable<K, V> operator=(HashTable<K, V> ht)
    {
        if (&ht != this)
        {
            swap(_table, ht._table);
            swap(_size, ht._size);
        }    
        return *this;
    }


    ~HashTable()
    {
        //删除哈希表ht
        if (this->_table.size() !=0)
        {
            for (int i = 0; i < this->_table.size(); i++)
            {
                Node* cur = _table[i];
                while (cur)
                {
                    Node* del = cur;
                    cur = cur->_next;
                    delete del;
                    del = NULL;
                }
            }
        }
    }


    //擷取新的哈希表容量大小
    size_t _GetnewSize()
    {
        static const int _PrimeSize = 28;
        static const unsigned long _PrimeList[_PrimeSize] =
        {
            53ul, 97ul, 193ul, 389ul, 769ul,
            1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
            49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
            1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
            50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
            1610612741ul, 3221225473ul, 4294967291ul
        };

        for (int i = 0; i < _PrimeSize; i++)
        {
            if (_PrimeList[i]> _table.size())
            {
                return _PrimeList[i];
            }
        }
        return _PrimeList[_PrimeSize - 1];
    }


    //給哈希桶擴容
    void _ExpandCapacity()
    {        
        //開辟新的更大容量的哈希表
        size_t newSize = _GetnewSize();
        vector<Node*> newTable;
        newTable.resize(newSize);

        //将每處順序表上的單連結清單元素摘下來插入到新的順序表上
        for (int i = 0; i < _table.size(); i++)
        {
            Node* cur = _table[i];
            while (cur)
            {
                Node* tmp = cur;
                cur = cur->_next;
                int index = _HashFunc(tmp->_key);
                //頭插法插插節點
                tmp->_next = newTable[index];
                newTable[index] = tmp;
            }
            _table[i] = NULL;
        }
        _table.swap(newTable);
    }


    //插入關鍵字
    bool Insert(const K& key,const V& value)
    {
        //檢查載荷因子，考慮是否擴容
        //哈希桶的載荷因子設定為1
        if (_size == _table.size())
        {
            _ExpandCapacity();
        }

        //往順序表的index處插入節點
        size_t index = _HashFunc(key);
        Node* begin = _table[index];
        while (begin)
        {
            //設計成不可出現重複元素
            if (begin->_key == key)
            {
                return false;
            }

            begin = begin->_next;
        }

        //考慮到同一條單連結清單上，無所謂元素存放順序，且較尾插簡單。--》頭插
        Node* tmp = new Node(key, value);
        tmp->_next =_table[index];
        _table[index] = tmp;
        _size++;
        return true;
    }


    //查找關鍵字
    Node* Find(const K& key)
    {
        int index = _HashFunc(key);
        Node* cur = _table[index];
        while (cur)
        {
            if (cur->_key == key)
                return cur;
            cur = cur->_next;
        }
        return NULL;
    }


    //删除關鍵字
    bool Remove(const K& key)
    {
        int index = _HashFunc(key);
        Node* cur = _table[index];
        Node* prev = NULL;
        while (cur)
        {
            if (cur->_key == key)
                break;
            prev = cur;
            cur = cur->_next;
        }

        if (cur)
        {
            if (cur == _table[index])
            {            
                _table[index] = cur->_next;
            }
            else
            {
                Node* next = cur->_next;
                prev->_next = next;
            }
            delete cur;
            cur = NULL;
            --_size;
            return true;        
        }
        return false;
    }


    //列印哈希桶
    void PrintHashTable()
    {
        for (int i = 0; i < _table.size(); i++)
        {
            Node* cur = _table[i];
            cout << i<<":" ;
            while (cur)
            {
                cout << cur->_key << "->";
                cur = cur->_next;
            }
            cout << "NULL" << endl;
        }
        cout << endl;
    }
    
private:
    vector<Node*> _table;
    size_t _size;//資料個數
};


void TestHashTableBucket()
{
    typedef HashTableNode<int, char> Node;

    HashTable<int, char> ht;
    ht.Insert(1, 'a');
    ht.Insert(2, 'b');
    ht.Insert(3, 'c');
    ht.Insert(4, 'd');
    ht.Insert(5, 'd');
    ht.Insert(54, 'x');
    ht.Insert(55, 'y');
    ht.Insert(56, 'z');

    ht.PrintHashTable();


    /*Node* ret = ht.Find(5);
    cout << ret->_value << endl;

    ht.Remove(1);
    ht.Remove(6);
    ht.PrintHashTable();*/

    /*HashTable<int, char> ht1(ht);
    ht1.PrintHashTable();*/

    HashTable<int, char> ht2;
    ht2.Insert(54, 'x');
    ht2.Insert(55, 'y');
    ht2.Insert(56, 'z');
    ht2.Insert(1, 'a');
    ht2.Insert(2, 'b');
    ht2.Insert(3, 'c');
    ht2.Insert(4, 'd');
    ht2.Insert(5, 'd');

    ht2.PrintHashTable();

    ht = ht2;
    ht.PrintHashTable();

}


int main()
{
    TestHashTableBucket();
    system("pause");
    return 0;
}

【資料結構】處理哈希沖突的開鍊法（哈希桶）算法實作

繼續閱讀

樹的基本概念(定義、基本術語、性質)

GSL--GNU Scientific Library

D5|哈希表，善用資料結構

如何成為一名.net 工程師?

BMP檔案結構及圖像每行位元組計算方法

D. Ehab the Xorcist(構造+思維)

查找算法之二分查找查找算法之二分查找

JAVA高效程式設計指南

CQ V1.0分詞bates(基于雙數組tire樹)—應該是目前最快的中文分詞算法

【資料結構】醫院選址

[轉]ISUP信令的IAM消息詳細内容

筆試面試題目：滑動視窗(二)

交通/城市相關的公開的資料集上學時整理的Xie et al., 2020收集的

資料結構與算法（27）——排序（二）

GitHub連夜封殺！這份阿裡 10W 字内部 Java 字面試手冊到底有多強？

Linux裝置模型（中）之上層容器