天天看點

資料結構C++作業查找排序哈夫曼編碼

查找

查找:二分法查找(非遞歸和遞歸)、哈希查找

哈希表解決碰撞的方式為線性探測法。

一、英語單詞及中文解釋。ENword.h

通過将英語單詞全部轉換為小寫格式,才可以進行比較。

#include <string>
#include <iostream>
#include <cctype>
#include <algorithm>//tramsform
using namespace std;
struct ENword
{
    string enword;
    vector<string> chexp;
    ENword(string ew):enword(ew) {};
    ENword() {};
};

bool operator<(const ENword& a, const ENword& b)
{
    string x=a.enword;
    string y=b.enword;
    transform(a.enword.begin(),a.enword.end(),x.begin(),::tolower);
    transform(b.enword.begin(),b.enword.end(),y.begin(),::tolower);
    if(x<y) return true;
    else return false;
}
bool operator>(const ENword& a, const ENword& b)
{
    string x=a.enword;
    string y=b.enword;
    transform(a.enword.begin(),a.enword.end(),x.begin(),::tolower);
    transform(b.enword.begin(),b.enword.end(),y.begin(),::tolower);
    if(x>y) return true;
    else return false;
}
           

二、查找算法Search.h

template <typename T>
class BinSearch1{
private:
    int num_cmp;
public:
    BinSearch1():num_cmp(){};
    int Search(vector<T>& A,int n, T k)//A store data from 1
    {
        int low =, high=n;
        int mid;
        while(low <= high)
        {
            mid = (low + high)/;
            if(++num_cmp && k < A[mid])high=mid-;
            else if(++num_cmp && k>A[mid])low=mid+;
            else return mid;
        }
        cout << "not found" << endl;
        return -;
    }
    void PrintCmpNum(){cout<<"BinSearch1 number of compare: "
    <<num_cmp<<endl;};
    void Clear(){num_cmp=;};
};
template <typename T>
class BinSearch2{
private:
    int num_cmp;
public:
    BinSearch2():num_cmp(){};
    int Search(vector<T>& A,int low, int high, T k)
    {
        int mid;
        if (low > high) {
            cout << "not found" << endl; return -;
        }
        else{
            mid = (low + high)/;
            if(++num_cmp && k < A[mid])return Search(A, low, mid-, k);
            else if(++num_cmp && k> A[mid])return Search(A,mid+,high,k);
            else return mid;
        }
    }
    void PrintCmpNum(){cout<<"BinSearch2 number of compare: "
    <<num_cmp<<endl;};
    void Clear(){num_cmp=;};
};
class BKDRHash{
private:
    int num_cmp;
public:
    BKDRHash():num_cmp(){};
    unsigned int basicHash(string eword, int arrLen);
    unsigned int setHash(string eword, int arrLen, const ENword* array);
    unsigned int search(string eword, int arrLen, const ENword* array);
    void PrintCmpNum() {
        cout << "hash search number of compare: "
            << num_cmp << endl;
    };
    void Clear(){num_cmp=;};
};
unsigned int BKDRHash::
basicHash(string eword, int arrLen)
{
    unsigned int seed = ;
    unsigned int hash = ;
    for(int i=; i<eword.size(); i++)
    {
        hash = hash*seed+eword[i];
    }
    return hash=hash%arrLen;
}
unsigned int BKDRHash::
setHash(string eword, int arrLen, const ENword* array)
{
    unsigned int hash=basicHash(eword, arrLen);
    for(int d=; d<arrLen; d++)
    {
        if(array[(hash+d)%arrLen].enword == "")return (hash+d)%arrLen;
    }
    cout<<"no space for this element: "<<eword<<endl;
    return arrLen;
}
unsigned int BKDRHash::
search(string eword, int arrLen, const ENword* array)
{
    int hash = basicHash(eword, arrLen);
    for(int d=; d<arrLen; d++)
    {
        ++num_cmp;
        if(array[(hash+d)%arrLen].enword == eword) return (hash+d)%arrLen;
    }
    cout<<eword <<" not found"<<endl;
    return arrLen;
}
           

三、主函數main.cpp

// prj.cpp: 定義控制台應用程式的入口點。
//

#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <vector>
#include <locale>
#include "ENword.h"
#include "Search.h"

int main()
{
    string line, cword;
    wstring fENwords = L"英語單詞詞庫.txt";
    ifstream ifile;
    vector<ENword> ENwords;
    setlocale(LC_ALL, "Chinese-simplified");
    ifile.open(fENwords);
    while (getline(ifile, line))
    {
        ENword temp;
        istringstream record(line);
        record >> temp.enword;
        while (record >> cword)temp.chexp.push_back(cword);
        ENwords.push_back(temp);
    }
    ifile.close();
    ENword ew1("a"),ew2("Monday");
    BinSearch1<ENword> bs1;
    cout << ew1.enword << ", word position: " << bs1.Search(ENwords, ENwords.size(), ew1) <<endl;
    bs1.PrintCmpNum();
    bs1.Clear();

    cout << ew2.enword << ", word position: " << bs1.Search(ENwords, ENwords.size(), ew2) << endl;
    bs1.PrintCmpNum();
    bs1.Clear();
    cout << endl;

    BinSearch2<ENword> bs2;
    cout << ew1.enword << ", word position: " << bs2.Search(ENwords, , ENwords.size(), ew1) << endl;
    bs2.PrintCmpNum();
    bs2.Clear();

    cout << ew2.enword << ", word position: " << bs2.Search(ENwords, , ENwords.size(), ew2) << endl;
    bs2.PrintCmpNum();
    bs2.Clear();
    cout << endl;
    //BKDR hash
    ENword head[];
    BKDRHash hash;
    ifile.open(fENwords);
    setlocale(LC_ALL, "Chinese-simplified");
    while (getline(ifile, line))
    {
        ENword temp;
        istringstream record(line);
        record >> temp.enword;
        while (record >> cword)temp.chexp.push_back(cword);
        if (hash.setHash(temp.enword, , head) != )
        {
            int i = hash.setHash(temp.enword, , head);
            head[i] = temp;
        }
    }
    ifile.close();
    setlocale(LC_ALL, "C");
    int num;
    if (hash.search("Monday", , head) != )
    {
        num = hash.search("Monday", , head);//
        hash.PrintCmpNum();
        hash.Clear();
        cout << "Monday. position: " << num << endl;
        cout << head[num].enword << ": ";
        for (vector<string>::iterator iter = head[num].chexp.begin()
            ; iter != head[num].chexp.end();
            iter++)
            cout << *iter;
        cout << endl;
    }
    int num_space_used = ;
    int num_sapce_not_used = ;
    for (int i = ; i < ; i++)
    {
        if (head[i].enword == "")++num_sapce_not_used;
        else ++num_space_used;
    }
    cout <<"the number of total words: "<<ENwords.size()<<". array head size: 2000. "<<
        num_space_used<<" space used. "<<
        num_sapce_not_used<<" space not used." << endl;
    return ;
}
           

排序

作業内容

将《英語單詞詞庫打亂.txt》中的文本用排序算法排序。

實驗環境:vs2017,将main.cpp檔案儲存為gb2312編碼格式

一、單詞及中文釋義構成的類檔案ENword.h

#pragma once
#include <string>
using namespace std;
struct ENword
{
    string enword;//未知重寫說明符.加上了頭檔案和命名空間
    vector<string> chexp;
};
bool operator<(const ENword& a, const ENword& b)
{
    if (a.enword < b.enword)return true;
    else return false;
}
bool operator<=(const ENword& a, const ENword& b)
{
    if (a.enword <= b.enword)return true;
    else return false;
}
           

二、排序算法

#pragma once
template <typename T>
void insert_sort(vector<T>& A)//插入排序
{
    int num_cmp=;
    int num_swap=;
    for (int i = ; i<A.size(); i++)
    {

        for (int j = i; (j>)//start: i=1
            && ++num_cmp
            && (A[j]<A[j - ]); //elems before j has sorted,
            //ensure whether tmp need sorted 
            j--)
            {
                swap(A[j],A[j - ]);
                num_swap++;
            }
    }
    cout<<"insert sort ";
    cout<<"the number of comparison: "<<num_cmp
    <<"; the number of swap: "<<num_swap<<endl;
}
template <typename T>
void selection_sort(vector<T>& A)//選擇排序
{
    int num_cmp=;
    int num_swap=;
    for (int i=; i < A.size()-; i++)
    {
        int lowindex = i;
        for (int j = A.size() - ; j > i; j--)
        {
            num_cmp++;
            if (A[j] < A[lowindex])
                lowindex = j;
        }
        ++num_swap;
        swap(A[i], A[lowindex]);
    }
    cout<<"selection sort ";
    cout<<"the number of comparison: "<<num_cmp
    <<"; the number of swap: "<<num_swap<<endl; 
}
template <typename T>
void bubble_sort(vector<T>& A)//冒泡排序
{
    int num_cmp=;
    int num_swap=;
    for(int i=;i<A.size()-;i++)
        for(int j=A.size()-; j>i;j--)
        {
            num_cmp++;//the number of comparison is a constant:n(n-1)/2
            if(A[j]<A[j-])
            {
                swap(A[j],A[j-]);
                num_swap++;
            }
        }
    cout<<"bubble sort ";
    cout<<"the number of comparison: "<<num_cmp
    <<"; the number of swap: "<<num_swap<<endl; 
}
template <typename T>
void shell_insert_sort(vector<T>& A,int d,int& num_cmp,int& num_swap)
{

    for (int i = d; i<A.size(); i++)
    {

        for (int j = i; (j>=d)//start: i=1
            && ++num_cmp
            && (A[j]<A[j - d]); //elems before j has sorted,
            //ensure whether tmp need sorted 
            j-=d)
            {
                swap(A[j],A[j - d]);
                num_swap++;
            }
    }

}
template <typename T>
void shell_sort(vector<T>& A)//希爾排序
{
    int d=A.size()/;
    int num_cmp=;
    int num_swap=;
    while(d>=)
    {
        shell_insert_sort(A, d, num_cmp, num_swap);
        d = d / ;
    }
    cout<<"shell sort ";
    cout<<"the number of comparison: "<<num_cmp
    <<"; the number of swap: "<<num_swap<<endl; 
}
template <typename T>
class QuickSort//快速排序
{
private:
    int num_cmp;
    int num_swap;
    int SelectPivot(int left, int right);
    int Partition(vector<T>& A, int left,int right);
public:
    QuickSort():num_cmp(),num_swap() {};
    void Sort(vector<T>& A,int left, int right);
    void PrintData();
};
template <typename T>
void QuickSort<T>::
Sort(vector<T>& A, int left, int right)
{
    if(right<=left) return;//if there are only one elem in vector, return
    int pivot;
    //int pivot=SelectPivot(left,right);
    //swap(A[pivot],A[right]);
    pivot=Partition(A, left, right);
    if (pivot != )
    {
        Sort(A, left, pivot-);
        Sort(A, pivot + , right);
    }
    else
    {
        Sort(A,left,right);
    }
}
template <typename T>
int QuickSort<T>::
SelectPivot(int left, int right) 
{return (left+right)/;}
template <typename T>
int QuickSort<T>::
Partition(vector<T>& A, int left, int right)
{
    T tmp;
    int i=left;
    int j=right;
    tmp=A[j];//基準為A[j],從i開始,尋找比temp=A[j]大的數字填A[j]
    ++num_swap;
    while(i!=j)
    {
        //i向右,填充A[j]
        while(++num_cmp &&
        A[i]<tmp && j>i)//temp<A[i]時停止,則A[i]<temp時,i++
            i++;
        if(i<j)
        {
            ++num_swap;
            A[j]=A[i];
            j--;
        }
        //j向左,填充A[i]
        while(++num_cmp &&
        tmp<=A[j] && j>i)//A[j]<temp時停止,A[i]=A[j]
            j--;
        if(i<j)
        {
            ++num_swap;
            A[i]=A[j];
            i++;
        }
    }
    A[i]=tmp;
    return i;
}
template <typename T>
void QuickSort<T>::
PrintData()
{
    cout << "quick sort ";
    cout << "the number of comparison: " << num_cmp
        << "; the number of swap: " << num_swap << endl;
}
           

三、main.cpp

#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <locale>
#include "ENword.h"
#include "sort.h"
using namespace std;
int main()
{
    std::string line,cword;
    std::wstring fENwords = L"英語單詞詞庫打亂.txt";
    std::wstring fENwords_sorted = L"英語單詞排序.txt";
    std::ifstream ifile;
    std::ofstream ofile;
    std::vector<ENword> ENwords_origin,ENwords;
    setlocale(LC_ALL,"Chinese-simplified");
    ifile.open(fENwords);
    while (getline(ifile,line)) 
    {

        ENword temp;
        istringstream record(line);
        record >> temp.enword;
        while (record >> cword)temp.chexp.push_back(cword);
        ENwords_origin.push_back(temp);
    }
    ENwords = ENwords_origin;
    insert_sort(ENwords);
    ENwords = ENwords_origin;
    bubble_sort(ENwords);
    ENwords = ENwords_origin;
    selection_sort(ENwords);
    ENwords = ENwords_origin;
    shell_sort(ENwords);
    ENwords = ENwords_origin;
    QuickSort<ENword> s;
    s.Sort(ENwords, , ENwords.size()-);
    s.PrintData();
    setlocale(LC_ALL, "C");
    for (vector<ENword>::iterator iter = ENwords.begin();
        iter != ENwords.end(); iter++)
    {
        cout << iter->enword<<" ";
        for (vector<string>::iterator iter2 = iter->chexp.begin();
            iter2 != iter->chexp.end(); iter2++)
            cout << *iter2;
        cout << endl;
    }
    ifile.close();
    setlocale(LC_ALL, "Chinese-simplified");
    ofile.open(fENwords_sorted);
    setlocale(LC_ALL, "C");
    for (vector<ENword>::iterator iter = ENwords.begin();
        iter != ENwords.end(); iter++)
    {
        ofile << iter->enword << " ";
        for (vector<string>::iterator iter2 = iter->chexp.begin();
            iter2 != iter->chexp.end(); iter2++)
            ofile << *iter2;
        ofile<< endl;
    }
    ofile.close();
    return ;
}
           

哈夫曼編碼

#include <iostream>
#include <queue>
#include <map>
#include <climits> // for CHAR_BIT,<climits>頭檔案定義的符号常量,256 bits
#include <iterator>
#include <algorithm>

const int UniqueSymbols =  << CHAR_BIT;
const char* SampleString = "huffman encoding";

typedef std::vector<bool> HuffCode;
typedef std::map<char, HuffCode> HuffCodeMap;
//Map是c++的一個标準容器,她提供了很好一對一的關系,在一些程式中建立一個map可以起到事半功倍的效果,總結了一些map基本簡單實用的操作!
class INode
{
public:
    const int f;

    virtual ~INode() {}

protected:
    INode(int f) : f(f) {}
};

class InternalNode : public INode
{
public:
    INode *const left;
    INode *const right;

    InternalNode(INode* c0, INode* c1) : INode(c0->f + c1->f), left(c0), right(c1) {}
    ~InternalNode()
    {
        delete left;
        delete right;
    }
};

class LeafNode : public INode
{
public:
    const char c;

    LeafNode(int f, char c) : INode(f), c(c) {}
};

struct NodeCmp
{//()重載
    bool operator()(const INode* lhs, const INode* rhs) const { return lhs->f > rhs->f; }
};

INode* BuildTree(const int(&frequencies)[UniqueSymbols])
{
    //priority_queue 對于基本類型的使用方法相對簡單。他的模闆聲明帶有三個參數,priority_queue<Type, Container, Functional>
    //Type 為資料類型, Container 為儲存資料的容器,Functional 為元素比較方式。
    std::priority_queue<INode*, std::vector<INode*>, NodeCmp> trees;

    for (int i = ; i < UniqueSymbols; ++i)
    {
        if (frequencies[i] != )
            trees.push(new LeafNode(frequencies[i], (char)i));
    }
    while (trees.size() > )
    {
        INode* childR = trees.top();
        trees.pop();

        INode* childL = trees.top();
        trees.pop();

        INode* parent = new InternalNode(childR, childL);
        trees.push(parent);
    }
    return trees.top();
}

void GenerateCodes(const INode* node, const HuffCode& prefix, HuffCodeMap& outCodes)
{
/* dynamic_cast<> 的使用方法。*/ 

/*作用:将一個基類對象指針(或引用)cast到繼承類指針,dynamic_cast會根據基類指針是否真正指向繼承類指針來做相應處理, 
       即會作一定的判斷。 
       對指針進行dynamic_cast,失敗傳回null,成功傳回正常cast後的對象指針; 
       對引用進行dynamic_cast,失敗抛出一個異常,成功傳回正常cast後的對象引用。 
*/
    if (const LeafNode* lf = dynamic_cast<const LeafNode*>(node))
    {
        outCodes[lf->c] = prefix;//<c,prefix>
    }
    else if (const InternalNode* in = dynamic_cast<const InternalNode*>(node))
    {
        HuffCode leftPrefix = prefix;
        leftPrefix.push_back(false);//左0
        GenerateCodes(in->left, leftPrefix, outCodes);

        HuffCode rightPrefix = prefix;
        rightPrefix.push_back(true);//右1
        GenerateCodes(in->right, rightPrefix, outCodes);
    }
}

int main()
{
    // Build frequency table
    int frequencies[UniqueSymbols] = {  };
    const char* ptr = SampleString;
    while (*ptr != '\0')
        ++frequencies[*ptr++];

    INode* root = BuildTree(frequencies);

    HuffCodeMap codes;
    GenerateCodes(root, HuffCode(), codes);
    delete root;

    for (HuffCodeMap::const_iterator it = codes.begin(); it != codes.end(); ++it)
    //const_iterator,傳遞過來一個const類型的容器,隻能用const_iterator來周遊。
    {
        //map容器的疊代器裡面 有個first 和 second
        std::cout << it->first << " ";//<char, HuffCode>,char
        std::copy(it->second.begin(), it->second.end(),
            std::ostream_iterator<bool>(std::cout));
            //Ostream iterators are output iterators that write sequentially to an output stream (such as cout).
            //copy 将數值複制到輸出流中,參數依次是開始、結束,輸出流
        std::cout << std::endl;
    }
    return ;
}
           

繼續閱讀