天天看点

数据结构C++作业查找排序哈夫曼编码

查找

查找:二分法查找(非递归和递归)、哈希查找

哈希表解决碰撞的方式为线性探测法。

一、英语单词及中文解释。ENword.h

通过将英语单词全部转换为小写格式,才可以进行比较。

#include <string>
#include <iostream>
#include <cctype>
#include <algorithm>//tramsform
using namespace std;
struct ENword
{
    string enword;
    vector<string> chexp;
    ENword(string ew):enword(ew) {};
    ENword() {};
};

bool operator<(const ENword& a, const ENword& b)
{
    string x=a.enword;
    string y=b.enword;
    transform(a.enword.begin(),a.enword.end(),x.begin(),::tolower);
    transform(b.enword.begin(),b.enword.end(),y.begin(),::tolower);
    if(x<y) return true;
    else return false;
}
bool operator>(const ENword& a, const ENword& b)
{
    string x=a.enword;
    string y=b.enword;
    transform(a.enword.begin(),a.enword.end(),x.begin(),::tolower);
    transform(b.enword.begin(),b.enword.end(),y.begin(),::tolower);
    if(x>y) return true;
    else return false;
}
           

二、查找算法Search.h

template <typename T>
class BinSearch1{
private:
    int num_cmp;
public:
    BinSearch1():num_cmp(){};
    int Search(vector<T>& A,int n, T k)//A store data from 1
    {
        int low =, high=n;
        int mid;
        while(low <= high)
        {
            mid = (low + high)/;
            if(++num_cmp && k < A[mid])high=mid-;
            else if(++num_cmp && k>A[mid])low=mid+;
            else return mid;
        }
        cout << "not found" << endl;
        return -;
    }
    void PrintCmpNum(){cout<<"BinSearch1 number of compare: "
    <<num_cmp<<endl;};
    void Clear(){num_cmp=;};
};
template <typename T>
class BinSearch2{
private:
    int num_cmp;
public:
    BinSearch2():num_cmp(){};
    int Search(vector<T>& A,int low, int high, T k)
    {
        int mid;
        if (low > high) {
            cout << "not found" << endl; return -;
        }
        else{
            mid = (low + high)/;
            if(++num_cmp && k < A[mid])return Search(A, low, mid-, k);
            else if(++num_cmp && k> A[mid])return Search(A,mid+,high,k);
            else return mid;
        }
    }
    void PrintCmpNum(){cout<<"BinSearch2 number of compare: "
    <<num_cmp<<endl;};
    void Clear(){num_cmp=;};
};
class BKDRHash{
private:
    int num_cmp;
public:
    BKDRHash():num_cmp(){};
    unsigned int basicHash(string eword, int arrLen);
    unsigned int setHash(string eword, int arrLen, const ENword* array);
    unsigned int search(string eword, int arrLen, const ENword* array);
    void PrintCmpNum() {
        cout << "hash search number of compare: "
            << num_cmp << endl;
    };
    void Clear(){num_cmp=;};
};
unsigned int BKDRHash::
basicHash(string eword, int arrLen)
{
    unsigned int seed = ;
    unsigned int hash = ;
    for(int i=; i<eword.size(); i++)
    {
        hash = hash*seed+eword[i];
    }
    return hash=hash%arrLen;
}
unsigned int BKDRHash::
setHash(string eword, int arrLen, const ENword* array)
{
    unsigned int hash=basicHash(eword, arrLen);
    for(int d=; d<arrLen; d++)
    {
        if(array[(hash+d)%arrLen].enword == "")return (hash+d)%arrLen;
    }
    cout<<"no space for this element: "<<eword<<endl;
    return arrLen;
}
unsigned int BKDRHash::
search(string eword, int arrLen, const ENword* array)
{
    int hash = basicHash(eword, arrLen);
    for(int d=; d<arrLen; d++)
    {
        ++num_cmp;
        if(array[(hash+d)%arrLen].enword == eword) return (hash+d)%arrLen;
    }
    cout<<eword <<" not found"<<endl;
    return arrLen;
}
           

三、主函数main.cpp

// prj.cpp: 定义控制台应用程序的入口点。
//

#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <vector>
#include <locale>
#include "ENword.h"
#include "Search.h"

int main()
{
    string line, cword;
    wstring fENwords = L"英语单词词库.txt";
    ifstream ifile;
    vector<ENword> ENwords;
    setlocale(LC_ALL, "Chinese-simplified");
    ifile.open(fENwords);
    while (getline(ifile, line))
    {
        ENword temp;
        istringstream record(line);
        record >> temp.enword;
        while (record >> cword)temp.chexp.push_back(cword);
        ENwords.push_back(temp);
    }
    ifile.close();
    ENword ew1("a"),ew2("Monday");
    BinSearch1<ENword> bs1;
    cout << ew1.enword << ", word position: " << bs1.Search(ENwords, ENwords.size(), ew1) <<endl;
    bs1.PrintCmpNum();
    bs1.Clear();

    cout << ew2.enword << ", word position: " << bs1.Search(ENwords, ENwords.size(), ew2) << endl;
    bs1.PrintCmpNum();
    bs1.Clear();
    cout << endl;

    BinSearch2<ENword> bs2;
    cout << ew1.enword << ", word position: " << bs2.Search(ENwords, , ENwords.size(), ew1) << endl;
    bs2.PrintCmpNum();
    bs2.Clear();

    cout << ew2.enword << ", word position: " << bs2.Search(ENwords, , ENwords.size(), ew2) << endl;
    bs2.PrintCmpNum();
    bs2.Clear();
    cout << endl;
    //BKDR hash
    ENword head[];
    BKDRHash hash;
    ifile.open(fENwords);
    setlocale(LC_ALL, "Chinese-simplified");
    while (getline(ifile, line))
    {
        ENword temp;
        istringstream record(line);
        record >> temp.enword;
        while (record >> cword)temp.chexp.push_back(cword);
        if (hash.setHash(temp.enword, , head) != )
        {
            int i = hash.setHash(temp.enword, , head);
            head[i] = temp;
        }
    }
    ifile.close();
    setlocale(LC_ALL, "C");
    int num;
    if (hash.search("Monday", , head) != )
    {
        num = hash.search("Monday", , head);//
        hash.PrintCmpNum();
        hash.Clear();
        cout << "Monday. position: " << num << endl;
        cout << head[num].enword << ": ";
        for (vector<string>::iterator iter = head[num].chexp.begin()
            ; iter != head[num].chexp.end();
            iter++)
            cout << *iter;
        cout << endl;
    }
    int num_space_used = ;
    int num_sapce_not_used = ;
    for (int i = ; i < ; i++)
    {
        if (head[i].enword == "")++num_sapce_not_used;
        else ++num_space_used;
    }
    cout <<"the number of total words: "<<ENwords.size()<<". array head size: 2000. "<<
        num_space_used<<" space used. "<<
        num_sapce_not_used<<" space not used." << endl;
    return ;
}
           

排序

作业内容

将《英语单词词库打乱.txt》中的文本用排序算法排序。

实验环境:vs2017,将main.cpp文件保存为gb2312编码格式

一、单词及中文释义构成的类文件ENword.h

#pragma once
#include <string>
using namespace std;
struct ENword
{
    string enword;//未知重写说明符.加上了头文件和命名空间
    vector<string> chexp;
};
bool operator<(const ENword& a, const ENword& b)
{
    if (a.enword < b.enword)return true;
    else return false;
}
bool operator<=(const ENword& a, const ENword& b)
{
    if (a.enword <= b.enword)return true;
    else return false;
}
           

二、排序算法

#pragma once
template <typename T>
void insert_sort(vector<T>& A)//插入排序
{
    int num_cmp=;
    int num_swap=;
    for (int i = ; i<A.size(); i++)
    {

        for (int j = i; (j>)//start: i=1
            && ++num_cmp
            && (A[j]<A[j - ]); //elems before j has sorted,
            //ensure whether tmp need sorted 
            j--)
            {
                swap(A[j],A[j - ]);
                num_swap++;
            }
    }
    cout<<"insert sort ";
    cout<<"the number of comparison: "<<num_cmp
    <<"; the number of swap: "<<num_swap<<endl;
}
template <typename T>
void selection_sort(vector<T>& A)//选择排序
{
    int num_cmp=;
    int num_swap=;
    for (int i=; i < A.size()-; i++)
    {
        int lowindex = i;
        for (int j = A.size() - ; j > i; j--)
        {
            num_cmp++;
            if (A[j] < A[lowindex])
                lowindex = j;
        }
        ++num_swap;
        swap(A[i], A[lowindex]);
    }
    cout<<"selection sort ";
    cout<<"the number of comparison: "<<num_cmp
    <<"; the number of swap: "<<num_swap<<endl; 
}
template <typename T>
void bubble_sort(vector<T>& A)//冒泡排序
{
    int num_cmp=;
    int num_swap=;
    for(int i=;i<A.size()-;i++)
        for(int j=A.size()-; j>i;j--)
        {
            num_cmp++;//the number of comparison is a constant:n(n-1)/2
            if(A[j]<A[j-])
            {
                swap(A[j],A[j-]);
                num_swap++;
            }
        }
    cout<<"bubble sort ";
    cout<<"the number of comparison: "<<num_cmp
    <<"; the number of swap: "<<num_swap<<endl; 
}
template <typename T>
void shell_insert_sort(vector<T>& A,int d,int& num_cmp,int& num_swap)
{

    for (int i = d; i<A.size(); i++)
    {

        for (int j = i; (j>=d)//start: i=1
            && ++num_cmp
            && (A[j]<A[j - d]); //elems before j has sorted,
            //ensure whether tmp need sorted 
            j-=d)
            {
                swap(A[j],A[j - d]);
                num_swap++;
            }
    }

}
template <typename T>
void shell_sort(vector<T>& A)//希尔排序
{
    int d=A.size()/;
    int num_cmp=;
    int num_swap=;
    while(d>=)
    {
        shell_insert_sort(A, d, num_cmp, num_swap);
        d = d / ;
    }
    cout<<"shell sort ";
    cout<<"the number of comparison: "<<num_cmp
    <<"; the number of swap: "<<num_swap<<endl; 
}
template <typename T>
class QuickSort//快速排序
{
private:
    int num_cmp;
    int num_swap;
    int SelectPivot(int left, int right);
    int Partition(vector<T>& A, int left,int right);
public:
    QuickSort():num_cmp(),num_swap() {};
    void Sort(vector<T>& A,int left, int right);
    void PrintData();
};
template <typename T>
void QuickSort<T>::
Sort(vector<T>& A, int left, int right)
{
    if(right<=left) return;//if there are only one elem in vector, return
    int pivot;
    //int pivot=SelectPivot(left,right);
    //swap(A[pivot],A[right]);
    pivot=Partition(A, left, right);
    if (pivot != )
    {
        Sort(A, left, pivot-);
        Sort(A, pivot + , right);
    }
    else
    {
        Sort(A,left,right);
    }
}
template <typename T>
int QuickSort<T>::
SelectPivot(int left, int right) 
{return (left+right)/;}
template <typename T>
int QuickSort<T>::
Partition(vector<T>& A, int left, int right)
{
    T tmp;
    int i=left;
    int j=right;
    tmp=A[j];//基准为A[j],从i开始,寻找比temp=A[j]大的数字填A[j]
    ++num_swap;
    while(i!=j)
    {
        //i向右,填充A[j]
        while(++num_cmp &&
        A[i]<tmp && j>i)//temp<A[i]时停止,则A[i]<temp时,i++
            i++;
        if(i<j)
        {
            ++num_swap;
            A[j]=A[i];
            j--;
        }
        //j向左,填充A[i]
        while(++num_cmp &&
        tmp<=A[j] && j>i)//A[j]<temp时停止,A[i]=A[j]
            j--;
        if(i<j)
        {
            ++num_swap;
            A[i]=A[j];
            i++;
        }
    }
    A[i]=tmp;
    return i;
}
template <typename T>
void QuickSort<T>::
PrintData()
{
    cout << "quick sort ";
    cout << "the number of comparison: " << num_cmp
        << "; the number of swap: " << num_swap << endl;
}
           

三、main.cpp

#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <locale>
#include "ENword.h"
#include "sort.h"
using namespace std;
int main()
{
    std::string line,cword;
    std::wstring fENwords = L"英语单词词库打乱.txt";
    std::wstring fENwords_sorted = L"英语单词排序.txt";
    std::ifstream ifile;
    std::ofstream ofile;
    std::vector<ENword> ENwords_origin,ENwords;
    setlocale(LC_ALL,"Chinese-simplified");
    ifile.open(fENwords);
    while (getline(ifile,line)) 
    {

        ENword temp;
        istringstream record(line);
        record >> temp.enword;
        while (record >> cword)temp.chexp.push_back(cword);
        ENwords_origin.push_back(temp);
    }
    ENwords = ENwords_origin;
    insert_sort(ENwords);
    ENwords = ENwords_origin;
    bubble_sort(ENwords);
    ENwords = ENwords_origin;
    selection_sort(ENwords);
    ENwords = ENwords_origin;
    shell_sort(ENwords);
    ENwords = ENwords_origin;
    QuickSort<ENword> s;
    s.Sort(ENwords, , ENwords.size()-);
    s.PrintData();
    setlocale(LC_ALL, "C");
    for (vector<ENword>::iterator iter = ENwords.begin();
        iter != ENwords.end(); iter++)
    {
        cout << iter->enword<<" ";
        for (vector<string>::iterator iter2 = iter->chexp.begin();
            iter2 != iter->chexp.end(); iter2++)
            cout << *iter2;
        cout << endl;
    }
    ifile.close();
    setlocale(LC_ALL, "Chinese-simplified");
    ofile.open(fENwords_sorted);
    setlocale(LC_ALL, "C");
    for (vector<ENword>::iterator iter = ENwords.begin();
        iter != ENwords.end(); iter++)
    {
        ofile << iter->enword << " ";
        for (vector<string>::iterator iter2 = iter->chexp.begin();
            iter2 != iter->chexp.end(); iter2++)
            ofile << *iter2;
        ofile<< endl;
    }
    ofile.close();
    return ;
}
           

哈夫曼编码

#include <iostream>
#include <queue>
#include <map>
#include <climits> // for CHAR_BIT,<climits>头文件定义的符号常量,256 bits
#include <iterator>
#include <algorithm>

const int UniqueSymbols =  << CHAR_BIT;
const char* SampleString = "huffman encoding";

typedef std::vector<bool> HuffCode;
typedef std::map<char, HuffCode> HuffCodeMap;
//Map是c++的一个标准容器,她提供了很好一对一的关系,在一些程序中建立一个map可以起到事半功倍的效果,总结了一些map基本简单实用的操作!
class INode
{
public:
    const int f;

    virtual ~INode() {}

protected:
    INode(int f) : f(f) {}
};

class InternalNode : public INode
{
public:
    INode *const left;
    INode *const right;

    InternalNode(INode* c0, INode* c1) : INode(c0->f + c1->f), left(c0), right(c1) {}
    ~InternalNode()
    {
        delete left;
        delete right;
    }
};

class LeafNode : public INode
{
public:
    const char c;

    LeafNode(int f, char c) : INode(f), c(c) {}
};

struct NodeCmp
{//()重载
    bool operator()(const INode* lhs, const INode* rhs) const { return lhs->f > rhs->f; }
};

INode* BuildTree(const int(&frequencies)[UniqueSymbols])
{
    //priority_queue 对于基本类型的使用方法相对简单。他的模板声明带有三个参数,priority_queue<Type, Container, Functional>
    //Type 为数据类型, Container 为保存数据的容器,Functional 为元素比较方式。
    std::priority_queue<INode*, std::vector<INode*>, NodeCmp> trees;

    for (int i = ; i < UniqueSymbols; ++i)
    {
        if (frequencies[i] != )
            trees.push(new LeafNode(frequencies[i], (char)i));
    }
    while (trees.size() > )
    {
        INode* childR = trees.top();
        trees.pop();

        INode* childL = trees.top();
        trees.pop();

        INode* parent = new InternalNode(childR, childL);
        trees.push(parent);
    }
    return trees.top();
}

void GenerateCodes(const INode* node, const HuffCode& prefix, HuffCodeMap& outCodes)
{
/* dynamic_cast<> 的使用方法。*/ 

/*作用:将一个基类对象指针(或引用)cast到继承类指针,dynamic_cast会根据基类指针是否真正指向继承类指针来做相应处理, 
       即会作一定的判断。 
       对指针进行dynamic_cast,失败返回null,成功返回正常cast后的对象指针; 
       对引用进行dynamic_cast,失败抛出一个异常,成功返回正常cast后的对象引用。 
*/
    if (const LeafNode* lf = dynamic_cast<const LeafNode*>(node))
    {
        outCodes[lf->c] = prefix;//<c,prefix>
    }
    else if (const InternalNode* in = dynamic_cast<const InternalNode*>(node))
    {
        HuffCode leftPrefix = prefix;
        leftPrefix.push_back(false);//左0
        GenerateCodes(in->left, leftPrefix, outCodes);

        HuffCode rightPrefix = prefix;
        rightPrefix.push_back(true);//右1
        GenerateCodes(in->right, rightPrefix, outCodes);
    }
}

int main()
{
    // Build frequency table
    int frequencies[UniqueSymbols] = {  };
    const char* ptr = SampleString;
    while (*ptr != '\0')
        ++frequencies[*ptr++];

    INode* root = BuildTree(frequencies);

    HuffCodeMap codes;
    GenerateCodes(root, HuffCode(), codes);
    delete root;

    for (HuffCodeMap::const_iterator it = codes.begin(); it != codes.end(); ++it)
    //const_iterator,传递过来一个const类型的容器,只能用const_iterator来遍历。
    {
        //map容器的迭代器里面 有个first 和 second
        std::cout << it->first << " ";//<char, HuffCode>,char
        std::copy(it->second.begin(), it->second.end(),
            std::ostream_iterator<bool>(std::cout));
            //Ostream iterators are output iterators that write sequentially to an output stream (such as cout).
            //copy 将数值复制到输出流中,参数依次是开始、结束,输出流
        std::cout << std::endl;
    }
    return ;
}
           

继续阅读