天天看点

Levenshtein字符串编辑距离算法Levenshtein(莱文斯坦)编辑距离算法实现

Levenshtein(莱文斯坦)编辑距离算法实现

1.C++版本(含三个benchmark)

#include <iostream>
#include <string>
#include <vector>

using namespace std;

int minimum(int first, int second, int third)
{
    int former = ;
    if (first < second)
    {
        former = first;
    }
    else
    {
        former = second;
    }
    if (former < third)
    {
        return former;
    }
    else
    {
        return third;
    }
}

int getLevenshteinDistance(string firstStr, string secondStr)
{
    int firstStrLen = firstStr.length();
    int secondStrLen = secondStr.length();
    vector<vector<int>> levenshteinDistanceTable;
    for (int firstIndex = ; firstIndex <= firstStrLen; firstIndex++)
    {
        vector<int> levenshteinRow(secondStrLen+);
        levenshteinDistanceTable.push_back(levenshteinRow);
        levenshteinDistanceTable[firstIndex][] = firstIndex;
    }
    for (int secondIndex = ; secondIndex <= secondStrLen; secondIndex++)
    {
        levenshteinDistanceTable[][secondIndex] = secondIndex;
    }

    int lastCharCost = ;
    for (int firstIndex = ; firstIndex <= firstStrLen; firstIndex++)
    {
        for (int secondIndex = ; secondIndex <= secondStrLen; secondIndex++)
        {
            if (firstStr[firstIndex-] == secondStr[secondIndex-])
            {
                lastCharCost = ;
            }
            else
            {
                lastCharCost = ;
            }
            levenshteinDistanceTable[firstIndex][secondIndex] = minimum(levenshteinDistanceTable[firstIndex - ][secondIndex] + , levenshteinDistanceTable[firstIndex][secondIndex - ] + , levenshteinDistanceTable[firstIndex - ][secondIndex - ] + lastCharCost);
        }
    }
    return levenshteinDistanceTable[firstStrLen][secondStrLen];
}

int main(int argc, char * * argv, char * * env)
{
    string firstStr1 = "sitting";
    string secondStr1 = "kitten";
    cout << "levenstein distance of " << firstStr1 << " and " << secondStr1 << " is:" << getLevenshteinDistance(firstStr1, secondStr1) << endl;
    string firstStr2 = "Saturday";
    string secondStr2 = "Sunday";
    cout << "levenstein distance of " << firstStr2 << " and " << secondStr2 << " is:" << getLevenshteinDistance(firstStr2, secondStr2) << endl;
    string firstStr3 = "levenshtein";
    string secondStr3 = "meilenstein";
    cout << "levenstein distance of " << firstStr3 << " and " << secondStr3 << " is:" << getLevenshteinDistance(firstStr3, secondStr3) << endl;
    char ch;
    cin >> ch;
    return ;
}
           

2.awk版本1(表格法)

function minimum(first, second, third)
{
    if(first < second)
    {
        former = first;
    }
    else
    {
        former = second;
    }
    if(former < third)
    {
        return former;
    }
    else
    {
        return third;
    }
}

function getLevenshteinDistance(firstStr, secondStr)
{
    firstStrLen = length(firstStr);
    secondStrLen = length(secondStr);
    for(secondIndex = ; secondIndex <= secondStrLen; secondIndex++)
    {
        levenshteinDistanceTable[, secondIndex] = secondIndex;
    }
    for(firstIndex = ; firstIndex <= firstStrLen; firstIndex++)
    {
        levenshteinDistanceTable[firstIndex, ] = firstIndex;
    }
    for(firstIndex = ; firstIndex <= firstStrLen; firstIndex++)
    {
        for(secondIndex = ; secondIndex <= secondStrLen; secondIndex++)
        {
            if(match(substr(firstStr, firstIndex, ), substr(secondStr, secondIndex, )) > )
            {
                lastCharCost = ;
            }
            else
            {
                lastCharCost = ;
            }
            levenshteinDistanceTable[firstIndex, secondIndex] = minimum(levenshteinDistanceTable[firstIndex-, secondIndex] + , levenshteinDistanceTable[firstIndex, secondIndex-] + , levenshteinDistanceTable[firstIndex-, secondIndex-] + lastCharCost);
        }
    }
    return levenshteinDistanceTable[firstStrLen, secondStrLen];
}
           

2.awk版本1(递归法,存在重叠子问题重复计算问题,性能较低)

function minimum(first, second, third)
{
    if(first < second)
    {
        former = first;
    }
    else
    {
        former = second;
    }
    if(former < third)
    {
        return former;
    }
    else
    {
        return third;
    }
}

function getLevenshteinDistance(firstStr, firstLen, secondStr, secondLen)
{
    lastCost = ;
    if(firstLen == )
    {
        return secondLen;
    }
    if(secondLen == )
    {
        return firstLen;
    }
    if(match(substr(firstStr, firstLen, ), substr(secondStr, secondLen, )) > )
    {
        lastCost = ;
    }
    else
    {
        lastCost = ;
    }
    first = getLevenshteinDistance(firstStr, firstLen-, secondStr, secondLen) + ;
    second = getLevenshteinDistance(firstStr, firstLen, secondStr, secondLen-) + ;
    third = getLevenshteinDistance(firstStr, firstLen-, secondStr, secondLen-) + lastCost;
    return minimum(first, second, third);
}
           

继续阅读