Damerau–Levenshtein distance in SQL

Couple of years ago I needed to implement a kind of fuzzy matching algorithm in SQL Server. Today I have just found my code SmileI implemented it in SQL 2005 and it works on newer versions as well. Code is based on the Damerau–Levenshtein distance algorithm. I was using a SQL CLR user defined scalar function: inputs are 2 strings and returns a number between 0 and 1. If it is more close to 1 it means the two input strings are closer to each other. Of course SSIS Fuzzy Lookup Transformation may work better Winking smile.

Here is the code I was using;

using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;

public partial class UserDefinedFunctions
{
    // This function is also exposed to the database.  It accepts two names,
    // performs a Damerau-Levenshtein edit distance calculation on them and
    // calculates a similarity score. this is the fuzzy matching.
    [Microsoft.SqlServer.Server.SqlFunction]
    [return: Microsoft.SqlServer.Server.SqlFacet(Precision = 5, Scale = 4)]
    public static SqlDecimal FuzzyMatching(SqlString string1, SqlString string2)
    {
        SqlDecimal result;
        // Special case:  Either string is NULL, result is NULL
        if (string1 == SqlString.Null || string2 == SqlString.Null)
            result = SqlDecimal.Null;
        else
        {
            // Special case:  Either string is empty string, result is 0.0
            int strlen1 = string1.Value.Length;
            int strlen2 = string2.Value.Length;
            if (strlen1 == 0 || strlen2 == 0)
                return new SqlDecimal(0.0);
            SqlInt32 distance = CalculateDamLev(string1, string2);
            result = new SqlDecimal(1.0 - (double)distance / Math.Max(strlen1, strlen2));
        }
        return result;
    }

    // Accepts 3 numbers, returns the minimum
    private static int Min3(int a, int b, int c)
    {
        return Math.Min(a, Math.Min(b, c));
    }

    // This private function calculates the Damerau-Levenshtein edit distance
    private static SqlInt32 CalculateDamLev(SqlString string1, SqlString string2)
    {
        SqlInt32 result;
        // Special case:  If either string is NULL, the result is NULL
        if (string1 == SqlString.Null || string2 == SqlString.Null)
            result = SqlInt32.Null;
        {
            // Special case:  If either string is length 0, the result is the
            // length of the other string
            int strlen1 = string1.Value.Length;
            int strlen2 = string2.Value.Length;
            if (strlen1 == 0 || strlen2 == 0)
                result = new SqlInt32(Math.Max(strlen1, strlen2));
            else
            {
                // d is a table with lenStr1+1 rows and lenStr2+1 columns
                int[,] calarray = new int[strlen1 + 1, strlen2 + 1];

                // initialize the array
                for (int i = 0; i < strlen1; i++)
                    calarray[i, 0] = i;
                for (int i = 0; i < strlen2; i++)
                    calarray[0, i] = i;

                // loop through the array
                for (int i = 1; i <= strlen1; i++)
                    for (int j = 1; j <= strlen2; j++)
                    {
                        int cost = 0;
                        cost = (char.ToUpper(string1.Value[i - 1]) == char.ToUpper(string2.Value[j - 1])) ? 0 : 1;
                        calarray[i, j] = Min3(
                                calarray[i - 1, j] + 1,     // deletion
                                calarray[i, j - 1] + 1,     // insertion
                                calarray[i - 1, j - 1] + cost   // substitution
                            );
                        if (i > 1
                            && j > 1
                            && char.ToUpper(string1.Value[i - 1]) == char.ToUpper(string2.Value[j - 2])
                            && char.ToUpper(string1.Value[i - 2]) == char.ToUpper(string2.Value[j - 1]))
                            calarray[i, j] = Math.Min(
                                    calarray[i, j],
                                    calarray[i - 2, j - 2] + cost   // transposition
                                 );
                    }
                result = new SqlInt32(calarray[strlen1, strlen2]);
            }
        }
        return result;
    }
};

 

I may used some sources from the internet that time, but I do not remember. However if you feel that part of your code is shown here and would like to have the credit, please drop me a line through the contact form.

Add comment