Buscar contenidos

lunes, 26 de marzo de 2018

Cálculo del porcentaje de similitud entre strings


C#: Calculating Percentage Similarity of 2 strings

/// <summary>
/// Scores how alike two strings are, as a fraction between 0.0 and 1.0, based on
/// the edit distance reported by <c>ComputeLevenshteinDistance</c>.
/// </summary>
/// <param name="source">First string to compare.</param>
/// <param name="target">Second string to compare.</param>
/// <returns>
/// 0.0 when either argument is null or empty (including when both are empty),
/// 1.0 when the two strings are equal, otherwise
/// 1 - (edit distance / length of the longer string).
/// </returns>
double CalculateSimilarity(string source, string target)
{
    // Missing or empty input on either side is scored as "no similarity".
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target))
    {
        return 0.0;
    }

    // Equal strings need no distance computation.
    if (string.Equals(source, target))
    {
        return 1.0;
    }

    // Normalise the edit count by the longer length so the ratio stays within [0, 1].
    int longestLength = Math.Max(source.Length, target.Length);
    double editRatio = (double)ComputeLevenshteinDistance(source, target) / (double)longestLength;

    return 1.0 - editRatio;
}
/// <summary>
/// Computes the Levenshtein edit distance between two strings: the minimum number
/// of single-character insertions, deletions and substitutions needed to turn
/// <paramref name="source"/> into <paramref name="target"/>.
/// </summary>
/// <param name="source">String to transform; null is treated as an empty string.</param>
/// <param name="target">String to reach; null is treated as an empty string.</param>
/// <returns>
/// 0 when both strings are equal; the length of the other string when one of them
/// is empty; otherwise the number of single-character edits separating them.
/// </returns>
int ComputeLevenshteinDistance(string source, string target)
{
    // A missing string behaves like an empty one, so the result is still a true distance.
    source = source ?? string.Empty;
    target = target ?? string.Empty;

    // Identical strings need no edits.
    if (source == target)
    {
        return 0;
    }

    int sourceLength = source.Length;
    int targetLength = target.Length;

    // Step 1: turning an empty string into the other one costs one insertion per character.
    if (sourceLength == 0)
    {
        return targetLength;
    }

    if (targetLength == 0)
    {
        return sourceLength;
    }

    // distance[i, j] = edits needed to turn the first i chars of source
    // into the first j chars of target.
    int[,] distance = new int[sourceLength + 1, targetLength + 1];

    // Step 2: the borders hold the cost of building a prefix from / reducing it to nothing.
    for (int i = 0; i <= sourceLength; i++)
    {
        distance[i, 0] = i;
    }

    for (int j = 0; j <= targetLength; j++)
    {
        distance[0, j] = j;
    }

    for (int i = 1; i <= sourceLength; i++)
    {
        for (int j = 1; j <= targetLength; j++)
        {
            // Step 3: a substitution is free when the two characters already match.
            int cost = (target[j - 1] == source[i - 1]) ? 0 : 1;

            // Step 4: keep the cheapest of deletion, insertion and substitution.
            int deletion = distance[i - 1, j] + 1;
            int insertion = distance[i, j - 1] + 1;
            int substitution = distance[i - 1, j - 1] + cost;

            distance[i, j] = Math.Min(Math.Min(deletion, insertion), substitution);
        }
    }

    return distance[sourceLength, targetLength];
}

No hay comentarios:

Publicar un comentario