April 26, 2012

Comparer Algorithm

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Text;

/// <summary>
/// Compares two multi-line texts line by line and records, for every line,
/// whether it is the same, slightly different, or missing on one side.
/// Set <see cref="Source"/> and <see cref="Target"/>, call <see cref="DoWork"/>,
/// then read <see cref="Result"/>.
/// </summary>
public class ComparerWorker
{
    /// <summary>Text on the "source" side of the comparison. Null is treated as empty.</summary>
    public string Source { get; set; }

    /// <summary>Text on the "target" side of the comparison. Null is treated as empty.</summary>
    public string Target { get; set; }

    /// <summary>Comparison entries produced by the last call to <see cref="DoWork"/>.</summary>
    public List<LineCompare> Result { get; set; }

    public const string DifferentColor = "#FF2A2A";
    public const string NotFoundSourceColor = "#FF2A2A";
    public const string NotFoundTargetColor = "#FF2A2A";

    // NOTE(review): the markup inside the original format strings was lost when this
    // listing was published (only "{0}" surrounded by line breaks survived, while
    // DifferentColor was still passed as argument 1). The <div> wrappers below are a
    // reconstruction - confirm the element/attributes against the page that renders them.
    private const string PlainLineTemplate = "<div>{0}</div>";
    private const string HighlightedLineTemplate = "<div style=\"background-color:{1}\">{0}</div>";

    // Two lines whose edit distance is at most this percentage of the longer line
    // are reported as Different; anything above is a TotalMismatch.
    private const int MaxDifferentPercent = 50;

    // "\r\n" must come first so a Windows line ending is consumed as one terminator.
    private static readonly string[] LineTerminators = { "\r\n", "\n", "\r" };

    public ComparerWorker()
    {
        Source = string.Empty;
        Target = string.Empty;
        Result = new List<LineCompare>();
    }

    /// <summary>
    /// Walks both texts in parallel and fills <see cref="Result"/>.
    /// Matching (Same/Different) line pairs produce one entry. A mismatching pair is
    /// resolved with a one-line look-ahead: if the next source line matches the current
    /// target line the source line is reported as NotFoundTarget; if the current source
    /// line matches the next target line the target line is reported as NotFoundSource;
    /// otherwise both lines are reported separately.
    /// </summary>
    public void DoWork()
    {
        if (Result == null)
        {
            Result = new List<LineCompare>();
        }
        else
        {
            Result.Clear();
        }

        string[] sourceLines = SplitLines(Source);
        string[] targetLines = SplitLines(Target);

        int sourceIndex = 0;
        int targetIndex = 0;

        // Each index is bounded by its own array so that no entries are produced
        // once both texts have been consumed.
        while (sourceIndex < sourceLines.Length || targetIndex < targetLines.Length)
        {
            if (targetIndex >= targetLines.Length)
            {
                // Target is exhausted: every remaining source line has no counterpart.
                Result.Add(CreateSourceOnlyEntry(sourceLines[sourceIndex]));
                sourceIndex++;
                continue;
            }

            if (sourceIndex >= sourceLines.Length)
            {
                // Source is exhausted: every remaining target line has no counterpart.
                Result.Add(CreateTargetOnlyEntry(targetLines[targetIndex]));
                targetIndex++;
                continue;
            }

            string sourceLine = sourceLines[sourceIndex];
            string targetLine = targetLines[targetIndex];

            LineCompare current = GetLineDifference(sourceLine, targetLine);
            if (IsMatch(current))
            {
                Result.Add(current);
                sourceIndex++;
                targetIndex++;
                continue;
            }

            // Compare target line with next line from source
            bool nextSourceMatchesTarget =
                sourceIndex + 1 < sourceLines.Length
                && IsMatch(GetLineDifference(sourceLines[sourceIndex + 1], targetLine));

            // Compare source line with next line from target
            bool sourceMatchesNextTarget =
                targetIndex + 1 < targetLines.Length
                && IsMatch(GetLineDifference(sourceLine, targetLines[targetIndex + 1]));

            if (nextSourceMatchesTarget == sourceMatchesNextTarget)
            {
                // Both look-aheads match (lines probably interchanged) or neither does:
                // make one entry for each line and move past both.
                Result.Add(CreateSourceOnlyEntry(sourceLine));
                Result.Add(CreateTargetOnlyEntry(targetLine));
                sourceIndex++;
                targetIndex++;
            }
            else if (nextSourceMatchesTarget)
            {
                // Source line was probably deleted; keep the target line for the next pass.
                Result.Add(CreateSourceOnlyEntry(sourceLine));
                sourceIndex++;
            }
            else
            {
                // Target line was probably inserted; keep the source line for the next pass.
                Result.Add(CreateTargetOnlyEntry(targetLine));
                targetIndex++;
            }
        }
    }

    /// <summary>
    /// Compares a single pair of lines.
    /// </summary>
    /// <param name="sourceLine">Line from the source text; null is treated as empty.</param>
    /// <param name="targetLine">Line from the target text; null is treated as empty.</param>
    /// <returns>
    /// An entry whose <see cref="LineCompare.DiffType"/> is Same when the lines are equal
    /// ignoring spaces and case (current culture), Different when the Levenshtein distance
    /// is at most 50% of the longer line's length, and TotalMismatch otherwise. Both Html
    /// properties contain the HTML-encoded line, highlighted unless the lines are Same.
    /// </returns>
    public LineCompare GetLineDifference(string sourceLine, string targetLine)
    {
        sourceLine = sourceLine ?? string.Empty;
        targetLine = targetLine ?? string.Empty;

        LineCompare result = new LineCompare();
        result.SourceLine = sourceLine;
        result.TargetLine = targetLine;

        string compactSource = sourceLine.Replace(" ", "");
        string compactTarget = targetLine.Replace(" ", "");
        if (string.Compare(compactSource, compactTarget, StringComparison.CurrentCultureIgnoreCase) == 0)
        {
            result.SourceLineHtml = FormatPlain(sourceLine);
            result.TargetLineHtml = FormatPlain(targetLine);
            result.DiffType = DifferenceType.Same;
            return result;
        }

        result.SourceLineHtml = FormatHighlighted(sourceLine);
        result.TargetLineHtml = FormatHighlighted(targetLine);

        int distance = LevenshteinDistance.Compute(sourceLine, targetLine);

        // At least one line is non-empty here (two empty lines compare equal above),
        // so the divisor is never zero.
        int longest = Math.Max(sourceLine.Length, targetLine.Length);

        // Integer division on purpose: the percentage is truncated before the comparison.
        long differencePercent = ((long)distance * 100) / longest;
        result.DiffType = differencePercent <= MaxDifferentPercent
            ? DifferenceType.Different
            : DifferenceType.TotalMismatch;

        return result;
    }

    // Splits text into lines on any of \r\n, \n or \r; null/empty text yields one empty line.
    private static string[] SplitLines(string text)
    {
        return (text ?? string.Empty).Split(LineTerminators, StringSplitOptions.None);
    }

    // True when the compared lines are close enough to be treated as the same logical line.
    private static bool IsMatch(LineCompare comparison)
    {
        return comparison.DiffType == DifferenceType.Same
            || comparison.DiffType == DifferenceType.Different;
    }

    // Builds the entry for a source line that has no counterpart in the target.
    private static LineCompare CreateSourceOnlyEntry(string sourceLine)
    {
        LineCompare entry = new LineCompare();
        entry.SourceLine = sourceLine;
        entry.SourceLineHtml = FormatHighlighted(sourceLine);
        entry.DiffType = DifferenceType.NotFoundTarget;
        return entry;
    }

    // Builds the entry for a target line that has no counterpart in the source.
    private static LineCompare CreateTargetOnlyEntry(string targetLine)
    {
        LineCompare entry = new LineCompare();
        entry.TargetLine = targetLine;
        entry.TargetLineHtml = FormatHighlighted(targetLine);
        entry.DiffType = DifferenceType.NotFoundSource;
        return entry;
    }

    // HTML-encodes a line and wraps it without highlighting.
    private static string FormatPlain(string line)
    {
        return string.Format(PlainLineTemplate, HttpUtility.HtmlEncode(line));
    }

    // HTML-encodes a line and wraps it with the DifferentColor highlight.
    private static string FormatHighlighted(string line)
    {
        return string.Format(HighlightedLineTemplate, HttpUtility.HtmlEncode(line), DifferentColor);
    }
}

/// <summary>
/// One entry of a comparison result. Entries for a line that exists on only one
/// side leave the other side's properties null.
/// </summary>
public class LineCompare
{
    public string SourceLine { get; set; }
    public string TargetLine { get; set; }
    public DifferenceType DiffType { get; set; }
    public string SourceLineHtml { get; set; }
    public string TargetLineHtml { get; set; }
}

/// <summary>
/// Classification of a compared line (pair).
/// </summary>
public enum DifferenceType
{
    None,
    Same,
    Different,
    TotalMismatch,
    NotFoundSource,
    NotFoundTarget
}

public static class LevenshteinDistance
{
    /// <summary>
    /// Compute the distance between two strings: the minimum number of single-character
    /// insertions, deletions and substitutions that turn <paramref name="s"/> into
    /// <paramref name="t"/>. Null is treated as the empty string.
    /// </summary>
    public static int Compute(string s, string t)
    {
        s = s ?? string.Empty;
        t = t ?? string.Empty;

        int n = s.Length;
        int m = t.Length;

        if (n == 0)
        {
            return m;
        }

        if (m == 0)
        {
            return n;
        }

        // Only the previous and the current row of the distance matrix are needed.
        int[] previous = new int[m + 1];
        int[] current = new int[m + 1];

        for (int j = 0; j <= m; j++)
        {
            previous[j] = j;
        }

        for (int i = 1; i <= n; i++)
        {
            current[0] = i;

            for (int j = 1; j <= m; j++)
            {
                int cost = (t[j - 1] == s[i - 1]) ? 0 : 1;

                current[j] = Math.Min(
                    Math.Min(previous[j] + 1, current[j - 1] + 1),
                    previous[j - 1] + cost);
            }

            int[] swap = previous;
            previous = current;
            current = swap;
        }

        // After the final swap the last computed row lives in "previous".
        return previous[m];
    }
}

0 Comments:

Post a Comment

Subscribe to Post Comments [Atom]

<< Home