All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.EditDistance Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.

There is a newer version: 3.8.6
Show newest version
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    EditDistance.java
 *    Copyright (C) 2008 Bruno Woltzenlogel Paleo (http://www.logic.at/people/bruno/ ; http://bruno-wp.blogspot.com/)
 *
 */

package weka.core;

/**
 * Computes the Levenshtein edit distance between two strings, i.e. the
 * minimum number of single-character insertions, deletions and substitutions
 * that turn one string into the other.
 *
 * @author Bruno Woltzenlogel Paleo
 * @version $Revision: 1.2 $
 */
public class EditDistance
    extends AbstractStringDistanceFunction {

  /**
   * Creates an edit distance function without passing any data to the
   * superclass.
   */
  public EditDistance() {
  }

  /**
   * Creates an edit distance function, handing the given data on to the
   * superclass constructor.
   *
   * @param data the instances passed to the superclass
   */
  public EditDistance(Instances data) {
    super(data);
  }

  /**
   * Calculates the distance (Levenshtein Edit Distance) between two strings.
   * <p>
   * Characters are compared as UTF-16 code units via
   * {@link String#charAt(int)}. Only two rows of the dynamic-programming
   * table are kept at any time, so the extra memory needed is proportional
   * to the length of the shorter string rather than to the product of both
   * lengths.
   *
   * @param stringA the first string
   * @param stringB the second string
   * @return the distance between the two given strings
   */
  double stringDistance(String stringA, String stringB) {
    // The Levenshtein distance is symmetric, so the shorter string can be
    // laid out along the row to keep the two working rows as small as possible.
    String rowString = stringB;
    String columnString = stringA;
    if (rowString.length() > columnString.length()) {
      rowString = stringA;
      columnString = stringB;
    }

    int rowLength = rowString.length();
    int columnLength = columnString.length();

    // previousRow[j] is the distance between the first (i - 1) characters of
    // columnString and the first j characters of rowString; currentRow is
    // the same for the first i characters of columnString.
    int[] previousRow = new int[rowLength + 1];
    int[] currentRow = new int[rowLength + 1];

    // Turning the empty prefix into a prefix of length j takes j insertions.
    for (int j = 0; j <= rowLength; j++) {
      previousRow[j] = j;
    }

    for (int i = 1; i <= columnLength; i++) {
      char columnChar = columnString.charAt(i - 1);

      // Turning a prefix of length i into the empty prefix takes i deletions.
      currentRow[0] = i;

      for (int j = 1; j <= rowLength; j++) {
        if (columnChar == rowString.charAt(j - 1)) {
          // Matching characters cost nothing: carry the diagonal value over.
          currentRow[j] = previousRow[j - 1];
        }
        else {
          // One edit on top of the cheapest of deletion, insertion and
          // substitution.
          currentRow[j] = 1 + Math.min(previousRow[j],
                                       Math.min(currentRow[j - 1],
                                                previousRow[j - 1]));
        }
      }

      // The row just filled becomes the "previous" row of the next iteration;
      // the old previous row is recycled as scratch space.
      int[] recycled = previousRow;
      previousRow = currentRow;
      currentRow = recycled;
    }

    // After the final swap the last computed row lives in previousRow (or it
    // is still the initial row when both strings are empty).
    return previousRow[rowLength];
  }


  /**
   * Returns a string describing this object.
   * 
   * @return 		a description of the evaluator suitable for
   * 			displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return 
      "Implementing Levenshtein distance function.\n\n"
      + "One object defines not one distance but the data model in which "
      + "the distances between objects of that data model can be computed.\n\n"
      + "Attention: For efficiency reasons the use of consistency checks "
      + "(like are the data models of the two instances exactly the same), "
      + "is low.\n\n"
      + "For more information, see: http://en.wikipedia.org/wiki/Levenshtein_distance\n\n";
  }  
  
  /**
   * Returns the revision string.
   * 
   * @return		the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.2 $");
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy