All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.search.spell.NGramDistance Maven / Gradle / Ivy

There is a newer version: 3.6.2
Show newest version
package org.apache.lucene.search.spell;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * N-Gram version of edit distance based on paper by Grzegorz Kondrak, 
 * "N-gram similarity and distance". Proceedings of the Twelfth International 
 * Conference on String Processing and Information Retrieval (SPIRE 2005), pp. 115-126, 
 * Buenos Aires, Argentina, November 2005. 
 * http://www.cs.ualberta.ca/~kondrak/papers/spire05.pdf
 * <p>
 * This implementation uses the position-based optimization to compute partial
 * matches of n-gram sub-strings and adds a null-character prefix of size n-1 
 * so that the first character is contained in the same number of n-grams as 
 * a middle character.  Null-character prefix matches are discounted so that 
 * strings with no matching characters will return a distance of 0.
 * 
 */
public class NGramDistance implements StringDistance {

  private int n;
  
  /**
   * Constructs a distance measure that compares strings using n-grams of
   * the given length.
   *
   * @param size length of each n-gram; the value is stored as-is and no
   *             range check is performed by this constructor
   */
  public NGramDistance(int size) {
    n = size;
  }
  
  /**
   * Creates an N-Gram distance measure using n-grams of size 2 (bigrams).
   * Equivalent to calling {@link #NGramDistance(int)} with an argument of 2.
   */
  public NGramDistance() {
    this(2);
  }
  
  public float getDistance(String source, String target) {
    final int sl = source.length();
    final int tl = target.length();
    
    if (sl == 0 || tl == 0) {
      if (sl == tl) {
        return 1;
      }
      else {
        return 0;
      }
    }

    int cost = 0;
    if (sl < n || tl < n) {
      for (int i=0,ni=Math.min(sl,tl);i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy