org.apache.commons.text.similarity.EditDistanceFrom Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-text Show documentation
Show all versions of commons-text Show documentation
Apache Commons Text is a library focused on algorithms working on strings.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
/**
 * <p>
 * This stores a {@link EditDistance} implementation and a {@link CharSequence} "left" string.
 * The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the
 * comparison function for the pair of strings.
 * </p>
 *
 * <p>
 * The following is an example which finds the most similar string:
 * </p>
 * <pre>
 * EditDistance&lt;Integer&gt; editDistance = new LevenshteinDistance();
 * String target = "Apache";
 * EditDistanceFrom&lt;Integer&gt; editDistanceFrom =
 *     new EditDistanceFrom&lt;Integer&gt;(editDistance, target);
 * String mostSimilar = null;
 * Integer shortestDistance = null;
 *
 * for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) {
 *     Integer distance = editDistanceFrom.apply(test);
 *     if (shortestDistance == null || distance &lt; shortestDistance) {
 *         shortestDistance = distance;
 *         mostSimilar = test;
 *     }
 * }
 *
 * System.out.println("The string most similar to \"" + target + "\" "
 *     + "is \"" + mostSimilar + "\" because "
 *     + "its distance is only " + shortestDistance + ".");
 * </pre>
 *
 * @param <R> This is the type of similarity score used by the EditDistance function.
 * @since 1.0
 */
public class EditDistanceFrom<R> {

    /**
     * Edit distance.
     */
    private final EditDistance<R> editDistance;

    /**
     * Left parameter used in distance function.
     */
    private final CharSequence left;

    /**
     * This accepts the edit distance implementation and the "left" string.
     *
     * @param editDistance This may not be null.
     * @param left This may be null here,
     *             but the {@code EditDistance#apply(CharSequence left, CharSequence right)}
     *             implementation may not accept nulls.
     * @throws IllegalArgumentException if {@code editDistance} is null
     */
    public EditDistanceFrom(final EditDistance<R> editDistance, final CharSequence left) {
        if (editDistance == null) {
            throw new IllegalArgumentException("The edit distance may not be null.");
        }
        this.editDistance = editDistance;
        // "left" is stored as-is (possibly null); it is only handed to the
        // edit distance implementation when apply(CharSequence) is called.
        this.left = left;
    }

    /**
     * <p>
     * This compares "left" field against the "right" parameter
     * using the "edit distance" implementation.
     * </p>
     *
     * @param right the second CharSequence
     * @return the similarity score between two CharSequences
     */
    public R apply(final CharSequence right) {
        return editDistance.apply(left, right);
    }

    /**
     * Gets the left parameter.
     *
     * @return the left parameter
     */
    public CharSequence getLeft() {
        return left;
    }

    /**
     * Gets the edit distance.
     *
     * @return the edit distance
     */
    public EditDistance<R> getEditDistance() {
        return editDistance;
    }
}