
com.intuit.fuzzymatcher.domain.Document Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of fuzzy-matcher Show documentation
Show all versions of fuzzy-matcher Show documentation
A Java library to determine the probability of objects being similar
package com.intuit.fuzzymatcher.domain;
import com.intuit.fuzzymatcher.function.ScoringFunction;
import org.apache.commons.lang3.StringUtils;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
*
* The primary object for matching. The required attributes are a unique key and elements.
*
* Configurable attributes
*
* - elements - A set of Element objects to match against
* - threshold - Value above which documents are considered a match, default 0.5
* - scoringFunction - Function used to aggregate scores of matching elements, default ExponentialWeightedAverageScore
*
*/
public class Document implements Matchable {
/**
 * Creates a document from its key, elements, threshold and scoring function.
 * The constructor is private, so it can only be invoked from inside this class.
 * The {@code source} field is not assigned here.
 *
 * @param key             stored as this document's key
 * @param elements        stored as this document's elements
 * @param threshold       stored as this document's threshold
 * @param scoringFunction stored as this document's scoring function
 */
private Document(String key, Set elements, double threshold, Function scoringFunction) {
    this.key = key;
    this.elements = elements;
    this.threshold = threshold;
    this.scoringFunction = scoringFunction;
}
// Key of this document; the class Javadoc describes it as a required, unique key.
private String key;
// Elements of this document to match against.
private Set elements;
// Value above which documents are considered a match. The class Javadoc documents a
// default of 0.5; where that default is applied is not visible in this part of the file.
private double threshold;
// Function used to aggregate the scores of matching elements.
private Function scoringFunction;
// NOTE(review): not assigned by the constructor visible here; its purpose is not shown
// in this part of the file — confirm against the rest of the class.
private Boolean source;
// Scoring function obtained from ScoringFunction.getExponentialWeightedAverageScore().
// Presumably used when no scoring function is configured — confirm against its usage.
private static final Function DEFAULT_DOCUMENT_SCORING = ScoringFunction.getExponentialWeightedAverageScore();
/**
 * Returns the key of this document.
 *
 * @return the value of the {@code key} field
 */
public String getKey() {
    return this.key;
}
/**
 * Returns the elements of this document.
 * The stored set itself is returned, not a copy.
 *
 * @return the value of the {@code elements} field
 */
public Set getElements() {
    return this.elements;
}
/**
 * Returns the match threshold of this document.
 *
 * @return the value of the {@code threshold} field
 */
public double getThreshold() {
    return this.threshold;
}
/**
 * Streams this document's elements, filtered through {@code distinctByKey} keyed on
 * {@code Element::getPreprocessedValueWithType}.
 *
 * NOTE(review): presumably this keeps only one element per distinct key; the body of
 * {@code distinctByKey} is not visible here — confirm.
 *
 * @return a stream over the elements that pass the {@code distinctByKey} filter
 */
public Stream getDistinctElements() {
return this.elements.stream()
.filter(distinctByKey(Element::getPreprocessedValueWithType));
}
/**
 * Streams the elements returned by {@link #getDistinctElements()}, dropping those
 * whose pre-processed value is empty according to {@code StringUtils}.
 *
 * @return a stream over the distinct elements with a non-empty pre-processed value
 */
public Stream getDistinctNonEmptyElements() {
    return getDistinctElements()
            .filter(element -> StringUtils.isNotEmpty(element.getPreProcessedValue()));
}
private static Predicate distinctByKey(Function super T, ?> keyExtractor) {
Set
© 2015 - 2025 Weber Informatics LLC | Privacy Policy