
com.intuit.fuzzymatcher.domain.Document Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of fuzzy-matcher Show documentation
Show all versions of fuzzy-matcher Show documentation
A java library to determine probability of objects being similar
package com.intuit.fuzzymatcher.domain;
import com.intuit.fuzzymatcher.function.ScoringFunction;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
*
* The primary object for matching. The required attributes are a unique key and a set of elements.
*
* Configurable attributes
*
* - elements - A set of Element objects to match against
* - threshold - Value above which documents are considered a match, default 0.5
*
*/
public class Document implements Matchable {
/**
 * Creates a document from its key, its elements and its match threshold.
 * The arguments are stored as given; no copy or validation is performed here.
 *
 * @param key       identifier of the document
 * @param elements  the elements that belong to this document
 * @param threshold value above which documents are considered a match
 */
private Document(String key, Set<Element> elements, double threshold) {
    this.key = key;
    this.elements = elements;
    this.threshold = threshold;
}
private String key;
private Set elements;
private Set preProcessedElement;
private double threshold;
private Boolean source;
private static final BiFunction, Score> DEFAULT_DOCUMENT_SCORING = ScoringFunction.getExponentialWeightedAverageScore();
/**
 * Returns the key this document was created with.
 *
 * @return the document key
 */
public String getKey() {
    return this.key;
}
/**
 * Returns the elements of this document.
 * The internal set itself is returned, not a copy.
 *
 * @return the set of elements held by this document
 */
public Set<Element> getElements() {
    return this.elements;
}
/**
 * Returns the distinct, non-empty elements of this document as a set.
 * The set is built from {@link #getDistinctNonEmptyElements()} on the first
 * call and the same instance is returned on every later call.
 *
 * @return the cached set of distinct, non-empty elements
 */
public Set<Element> getPreProcessedElement() {
    if (this.preProcessedElement == null) {
        // Typed local so the collector yields a Set<Element> rather than Object.
        Stream<Element> nonEmpty = getDistinctNonEmptyElements();
        this.preProcessedElement = nonEmpty.collect(Collectors.toSet());
    }
    return this.preProcessedElement;
}
/**
 * Returns the threshold of this document, i.e. the value above which
 * documents are considered a match.
 *
 * @return the match threshold
 */
public double getThreshold() {
    return this.threshold;
}
/**
 * Streams this document's elements through the {@code distinctByKey} filter,
 * keyed on {@code Element::getPreprocessedValueWithType}.
 * Presumably this keeps one element per distinct key -- confirm against
 * {@code distinctByKey}.
 *
 * @return a new stream over the filtered elements
 */
public Stream<Element> getDistinctElements() {
    Stream<Element> all = this.elements.stream();
    return all.filter(distinctByKey(Element::getPreprocessedValueWithType));
}
/**
 * Streams the elements from {@link #getDistinctElements()} whose pre-processed
 * value is present: a {@code String} value must be non-empty, any other value
 * must be non-null.
 *
 * @return a new stream over the distinct elements that carry a value
 */
public Stream<Element> getDistinctNonEmptyElements() {
    // Typed local so the lambda parameter is an Element rather than Object.
    Stream<Element> distinct = getDistinctElements();
    return distinct.filter(element -> {
        // Read the value once instead of re-invoking the getter in each branch.
        Object value = element.getPreProcessedValue();
        if (value instanceof String) {
            return !StringUtils.isEmpty((String) value);
        }
        return value != null;
    });
}
private static Predicate distinctByKey(Function super T, ?> keyExtractor) {
Set
© 2015 - 2025 Weber Informatics LLC | Privacy Policy