All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.intuit.fuzzymatcher.domain.Element Maven / Gradle / Ivy

There is a newer version: 1.2.1
Show newest version
package com.intuit.fuzzymatcher.domain;

import com.intuit.fuzzymatcher.function.ScoringFunction;
import org.apache.commons.lang3.StringUtils;

import java.util.AbstractMap;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Stream;

import static com.intuit.fuzzymatcher.function.PreProcessFunction.toLowerCase;
import static com.intuit.fuzzymatcher.function.PreProcessFunction.trim;

/**
 * 

* This class represent the string "value" against which match are run. *

* Configurable attributes *

    *
  • type - The ElementType for the value. This determines the functions applied at different steps of the match
  • *
  • weight - Used in scoring function to increase the Document score for an Element. Default is 1.0 for all elements
  • *
  • threshold - Value above which elements are considered a match, default 0.3
  • *
  • scoringFunction - Function used to aggregate scores of matching elements, default ExponentialWeightedAverageScore
  • *
  • preProcessFunction - Function to pre-process the value. If this is not set, the function defined in ElementType is used
  • *
  • tokenizerFunction - Function to break values into tokens. If this is not set, the function defined in ElementType is used
  • *
  • similarityMatchFunction - Function to compare tokens. If this is not set, the function defined in ElementType is used
  • *
*/ public class Element implements Matchable { private String value; private double weight; private double threshold; private ElementType type; private Document document; private Function preProcessFunction; private Function> tokenizerFunction; private BiFunction similarityMatchFunction; private Function scoringFunction; private String preProcessedValue; private static final Function DEFAULT_ELEMENT_SCORING = ScoringFunction.getAverageScore(); public Element(ElementType type, String value, double weight, double threshold, Function preProcessFunction, Function> tokenizerFunction, BiFunction similarityMatchFunction, Function scoringFunction) { this.weight = weight; this.type = type; this.value = value; this.threshold = threshold; this.preProcessFunction = preProcessFunction; this.tokenizerFunction = tokenizerFunction; this.similarityMatchFunction = similarityMatchFunction; this.scoringFunction = scoringFunction; } public ElementType getType() { return type; } public String getValue() { return value; } @Override public double getWeight() { return weight; } public double getThreshold() { return threshold; } public Document getDocument() { return document; } public void setDocument(Document document) { this.document = document; } public void setPreProcessedValue(String preProcessedValue) { this.preProcessedValue = preProcessedValue; } public Function getPreProcessFunction() { return this.preProcessFunction != null ? this.preProcessFunction : this.type.getPreProcessFunction(); } public String getPreProcessedValue() { if (this.preProcessedValue == null) { setPreProcessedValue(getPreProcessFunction().andThen(trim()).andThen(toLowerCase()).apply(this.value)); } return this.preProcessedValue; } public AbstractMap.SimpleEntry getPreprocessedValueWithType() { return new AbstractMap.SimpleEntry(this.getType(), this.getPreProcessedValue()); } public Function> getTokenizerFunction() { return this.tokenizerFunction != null ? 
this.tokenizerFunction : this.type.getTokenizerFunction(); } public Stream getTokens() { return getTokenizerFunction().apply(this).distinct(); } public BiFunction getSimilarityMatchFunction() { return this.similarityMatchFunction != null ? this.similarityMatchFunction : this.type.getSimilarityMatchFunction(); } @Override public long getChildCount() { return getTokens().count(); } @Override public long getEmptyChildCount() { return getTokens().filter(token -> StringUtils.isEmpty(token.getValue())).count(); } @Override public Function getScoringFunction() { return this.scoringFunction != null ? this.scoringFunction : DEFAULT_ELEMENT_SCORING; } public static class Builder { private ElementType type; private String value; private double weight = 1.0; private double threshold = 0.3; private Function preProcessFunction; private Function> tokenizerFunction; private BiFunction similarityMatchFunction; private Function scoringFunction; public Builder setType(ElementType type) { this.type = type; return this; } public Builder setValue(String value) { this.value = value; return this; } public Builder setWeight(double weight) { this.weight = weight; return this; } public Builder setThreshold(double threshold) { this.threshold = threshold; return this; } public Builder setPreProcessingFunction(Function preProcessingFunction) { this.preProcessFunction = preProcessingFunction; return this; } public Builder setTokenizerFunction(Function> tokenizerFunction) { this.tokenizerFunction = tokenizerFunction; return this; } public Builder setSimilarityMatchFunction(BiFunction similarityMatchFunction) { this.similarityMatchFunction = similarityMatchFunction; return this; } public Builder setScoringFunction(Function scoringFunction) { this.scoringFunction = scoringFunction; return this; } public Element createElement() { return new Element(type, value, weight, threshold, preProcessFunction, tokenizerFunction, similarityMatchFunction, scoringFunction); } } @Override public String toString() 
{ return "{" + "'" + value + '\'' + '}'; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Element element = (Element) o; if (!value.equals(element.value)) return false; if (type != element.type) return false; return !(document != null ? !document.equals(element.document) : element.document != null); } @Override public int hashCode() { int result = value.hashCode(); result = 31 * result + type.hashCode(); result = 31 * result + (document != null ? document.hashCode() : 0); return result; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy