All downloads are free. Search and download functionality uses the official Maven repository.

com.intuit.fuzzymatcher.domain.Document Maven / Gradle / Ivy

There is a newer version: 1.2.1
Show newest version
package com.intuit.fuzzymatcher.domain;

import com.intuit.fuzzymatcher.function.ScoringFunction;
import org.apache.commons.lang3.StringUtils;


import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * 

* The primary object for matching. The required attribute is a unique key and elements *

* Configurable attributes *

    *
  • elements - A set of Element object to match against
  • *
  • threshold - Value above which documents are considered a match, default 0.5
  • *
  • scoringFunction - Function used to aggregate scores of matching elements, default ExponentialWeightedAverageScore
  • *
*/ public class Document implements Matchable { private Document(String key, Set elements, double threshold, Function scoringFunction) { this.key = key; this.elements = elements; this.threshold = threshold; this.scoringFunction = scoringFunction; } private String key; private Set elements; private double threshold; private Function scoringFunction; private Boolean source; private static final Function DEFAULT_DOCUMENT_SCORING = ScoringFunction.getExponentialWeightedAverageScore(); public String getKey() { return key; } public Set getElements() { return elements; } public double getThreshold() { return threshold; } public Stream getDistinctElements() { return this.elements.stream() .filter(distinctByKey(Element::getPreprocessedValueWithType)); } public Stream getDistinctNonEmptyElements() { return getDistinctElements() .filter(m -> !StringUtils.isEmpty(m.getPreProcessedValue())); } private static Predicate distinctByKey(Function keyExtractor) { Set seen = ConcurrentHashMap.newKeySet(); return t -> seen.add(keyExtractor.apply(t)); } @Override public long getChildCount() { return this.getDistinctElements().count(); } @Override public long getEmptyChildCount() { return this.elements.stream().filter(element -> StringUtils.isEmpty(element.getPreProcessedValue())).count(); } @Override public Function getScoringFunction() { return this.scoringFunction != null ? 
this.scoringFunction : DEFAULT_DOCUMENT_SCORING; } @Override public double getWeight() { return 1.0; } public Boolean isSource() { return source; } public void setSource(Boolean source) { this.source = source; } public static class Builder { private String key; private Set elements; private double threshold = 0.5; private Function scoringFunction; public Builder(String key) { this.key = key; } public Builder setThreshold(double threshold) { this.threshold = threshold; return this; } public Builder addElement(Element element) { if (this.elements == null || this.elements.isEmpty()) { this.elements = new HashSet<>(); } this.elements.add(element); return this; } public Builder setScoringFunction(Function scoringFunction) { this.scoringFunction = scoringFunction; return this; } public Document createDocument() { Document doc = new Document(key, elements, threshold, scoringFunction); doc.elements.stream().forEach(element -> element.setDocument(doc)); return doc; } } @Override public String toString() { return "{" + getOrderedElements(elements) + "}"; } public List getOrderedElements(Set elements) { return elements.stream().sorted(Comparator.comparing(Element::getType)).collect(Collectors.toList()); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Document document = (Document) o; return key.equals(document.key); } @Override public int hashCode() { return key.hashCode(); } }