All downloads are free. The search and download functionality uses the official Maven repository.

com.intuit.fuzzymatcher.domain.Document Maven / Gradle / Ivy

There is a newer version: 1.2.1
Show newest version
package com.intuit.fuzzymatcher.domain;

import com.intuit.fuzzymatcher.function.ScoringFunction;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;

import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * 

* The primary object for matching. The required attribute is a unique key and elements *

* Configurable attributes *

    *
  • elements - A set of Element object to match against
  • *
  • threshold - Value above which documents are considered a match, default 0.5
  • *
*/ public class Document implements Matchable { private Document(String key, Set elements, double threshold) { this.key = key; this.elements = elements; this.threshold = threshold; } private String key; private Set elements; private Set preProcessedElement; private double threshold; private Boolean source; private static final BiFunction, Score> DEFAULT_DOCUMENT_SCORING = ScoringFunction.getExponentialWeightedAverageScore(); public String getKey() { return key; } public Set getElements() { return elements; } public Set getPreProcessedElement() { if (this.preProcessedElement == null) { this.preProcessedElement = getDistinctNonEmptyElements().collect(Collectors.toSet()); } return preProcessedElement; } public double getThreshold() { return threshold; } public Stream getDistinctElements() { return this.elements.stream() .filter(distinctByKey(Element::getPreprocessedValueWithType)); } public Stream getDistinctNonEmptyElements() { return getDistinctElements() .filter(m -> { if (m.getPreProcessedValue() instanceof String) { return !StringUtils.isEmpty(m.getPreProcessedValue().toString()); } else return m.getPreProcessedValue() != null; }); } private static Predicate distinctByKey(Function keyExtractor) { Set seen = ConcurrentHashMap.newKeySet(); return t -> seen.add(keyExtractor.apply(t)); } @Override public long getChildCount(Matchable other) { if (other instanceof Document) { Document o = (Document) other; List childrenType = this.getPreProcessedElement().stream() .map(Element::getElementClassification).collect(Collectors.toList()); List oChildrenType = o.getPreProcessedElement().stream() .map(Element::getElementClassification).collect(Collectors.toList()); return CollectionUtils.union(childrenType, oChildrenType).size(); } return 0; } @Override public long getUnmatchedChildCount(Matchable other) { if (other instanceof Document) { Document o = (Document) other; List childrenType = this.getPreProcessedElement().stream() 
.map(Element::getElementClassification).collect(Collectors.toList()); List oChildrenType = o.getPreProcessedElement().stream() .map(Element::getElementClassification).collect(Collectors.toList()); return CollectionUtils.disjunction(childrenType, oChildrenType).size(); } return 0; } @Override public BiFunction, Score> getScoringFunction() { return DEFAULT_DOCUMENT_SCORING; } @Override public double getWeight() { return 1.0; } public Boolean isSource() { return source; } public void setSource(Boolean source) { this.source = source; } public static class Builder { private String key; private Set elements; private double threshold = 0.5; public Builder(String key) { this.key = key; } public Builder setThreshold(double threshold) { this.threshold = threshold; return this; } public Builder addElement(Element element) { if (this.elements == null || this.elements.isEmpty()) { this.elements = new HashSet<>(); } this.elements.add(element); return this; } public Document createDocument() { Document doc = new Document(key, elements, threshold); doc.elements.stream().forEach(element -> element.setDocument(doc)); return doc; } } @Override public String toString() { return "{" + getOrderedElements(elements) + "}"; } public List getOrderedElements(Set elements) { return elements.stream().sorted(Comparator.comparing(ele -> ele.getElementClassification().getElementType())) .collect(Collectors.toList()); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Document document = (Document) o; return key.equals(document.key); } @Override public int hashCode() { return key.hashCode(); } }