
// Source listing of com.intuit.fuzzymatcher.component.DocumentMatch from the fuzzy-matcher artifact (Maven / Gradle / Ivy).
package com.intuit.fuzzymatcher.component;
import com.intuit.fuzzymatcher.domain.Document;
import com.intuit.fuzzymatcher.domain.Element;
import com.intuit.fuzzymatcher.domain.Match;
import com.intuit.fuzzymatcher.domain.Score;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
*
* Starts the Matching process by element level matching and aggregates the results back
* This uses the ScoringFunction defined at each Document to get the aggregated Document score for matched Elements
*/
@Component
public class DocumentMatch {
@Autowired
private ElementMatch elementMatch;
/**
* Executes matching of a document stream
*
* @param documents Stream of Document objects
* @return Stream of Match of Document type objects
*/
public Stream> matchDocuments(Stream documents) {
Stream elements = documents.flatMap(d -> d.getDistinctNonEmptyElements());
Stream> matchedElements = elementMatch.matchElements(elements);
return rollupDocumentScore(matchedElements);
}
private Stream> rollupDocumentScore(Stream> matchElementStream) {
Map>>> groupBy = matchElementStream
.collect(Collectors.groupingBy(matchElement -> matchElement.getData().getDocument(),
Collectors.groupingBy(matchElement -> matchElement.getMatchedWith().getDocument())));
return groupBy.entrySet().parallelStream().flatMap(leftDocumentEntry ->
leftDocumentEntry.getValue().entrySet()
.parallelStream()
.map(rightDocumentEntry -> {
List childScoreList = rightDocumentEntry.getValue()
.stream()
.map(d -> d.getScore())
.collect(Collectors.toList());
return new Match(leftDocumentEntry.getKey(), rightDocumentEntry.getKey(), childScoreList);
}))
.filter(match -> match.getResult() > match.getData().getThreshold());
}
}