All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.intuit.fuzzymatcher.component.DocumentMatch Maven / Gradle / Ivy

There is a newer version: 1.2.1
Show newest version
package com.intuit.fuzzymatcher.component;


import com.intuit.fuzzymatcher.domain.Document;
import com.intuit.fuzzymatcher.domain.Element;
import com.intuit.fuzzymatcher.domain.Match;
import com.intuit.fuzzymatcher.domain.Score;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;


/**
 * 

* Starts the Matching process by element level matching and aggregates the results back * This uses the ScoringFunction defined at each Document to get the aggregated Document score for matched Elements */ @Component public class DocumentMatch { @Autowired private ElementMatch elementMatch; /** * Executes matching of a document stream * * @param documents Stream of Document objects * @return Stream of Match of Document type objects */ public Stream> matchDocuments(Stream documents) { Stream elements = documents.flatMap(d -> d.getDistinctNonEmptyElements()); Stream> matchedElements = elementMatch.matchElements(elements); return rollupDocumentScore(matchedElements); } private Stream> rollupDocumentScore(Stream> matchElementStream) { Map>>> groupBy = matchElementStream .collect(Collectors.groupingBy(matchElement -> matchElement.getData().getDocument(), Collectors.groupingBy(matchElement -> matchElement.getMatchedWith().getDocument()))); return groupBy.entrySet().parallelStream().flatMap(leftDocumentEntry -> leftDocumentEntry.getValue().entrySet() .parallelStream() .map(rightDocumentEntry -> { List childScoreList = rightDocumentEntry.getValue() .stream() .map(d -> d.getScore()) .collect(Collectors.toList()); return new Match(leftDocumentEntry.getKey(), rightDocumentEntry.getKey(), childScoreList); })) .filter(match -> match.getResult() > match.getData().getThreshold()); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy