All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.intuit.fuzzymatcher.component.MatchService Maven / Gradle / Ivy

There is a newer version: 1.2.1
Show newest version
package com.intuit.fuzzymatcher.component;

import com.intuit.fuzzymatcher.domain.Document;
import com.intuit.fuzzymatcher.domain.Match;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Entry Point for Fuzzy Matching. This class provides different ways to accept Documents for primarily 3 use case
 * 

* 1. De-duplication of data - Where for a given list of documents it finds duplicates * 2. Check duplicate for a new data - Where it checks for a new Document a duplicate is present in existing list * 3. Check duplicates for bulk inserts - Similar to 2, where a list of new Documents is checked against existing *

* This also has similar implementation to aggregate results in different formats. */ @Component public class MatchService { @Autowired private DocumentMatch documentMatch; /** * Use this for De-duplication of data, where for a given list of documents it finds duplicates * Data is aggregated by a given Document * * @param documents the list of documents to match against * @return a map containing the grouping of each document and its corresponding matches */ public Map>> applyMatch(List documents) { return documentMatch.matchDocuments(documents.parallelStream()) .collect(Collectors.groupingBy(Match::getData)); } /** * Use this to check duplicates for bulk inserts, where a list of new Documents is checked against existing list * Data is aggregated by a given Document * * @param documents the list of documents to match from * @param matchWith the list of documents to match against * @return a map containing the grouping of each document and its corresponding matches */ public Map>> applyMatch(List documents, List matchWith) { return documentMatch.matchDocuments(Stream.concat( documents.parallelStream().map(document -> { document.setSource(true); return document; }), matchWith.parallelStream().map(document -> { document.setSource(false); return document; }))) .collect(Collectors.groupingBy(Match::getData)); } /** * Use this to check duplicate for a new record, where it checks whether a new Document is a duplicate in existing list * Data is aggregated by a given Document * * @param document the document to match * @param matchWith the list of documents to match against * @return a map containing the grouping of each document and its corresponding matches */ public Map>> applyMatch(Document document, List matchWith) { return applyMatch(Arrays.asList(document), matchWith); } /** * Use this to check duplicate for a new record, where it checks whether a new Document is a duplicate in existing list * Data is aggregated by a given Document Id * * @param document the document to match * @param matchWith the list of documents to match against * @return a map containing the grouping of each document id and its corresponding matches */ public Map>> applyMatchByDocId(Document document, List matchWith) { return applyMatchByDocId(Arrays.asList(document), matchWith); } /** * Use this for De-duplication of data, where for a given list of documents it finds duplicates * Data is aggregated by a given Document Id * * @param documents the list of documents to match against * @return a map containing the grouping of each document id and its corresponding matches */ public Map>> applyMatchByDocId(List documents) { return documentMatch.matchDocuments(documents.parallelStream()) .collect(Collectors.groupingBy(match -> match.getData().getKey())); } /** * Use this to check duplicates for bulk inserts, where a list of new Documents is checked against existing list * Data is aggregated by a given Document Id * * @param documents the list of documents to match from * @param matchWith the list of documents to match against * @return a map containing the grouping of each document id and its corresponding matches */ public Map>> applyMatchByDocId(List documents, List matchWith) { return documentMatch.matchDocuments(Stream.concat( documents.parallelStream().map(document -> { document.setSource(true); return document; }), matchWith.parallelStream().map(document -> { document.setSource(false); return document; }))) .collect(Collectors.groupingBy(match -> match.getData().getKey())); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy