
net.projectmonkey.object.mapper.analysis.token.matcher.TokenMatcher Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of object-mapper Show documentation
Show all versions of object-mapper Show documentation
An object-mapping implementation written as an alternative to ModelMapper: it supports inheritance, handles flattening and expanding in a precise way, and is extensible and configurable.
The newest version!
package net.projectmonkey.object.mapper.analysis.token.matcher;
/*
*
* * Copyright 2012 the original author or authors.
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*/
/**
*
* @author Andy Moody
*/
import net.projectmonkey.object.mapper.util.CollectionUtil;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class TokenMatcher
{
public static TokenMatcher INSTANCE = new TokenMatcher();
private TokenMatcher(){}
/**
* @param sourceProperties
* @param destinationProperties
* @return a list of MatchingResult objects representing the closest match of destination properties
* for each source property in the list followed by a PropertyMatchResult for each unmatched destination property.
*
* N.B. the algorithm operates in a sequential basis so if sourceProperties[0] has a match with
* destinationProperties[1] then sourceProperties[1] will be unable to match with destinationProperties[1]
* even if this may provide a closer match.
*
* I haven't found a real world scenario where this actually creates an issue yet - so if you have one let me know the details
* and I'll implement a solution.
*/
public List match(List> sourceProperties, List> destinationProperties)
{
List> unmatchedTokens = initUnmatchedTokens(destinationProperties);
List results = matchWithTheClosestMatchingProperties(sourceProperties, destinationProperties, unmatchedTokens);
matchRemainingSourceTokensWithUmatchedTokensInAnyProperty(sourceProperties, results, unmatchedTokens);
for(int destinationPropertyPos = 0; destinationPropertyPos < unmatchedTokens.size(); destinationPropertyPos++)
{
List unmatchedTokensForDestinationProperty = unmatchedTokens.get(destinationPropertyPos);
if(!unmatchedTokensForDestinationProperty.isEmpty())
{
List unmatchedDestinations = new ArrayList();
List destinationTokens = destinationProperties.get(destinationPropertyPos);
for(int unmatchedDestinationTokenPos = 0; unmatchedDestinationTokenPos < unmatchedTokensForDestinationProperty.size(); unmatchedDestinationTokenPos ++)
{
String token = unmatchedTokensForDestinationProperty.get(unmatchedDestinationTokenPos);
int numberOfTokensUnmatched = CollectionUtil.countInstancesOf(token, unmatchedTokensForDestinationProperty);
int destinationTokenPos = CollectionUtil.nthLastIndexOf(token, destinationTokens, numberOfTokensUnmatched);
unmatchedDestinations.add(new TokenMatch(MatchStrength.NONE, null, destinationPropertyPos, null, destinationTokenPos));
}
if(results.size() > destinationPropertyPos && unmatchedDestinations.size() < destinationTokens.size())
{
//it's a partially matched destination - add it to the appropriate source property tally.
results.get(destinationPropertyPos).getMatches().addAll(unmatchedDestinations);
}
else
{
results.add(new PropertyMatchResult(unmatchedDestinations));
}
}
}
return results;
}
private void matchRemainingSourceTokensWithUmatchedTokensInAnyProperty(final List> sourceProperties, final List results,
final List> remainingUnmatchedTokens)
{
for(int resultPos = 0; resultPos < results.size() ; resultPos++)
{
List matches = results.get(resultPos).getMatches();
List unmatchedSourceTokens = resolveUnmatchedSourceTokens(sourceProperties, resultPos, matches);
for(int sourceTokenPos = 0; sourceTokenPos < unmatchedSourceTokens.size(); sourceTokenPos ++)
{
TokenAndPosition sourceTokenAndPosition = unmatchedSourceTokens.get(sourceTokenPos);
MatchingSummary bestMatch = locateBestRemainingMatchForSingleTokenInAnyProperty(remainingUnmatchedTokens, sourceTokenAndPosition);
if(bestMatch != null)
{
int matchingDestinationPropertyPosition = bestMatch.getPropertyPosition();
TokenMatch tokenMatch = bestMatch.getMatches().get(0);
CollectionUtil.replace(matches, sourceTokenAndPosition.tokenPosition, tokenMatch);
CollectionUtil.replace(remainingUnmatchedTokens, matchingDestinationPropertyPosition, bestMatch.getUnmatchedDestinationTokens());
}
}
}
}
private MatchingSummary locateBestRemainingMatchForSingleTokenInAnyProperty(final List> remainingUnmatchedTokens,
final TokenAndPosition sourceTokenAndPosition)
{
MatchingSummary bestMatch = null;
int destinationPropertyPosition = 0;
for(List unmatchedTokens : remainingUnmatchedTokens)
{
if(!unmatchedTokens.isEmpty())
{
List newUnmatchedTokens = new ArrayList(unmatchedTokens);
boolean inCorrectProperty = destinationPropertyPosition == sourceTokenAndPosition.propertyPosition;
TokenMatch match = matchSingleToken(newUnmatchedTokens, new ArrayList(unmatchedTokens), sourceTokenAndPosition,
destinationPropertyPosition);
MatchStrength strengthForToken = match.getStrength();
if(bestMatch == null && !MatchStrength.NONE.equals(strengthForToken))
{
bestMatch = new MatchingSummary(match, newUnmatchedTokens, destinationPropertyPosition);
}
else if (bestMatch != null && bestMatch.getMatches().get(0).getStrength().getPriority() > strengthForToken.getPriority())
{
bestMatch = new MatchingSummary(match, newUnmatchedTokens, destinationPropertyPosition);
}
}
destinationPropertyPosition ++;
}
return bestMatch;
}
private List resolveUnmatchedSourceTokens(final List> sourceProperties, final int resultPos,
final List matches)
{
List unmatchedSourceTokens = new ArrayList();
for(int tokenMatchPos = 0; tokenMatchPos < matches.size(); tokenMatchPos++)
{
if(MatchStrength.NONE.equals(matches.get(tokenMatchPos).getStrength()))
{
if(sourceProperties.size() > resultPos && sourceProperties.get(resultPos).size() > tokenMatchPos)
{
unmatchedSourceTokens.add(new TokenAndPosition(sourceProperties.get(resultPos).get(tokenMatchPos), resultPos, tokenMatchPos));
}
}
}
return unmatchedSourceTokens;
}
private List matchWithTheClosestMatchingProperties(final List> sourceProperties, final List> destinationProperties,
final List> unmatchedTokens)
{
List results = new ArrayList();
int sourcePropertyPosition = 0;
Set matchedDestinationProperties = new HashSet();
for(List sourceTokens: sourceProperties)
{
MatchingSummary bestMatch = null;
for(int i = 0; i < destinationProperties.size(); i++)
{
if(!matchedDestinationProperties.contains(i))
{
List destinationTokens = destinationProperties.get(i);
List unmatchedTokensForDestination = new ArrayList(destinationTokens);
List matchStrengths = matchTokens(unmatchedTokensForDestination, sourceTokens, destinationTokens, sourcePropertyPosition, i);
MatchingSummary summary = new MatchingSummary(matchStrengths, unmatchedTokensForDestination, i);
if(summary.isMatch() && (bestMatch == null || summary.getScore() > bestMatch.getScore()))
{
bestMatch = summary;
}
}
}
if(bestMatch != null)
{
results.add(new PropertyMatchResult(bestMatch.getMatches()));
int destinationPropertyPosition = bestMatch.getPropertyPosition();
CollectionUtil.replace(unmatchedTokens, destinationPropertyPosition, bestMatch.getUnmatchedDestinationTokens());
matchedDestinationProperties.add(destinationPropertyPosition);
}
else
{
//we have more source properties than destination properties - fill up with MatchStrength.NONE
List tokenMatches = new ArrayList();
for(int sourceTokenPosition = 0; sourceTokenPosition < sourceTokens.size(); sourceTokenPosition++)
{
tokenMatches.add(new TokenMatch(MatchStrength.NONE, sourcePropertyPosition, null, sourceTokenPosition, null));
}
results.add(new PropertyMatchResult(tokenMatches));
}
sourcePropertyPosition++;
}
return results;
}
private List> initUnmatchedTokens(final List> destinationProperties)
{
List> unmatchedTokens = new ArrayList>(destinationProperties.size());
for(List destinationTokens: destinationProperties)
{
unmatchedTokens.add(new ArrayList(destinationTokens));
}
return unmatchedTokens;
}
private List> initResult(final String[][] sourceProperties)
{
int length = sourceProperties.length;
List> strengths = new ArrayList>(length);
for(int i = 0; i < length; i++)
{
strengths.add(new ArrayList());
}
return strengths;
}
private List matchTokens(final List unmatchedTokens, final List sourceTokens, final List destinationTokens,
final int sourcePropertyPosition, final int destinationPropertyPosition)
{
final List matches = new ArrayList();
int sourceTokenPosition = 0;
for(String token : sourceTokens)
{
TokenMatch strength = matchSingleToken(unmatchedTokens, destinationTokens, new TokenAndPosition(token, sourcePropertyPosition, sourceTokenPosition),
destinationPropertyPosition);
matches.add(strength);
sourceTokenPosition++;
}
return matches;
}
private TokenMatch matchSingleToken(final List unmatchedTokens, final List destinationTokens, final TokenAndPosition sourceTokenAndPosition,
final int destinationPropertyPosition)
{
TokenMatch match = null;
int sourceTokenPosition = sourceTokenAndPosition.tokenPosition;
String token = sourceTokenAndPosition.token;
boolean tokenAvailableInDestination = destinationTokens.size() > sourceTokenPosition;
boolean inCorrectProperty = destinationPropertyPosition == sourceTokenAndPosition.propertyPosition;
if(tokenAvailableInDestination)
{
String destToken = destinationTokens.get(sourceTokenPosition);
if(destToken.equals(token))
{
MatchStrength strength = inCorrectProperty ? MatchStrength.EXACT : MatchStrength.IGNORING_PROPERTY_POSITION;
match = new TokenMatch(strength, sourceTokenAndPosition.propertyPosition, destinationPropertyPosition, sourceTokenPosition, sourceTokenPosition);
unmatchedTokens.remove(token);
}
else if(destToken.equalsIgnoreCase(token))
{
MatchStrength strength = inCorrectProperty ? MatchStrength.IGNORING_CASE : MatchStrength.IGNORING_CASE_AND_PROPERTY_POSITION;
match = new TokenMatch(strength, sourceTokenAndPosition.propertyPosition, destinationPropertyPosition, sourceTokenPosition, sourceTokenPosition);
boolean remove = unmatchedTokens.remove(destToken);
if(!remove)
{
/*
* Handle the case where we have a token appearing twice with different cases
* and the corresponding one has already been matched in unmatched properties list
*/
remove(unmatchedTokens, destToken);
}
}
else
{
match = manuallyMatch(destinationTokens, unmatchedTokens, sourceTokenAndPosition, destinationPropertyPosition);
}
}
else
{
match = manuallyMatch(destinationTokens, unmatchedTokens, sourceTokenAndPosition, destinationPropertyPosition);
}
return match;
}
private TokenMatch manuallyMatch(final List destinationTokens, final List unmatchedTokens, final TokenAndPosition sourceTokenAndPosition, int destinationPropertyPosition)
{
MatchStrength strength = MatchStrength.NONE;
Integer matchPosition = null;
Integer unmatchedTokenPosition = null;
String token = sourceTokenAndPosition.token;
Integer sourcePropertyPosition = sourceTokenAndPosition.propertyPosition;
boolean inCorrectProperty = destinationPropertyPosition == sourcePropertyPosition;
for(int i = 0; i < destinationTokens.size() && matchPosition == null; i++)
{
String otherToken = destinationTokens.get(i);
int numberOfInstancesOfThisTokenInDestinationProperties = CollectionUtil.countInstancesOf(otherToken, destinationTokens, i);
int numberOfUnmatchedInstancesOfThisToken = CollectionUtil.countInstancesOf(otherToken, unmatchedTokens);
if(unmatchedTokens.contains(otherToken) && numberOfInstancesOfThisTokenInDestinationProperties == numberOfUnmatchedInstancesOfThisToken)
{
if(token.equals(otherToken))
{
strength = inCorrectProperty ? MatchStrength.IGNORING_TOKEN_POSITION : MatchStrength.IGNORING_PROPERTY_AND_TOKEN_POSITION;
matchPosition = i;
unmatchedTokens.remove(otherToken);
}
else if(token.equalsIgnoreCase(otherToken))
{
strength = inCorrectProperty ? MatchStrength.IGNORING_CASE_AND_TOKEN_POSITION : MatchStrength.IGNORING_CASE_AND_PROPERTY_AND_TOKEN_POSITION;
matchPosition = i;
unmatchedTokens.remove(otherToken);
}
}
}
Integer reportedDestPropertyPosition = strength == MatchStrength.NONE ? null : destinationPropertyPosition;
return new TokenMatch(strength, sourcePropertyPosition, reportedDestPropertyPosition, sourceTokenAndPosition.tokenPosition, matchPosition);
}
private void remove(final List unmatchedTokens, final String destToken)
{
int matchPosition = -1;
for(int i = 0; i < unmatchedTokens.size(); i++)
{
String otherToken = unmatchedTokens.get(i);
if(destToken.equalsIgnoreCase(otherToken))
{
matchPosition = i;
break;
}
}
unmatchedTokens.remove(matchPosition);
}
private static class TokenAndPosition {
private String token;
private Integer propertyPosition;
private Integer tokenPosition;
private TokenAndPosition(final String token, final Integer propertyPosition, final Integer tokenPosition)
{
this.token = token;
this.propertyPosition = propertyPosition;
this.tokenPosition = tokenPosition;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy