// ma.glasnost.orika.metadata.ScoringClassMapBuilder Maven / Gradle / Ivy
/*
* Orika - simpler, better and faster Java bean mapping
*
* Copyright (C) 2011-2013 Orika authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ma.glasnost.orika.metadata;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.PriorityQueue;
import java.util.Set;
import ma.glasnost.orika.DefaultFieldMapper;
import ma.glasnost.orika.MapperFactory;
import ma.glasnost.orika.property.PropertyResolverStrategy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* ScoringClassMapBuilder is an extension of the basic ClassMapBuilder that
* attempts to compute a best-fit matching of all properties (at every level
* of nesting) of one type to another, based on various metrics used to measure
* a given property match.
*
* Since this builder generates mappings based on scoring matches, it cannot always
* guess the correct mappings; be sure to test and double-check the mappings
* generated to assure they match expectations.
*
* Note: levenshtein distance implementation is pulled from code found in
* Apache Commons Lang org.apache.commons.lang.StringUtils, which is based on
* the implementation provided by Chas Emerick
* http://www.merriampark.com/ldjava.htm
*
* @author [email protected]
* @param
* @param
*/
public class ScoringClassMapBuilder extends ClassMapBuilder {
/** Logger used by byDefault to trace each candidate match score (only when trace logging is enabled). */
private static final Logger LOGGER = LoggerFactory.getLogger(ScoringClassMapBuilder.class);
/** Weights passed to every FieldMatchScore created by this builder; assigned once in the constructor. */
private final PropertyMatchingWeights matchingWeights;
/**
 * PropertyMatchingWeights describes how the different matching scenarios
 * are weighted when computing a match score for a pair of properties.
 * <p>
 * Every weight must lie between 0.0 and 1.0 (inclusive); each one starts
 * at 0.5. The setters validate their argument, return this instance and can
 * therefore be chained.
 *
 * @author [email protected]
 *
 */
public static final class PropertyMatchingWeights {
    private static final double MIN_WEIGHT = 0.0;
    private static final double MAX_WEIGHT = 1.0;
    private static final double DEFAULT_WEIGHT = MAX_WEIGHT / 2.0;

    private double nestedDepth = DEFAULT_WEIGHT;
    private double unmatchedWords = DEFAULT_WEIGHT;
    private double editDistance = DEFAULT_WEIGHT;
    private double containsName = DEFAULT_WEIGHT;
    private double typeMatch = DEFAULT_WEIGHT;
    private double commonWordCount = DEFAULT_WEIGHT;
    private double minimumScore = DEFAULT_WEIGHT;

    /**
     * @return the weight associated with the number of words found in common
     *         between two property expressions
     */
    public double commonWordCount() {
        return commonWordCount;
    }

    /**
     * Set the weight associated with the number of words found in common
     * between two property expressions.
     *
     * @param weight the weight associated with the number of words found in
     *        common between two property expressions
     * @return this instance of PropertyMatchingWeights
     * @throws IllegalArgumentException if the weight is NaN or outside of [0.0, 1.0]
     */
    public PropertyMatchingWeights commonWordCount(double weight) {
        validateWeight(weight);
        this.commonWordCount = weight;
        return this;
    }

    /**
     * @return the weight associated with one property containing the
     *         entire name of another property
     */
    public double containsName() {
        return containsName;
    }

    /**
     * Set the weight associated with one property containing the
     * entire name of another property.
     *
     * @param weight the weight associated with one property containing the
     *        entire name of another property
     * @return this instance of PropertyMatchingWeights
     * @throws IllegalArgumentException if the weight is NaN or outside of [0.0, 1.0]
     */
    public PropertyMatchingWeights containsName(double weight) {
        validateWeight(weight);
        this.containsName = weight;
        return this;
    }

    /**
     * @return the weight associated with one property matching the type of the other
     */
    public double typeMatch() {
        return typeMatch;
    }

    /**
     * Set the weight associated with one property matching the type of the other.
     *
     * @param weight the weight associated with one property matching the type of the other
     * @return this instance of PropertyMatchingWeights
     * @throws IllegalArgumentException if the weight is NaN or outside of [0.0, 1.0]
     */
    public PropertyMatchingWeights typeMatch(double weight) {
        validateWeight(weight);
        this.typeMatch = weight;
        return this;
    }

    /**
     * @return the weight modifier associated with a property word's edit distance
     *         based on its nesting depth
     */
    public double nestedDepth() {
        return nestedDepth;
    }

    /**
     * Set the weight modifier associated with a property word's edit distance based on
     * its nesting depth; higher values cause the matching to be more focused toward
     * the final name of a nested property, lower values focus on the entire name more evenly.
     *
     * @param weight the weight modifier associated with a property word's edit distance
     *        based on its nesting depth
     * @return this instance of PropertyMatchingWeights
     * @throws IllegalArgumentException if the weight is NaN or outside of [0.0, 1.0]
     */
    public PropertyMatchingWeights nestedDepth(double weight) {
        validateWeight(weight);
        this.nestedDepth = weight;
        return this;
    }

    /**
     * @return the weight associated with the number of unmatched words between two property expressions
     */
    public double unmatchedWords() {
        return unmatchedWords;
    }

    /**
     * Set the weight associated with the number of unmatched words between two property expressions.
     *
     * @param weight the weight associated with the number of unmatched words between two property expressions
     * @return this instance of PropertyMatchingWeights
     * @throws IllegalArgumentException if the weight is NaN or outside of [0.0, 1.0]
     */
    public PropertyMatchingWeights unmatchedWords(double weight) {
        validateWeight(weight);
        this.unmatchedWords = weight;
        return this;
    }

    /**
     * @return the weight associated with the edit distance between words in two property expressions
     */
    public double editDistance() {
        return editDistance;
    }

    /**
     * Set the weight associated with the edit distance between words in two property expressions.
     *
     * @param weight the weight associated with the edit distance between words in two property expressions
     * @return this instance of PropertyMatchingWeights
     * @throws IllegalArgumentException if the weight is NaN or outside of [0.0, 1.0]
     */
    public PropertyMatchingWeights editDistance(double weight) {
        validateWeight(weight);
        this.editDistance = weight;
        return this;
    }

    /**
     * @return the weight applied to the minimum score needed to accept a given match
     */
    public double minimumScore() {
        return minimumScore;
    }

    /**
     * Set the weight applied to the minimum score needed to accept a given match; setting higher
     * values makes the matching more restrictive, lower scores make matching more lenient.
     *
     * @param weight the weight applied to the minimum score needed to accept a given match
     * @return this instance of PropertyMatchingWeights
     * @throws IllegalArgumentException if the weight is NaN or outside of [0.0, 1.0]
     */
    public PropertyMatchingWeights minimumScore(double weight) {
        validateWeight(weight);
        this.minimumScore = weight;
        return this;
    }

    /**
     * Rejects any weight that is not a number within [MIN_WEIGHT, MAX_WEIGHT].
     *
     * @param weight the candidate weight
     * @throws IllegalArgumentException if the weight is NaN or out of range
     */
    private void validateWeight(double weight) {
        // Expressed as a negated "in range" test: every comparison with NaN is
        // false, so "weight < MIN || weight > MAX" would silently accept NaN.
        if (!(weight >= MIN_WEIGHT && weight <= MAX_WEIGHT)) {
            throw new IllegalArgumentException("weights should be between " + MIN_WEIGHT + " and " + MAX_WEIGHT);
        }
    }
}
/**
 * Constructs a new instance of ScoringClassMapBuilder, using the provided PropertyMatchingWeights
 * to adjust the overall scoring of how properties are matched.
 * <p>
 * All arguments except matchingWeights are handed unchanged to the
 * ClassMapBuilder constructor.
 *
 * @param aType the A-side type of the class map being built
 * @param bType the B-side type of the class map being built
 * @param mapperFactory the mapper factory forwarded to the parent builder
 * @param propertyResolver the property resolver strategy forwarded to the parent builder
 * @param defaults the default field mappers forwarded to the parent builder
 * @param matchingWeights the weights used when scoring candidate property pairs
 */
protected ScoringClassMapBuilder(Type aType, Type bType, MapperFactory mapperFactory, PropertyResolverStrategy propertyResolver,
DefaultFieldMapper[] defaults, PropertyMatchingWeights matchingWeights) {
super(aType, bType, mapperFactory, propertyResolver, defaults);
this.matchingWeights = matchingWeights;
}
/**
 * Registers default field mappings between type A and type B, choosing
 * the pairs by match score instead of by identical name.
 * <p>
 * Every property expression of A is scored against every property
 * expression of B (a property named "class" is skipped on either side).
 * The candidates are then visited from the highest score to the lowest; a
 * pair is mapped when neither of its expressions has been mapped yet and its
 * score meets the configured minimum. Properties of A that are still
 * unmatched afterwards are offered to the default field mappers.
 *
 * @param direction the direction applied to every field mapping registered here
 * @param withDefaults the default field mappers consulted for unmatched
 *        properties; when none are given, the mappers returned by
 *        getDefaultFieldMappers() are used instead
 * @return this builder
 * @see ma.glasnost.orika.metadata.ClassMapBuilder#byDefault(ma.glasnost.
 *      orika.DefaultFieldMapper[])
 */
public ClassMapBuilder byDefault(MappingDirection direction, DefaultFieldMapper... withDefaults) {
    DefaultFieldMapper[] defaults = (withDefaults.length == 0) ? getDefaultFieldMappers() : withDefaults;

    /*
     * For our custom 'byDefault' method, we're going to try and match
     * fields by their Levenshtein distance
     */
    PriorityQueue<FieldMatchScore> matchScores = new PriorityQueue<FieldMatchScore>();
    Map<String, Property> propertiesForA = getPropertyExpressions(getAType());
    Map<String, Property> propertiesForB = getPropertyExpressions(getBType());
    for (final Entry<String, Property> propertyA : propertiesForA.entrySet()) {
        if (propertyA.getValue().getName().equals("class")) {
            continue;
        }
        for (final Entry<String, Property> propertyB : propertiesForB.entrySet()) {
            if (!propertyB.getValue().getName().equals("class")) {
                matchScores.add(new FieldMatchScore(propertyA.getValue(), propertyB.getValue(), matchingWeights));
            }
        }
    }

    Set<String> unmatchedFields = new LinkedHashSet<String>(this.getPropertiesForTypeA());
    unmatchedFields.remove("class");

    /*
     * Drain the queue with poll(): a PriorityQueue's iterator does not
     * traverse the elements in priority order, so a for-each loop would not
     * visit the best-scoring candidates first.
     */
    for (FieldMatchScore score = matchScores.poll(); score != null; score = matchScores.poll()) {
        String expressionA = score.propertyA.getExpression();
        String expressionB = score.propertyB.getExpression();
        if (this.getMappedPropertiesForTypeA().contains(expressionA)
                || this.getMappedPropertiesForTypeB().contains(expressionB)) {
            // one side was already claimed by a better-scoring pair
            continue;
        }
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("\n" + score.toString());
        }
        if (score.meetsMinimumScore()) {
            fieldMap(expressionA, expressionB).direction(direction).add();
            unmatchedFields.remove(expressionA);
        }
    }

    /*
     * Apply any default field mappers to the unmapped fields
     */
    for (String propertyNameA : unmatchedFields) {
        Property prop = resolvePropertyForA(propertyNameA);
        for (DefaultFieldMapper defaulter : defaults) {
            String suggestion = defaulter.suggestMappedField(propertyNameA, prop.getType());
            if (suggestion != null && getPropertiesForTypeB().contains(suggestion)
                    && !getMappedPropertiesForTypeB().contains(suggestion)) {
                fieldMap(propertyNameA, suggestion).direction(direction).add();
            }
        }
    }
    return this;
}
/**
 * Factory producing ScoringClassMapBuilder instances that all share the
 * PropertyMatchingWeights held by this factory.
 *
 * @author mattdeboer
 *
 */
public static class Factory extends ClassMapBuilderFactory {
    private PropertyMatchingWeights matchingWeights;

    /**
     * Constructs a new Factory for ScoringClassMapBuilder instances, backed
     * by a freshly created PropertyMatchingWeights.
     */
    public Factory() {
        this(new PropertyMatchingWeights());
    }

    /**
     * Constructs a new Factory for ScoringClassMapBuilder instances.
     *
     * @param matchingWeights the weights used to control the scoring of the
     *        ScoringClassMapBuilder instances created by this factory
     */
    public Factory(PropertyMatchingWeights matchingWeights) {
        this.matchingWeights = matchingWeights;
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * ma.glasnost.orika.metadata.ClassMapBuilderFactory#newClassMapBuilder
     * (ma.glasnost.orika.metadata.Type, ma.glasnost.orika.metadata.Type,
     * ma.glasnost.orika.property.PropertyResolverStrategy,
     * ma.glasnost.orika.DefaultFieldMapper[])
     */
    @Override
    protected ClassMapBuilder newClassMapBuilder(Type aType, Type bType, MapperFactory mapperFactory,
            PropertyResolverStrategy propertyResolver, DefaultFieldMapper[] defaults) {
        // every builder receives the very same weights instance held by this factory
        final ScoringClassMapBuilder builder =
                new ScoringClassMapBuilder(aType, bType, mapperFactory, propertyResolver, defaults, matchingWeights);
        return builder;
    }
}
/**
* FieldMatchScore is used to score the match of a pair of property expressions
*
* @author [email protected]
*
*/
public static class FieldMatchScore implements Comparable {
/** Filler words that are meant to be disregarded when comparing property names. */
private static final List IGNORED_WORDS = Arrays.asList("with","this","that","an","a","of","the");
/*
* TODO: static for now; should probably be computed
*/
/** Reference score; meetsMinimumScore() compares against half of it scaled by the minimumScore weight. */
private static final double MAX_POSSIBLE_SCORE = 50.0;
/** Weights applied to the individual score components. */
private final PropertyMatchingWeights matchingWeights;
/** True when one property's name contains the other's (case-sensitive); set in the constructor. */
private boolean contains;
/** True when one property's name contains the other's, ignoring case; set in the constructor. */
private boolean containsIgnoreCase;
/** Raw type-compatibility value assigned by the constructor (negative infinity, 1.0 or 0.0). */
private double typeMatch;
/** Property from the A side of the candidate pair; also read by the enclosing builder. */
private Property propertyA;
/** Property from the B side of the candidate pair; also read by the enclosing builder. */
private Property propertyB;
/** Hash code computed once in the constructor from propertyA and propertyB. */
private int hashCode;
/** Number of distinct words the two property expressions have in common. */
private double commonWordCount;
/** Mean of the sizes of the two word-list collections produced by splitIntoLowerCaseWords. */
private double avgWordCount;
/** Result of computeWordMatchScore for the two expressions. */
private double wordMatchScore;
/** Overall score: wordMatchScore + commonWordsScore + containsScore + typeMatchScore. */
private double score;
/** Weighted contribution of typeMatch to the overall score. */
private double typeMatchScore;
/** Weighted contribution of the common words to the overall score. */
private double commonWordsScore;
/** Weighted contribution of containsIgnoreCase to the overall score. */
private double containsScore;
/**
 * Constructs a new FieldMatchScore based on the provided pair of properties, with scoring modified by
 * the provided PropertyMatchingWeights.
 * <p>
 * All score components and the cached hash code are computed here.
 *
 * @param propertyA the candidate property from the A side
 * @param propertyB the candidate property from the B side
 * @param matchingWeights the weights applied to the individual score components
 */
public FieldMatchScore(Property propertyA, Property propertyB, PropertyMatchingWeights matchingWeights) {
    this.matchingWeights = matchingWeights;
    this.propertyA = propertyA;
    this.propertyB = propertyB;

    String nameA = propertyA.getName();
    String nameB = propertyB.getName();
    // Locale.ROOT keeps the comparison independent of the default locale
    // (e.g. the Turkish dotless i).
    String propertyALower = nameA.toLowerCase(Locale.ROOT);
    String propertyBLower = nameB.toLowerCase(Locale.ROOT);

    List<List<String>> aWords = splitIntoLowerCaseWords(propertyA.getExpression());
    List<List<String>> bWords = splitIntoLowerCaseWords(propertyB.getExpression());
    // The outer lists hold one word list per nesting level, so the ignored
    // words have to be removed from the inner lists; removeAll on the outer
    // list would compare lists against strings and never remove anything.
    for (List<String> levelWords : aWords) {
        levelWords.removeAll(IGNORED_WORDS);
    }
    for (List<String> levelWords : bWords) {
        levelWords.removeAll(IGNORED_WORDS);
    }

    Set<String> commonWords = intersection(aWords, bWords);
    this.avgWordCount = (aWords.size() + bWords.size()) / 2.0;
    this.commonWordCount = commonWords.size();
    this.wordMatchScore = computeWordMatchScore(aWords, bWords);

    this.contains = nameA.contains(nameB) || nameB.contains(nameA);
    this.containsIgnoreCase = contains || propertyALower.contains(propertyBLower) || propertyBLower.contains(propertyALower);

    boolean multiA = propertyA.isMultiOccurrence();
    boolean multiB = propertyB.isMultiOccurrence();
    if (multiA != multiB) {
        // exactly one side is multi-occurrence: veto the pair
        this.typeMatch = Double.NEGATIVE_INFINITY;
    } else if (propertyA.getType().isAssignableFrom(propertyB.getType())
            || propertyB.getType().isAssignableFrom(propertyA.getType())) {
        this.typeMatch = 1.0;
    } else {
        this.typeMatch = 0.0;
    }

    computeOverallScore();
    this.hashCode = computeHashCode();
}
/**
 * @return a multi-line breakdown of this score: both property
 *         expressions, each score component and the total
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append("[").append(propertyA.getExpression()).append(", ").append(propertyB.getExpression()).append("] {\n");
    text.append(" wordMatchScore: ").append(wordMatchScore).append("\n");
    text.append(" commonWordScore: ").append(commonWordsScore).append("\n");
    text.append(" containsScore: ").append(containsScore).append("\n");
    text.append(" typeMatchScore: ").append(typeMatchScore).append("\n");
    text.append(" ------------------- \n");
    text.append(" total: ").append(score).append("\n");
    text.append("}");
    return text.toString();
}
private Set intersection(List> setA, List> setB) {
Set intersection = flatten(setA);
Set temp = flatten(setB);
intersection.retainAll(temp);
return intersection;
}
private Set flatten(List> aWords) {
Set set = new LinkedHashSet();
for (List collection: aWords) {
for (T item: collection) {
set.add(item);
}
}
return set;
}
/**
 * Tells whether this match is good enough to be accepted. The threshold
 * is half of MAX_POSSIBLE_SCORE multiplied by the minimumScore weight.
 *
 * @return true if this match meets the minimum score (determined by the matching weights)
 */
public boolean meetsMinimumScore() {
    final double halfOfMaximum = MAX_POSSIBLE_SCORE / 2.0;
    final double threshold = halfOfMaximum * matchingWeights.minimumScore();
    return score >= threshold;
}
/**
* Compute the match score between two properties, broken up into arrays of
* words at each property divider level.
*
* @param aWords
* @param bWords
* @return
*/
double computeWordMatchScore(List> aWords, List> bWords) {
Set aWordsRemaining = new LinkedHashSet(flatten(aWords));
Set bWordsRemaining = new LinkedHashSet(flatten(bWords));
PriorityQueue orderedPairs = new PriorityQueue();
double aDepth = 0;
for (List aWordList : aWords) {
++aDepth;
for (String aWord : aWordList) {
double bDepth = 0;
for (List bWordList: bWords) {
for (String bWord : bWordList) {
++bDepth;
orderedPairs.add(new WordPair(aWord, bWord, (aDepth/aWords.size()), (bDepth/bWords.size()), matchingWeights));
}
}
}
}
double score = 0.0d;
for (WordPair w: orderedPairs) {
if (aWordsRemaining.contains(w.aWord) && bWordsRemaining.contains(w.bWord)) {
score += w.score;
aWordsRemaining.remove(w.aWord);
bWordsRemaining.remove(w.bWord);
}
}
double remains = (aWordsRemaining.size() + bWordsRemaining.size()) / 2.0;
double initial = (aWords.size() + bWords.size()) / 2.0;
double unmatchedWordsCount = (remains - initial) * (matchingWeights.unmatchedWords());
return score + unmatchedWordsCount;
}
/**
 * Derives the weighted score components (contains, common words, type
 * match) and sums them with the word match score into the overall score.
 */
private void computeOverallScore() {
    this.containsScore = this.matchingWeights.containsName() * (this.containsIgnoreCase ? 10 : 0);

    if (this.commonWordCount == 0) {
        this.commonWordsScore = 0.0;
    } else {
        this.commonWordsScore = (this.matchingWeights.commonWordCount())
                * (Math.pow(2 * this.commonWordCount, 2.0) * ((avgWordCount + commonWordCount) / avgWordCount));
    }

    if (this.typeMatch == Double.NEGATIVE_INFINITY) {
        // Keep the veto even when the typeMatch weight is 0.0: 0.0 * -Infinity
        // is NaN, which would turn the whole score into NaN and make
        // compareTo report the pair as "equal" to every other pair.
        this.typeMatchScore = Double.NEGATIVE_INFINITY;
    } else {
        this.typeMatchScore = (this.matchingWeights.typeMatch()) * this.typeMatch;
    }

    this.score = this.wordMatchScore + commonWordsScore + containsScore + typeMatchScore;
}
/**
 * WordPair is used to rank a match of a given pair of words based on
 * word depth and levenshtein distance between the words.
 * <p>
 * Pairs order by descending score, so the best pair is the head of a
 * PriorityQueue.
 */
private static class WordPair implements Comparable<WordPair> {
    private final String aWord;
    private final String bWord;
    private final double score;

    /**
     * @param aWord the word taken from property A
     * @param bWord the word taken from property B
     * @param aWordDepth the relative depth computed for aWord by the caller
     * @param bWordDepth the relative depth computed for bWord by the caller
     * @param matchingWeights supplies the nestedDepth and editDistance weights
     */
    private WordPair(String aWord, String bWord, double aWordDepth, double bWordDepth, PropertyMatchingWeights matchingWeights) {
        this.aWord = aWord;
        this.bWord = bWord;
        double depthWeight = matchingWeights.nestedDepth();
        double editWeight = matchingWeights.editDistance();
        double aDepth = (1.0 + aWordDepth) * depthWeight;
        double bDepth = (1.0 + bWordDepth) * depthWeight;
        double editDistance = getLevenshteinDistance(aWord, bWord);
        // the closer the words (smaller edit distance), the larger this term
        double distanceWeight = editWeight * (1.0 / (editDistance + 1.0));
        double wordLength = Math.max(aWord.length(), bWord.length());
        double wordLengthWeight = editWeight * Math.sqrt(wordLength);
        this.score = aDepth + bDepth + distanceWeight + wordLengthWeight;
    }

    /* (non-Javadoc)
     * @see java.lang.Comparable#compareTo(java.lang.Object)
     */
    @Override
    public int compareTo(WordPair o) {
        // arguments reversed on purpose: higher scores sort first
        return Double.compare(o.score, this.score);
    }

    @Override
    public String toString() {
        return "[" + aWord + "],[" + bWord + "] = " + score;
    }

    /* (non-Javadoc)
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + ((aWord == null) ? 0 : aWord.hashCode());
        result = prime * result + ((bWord == null) ? 0 : bWord.hashCode());
        long temp = Double.doubleToLongBits(score);
        result = prime * result + (int) (temp ^ (temp >>> 32));
        return result;
    }

    /* (non-Javadoc)
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        WordPair other = (WordPair) obj;
        boolean sameA = (aWord == null) ? other.aWord == null : aWord.equals(other.aWord);
        boolean sameB = (bWord == null) ? other.bWord == null : bWord.equals(other.bWord);
        return sameA && sameB
                && Double.doubleToLongBits(score) == Double.doubleToLongBits(other.score);
    }
}
/**
 * Computes the levenshtein distance of 2 strings, i.e. the minimum number
 * of single-character insertions, deletions and substitutions needed to turn
 * one string into the other.
 *
 * @param s the first string; must not be null
 * @param t the second string; must not be null
 * @return the edit distance between s and t
 * @throws IllegalArgumentException if either string is null
 */
private static int getLevenshteinDistance(String s, String t) {
    if (s == null || t == null) {
        throw new IllegalArgumentException("Strings must not be null");
    }
    if (s.length() == 0) {
        return t.length();
    }
    if (t.length() == 0) {
        return s.length();
    }

    // The work rows are sized after the shorter string to consume less memory.
    final boolean sIsShorter = s.length() <= t.length();
    final String shorter = sIsShorter ? s : t;
    final String longer = sIsShorter ? t : s;
    final int width = shorter.length();

    int[] previousRow = new int[width + 1];
    int[] currentRow = new int[width + 1];
    for (int column = 0; column <= width; column++) {
        previousRow[column] = column;
    }

    for (int row = 1; row <= longer.length(); row++) {
        final char longerChar = longer.charAt(row - 1);
        currentRow[0] = row;
        for (int column = 1; column <= width; column++) {
            final int fromLeft = currentRow[column - 1] + 1;
            final int fromAbove = previousRow[column] + 1;
            final int fromDiagonal = previousRow[column - 1]
                    + (shorter.charAt(column - 1) == longerChar ? 0 : 1);
            currentRow[column] = Math.min(Math.min(fromLeft, fromAbove), fromDiagonal);
        }
        // the row just filled becomes the 'previous' row of the next pass
        final int[] recycled = previousRow;
        previousRow = currentRow;
        currentRow = recycled;
    }
    // after the final swap, previousRow holds the most recent costs
    return previousRow[width];
}
/**
 * Regular expression used to split a string into words: on the delimiter
 * characters <code>{ } ] [ _ -</code> and on camel-case / letter-to-non-letter
 * word boundaries.
 * <p>
 * The hyphen is deliberately the last character of the character class;
 * written between '[' and '_' it denotes a range and the literal hyphen
 * is not matched.
 */
private static final String WORD_SPLITTER = String.format("%s|%s|%s|%s",
        "([\\{\\}\\]\\[_-])", "(?<=[A-Z])(?=[A-Z][a-z])", "(?<=[^A-Z])(?=[A-Z])",
        "(?<=[A-Za-z])(?=[^A-Za-z])");
/**
* Splits a given property expression into arrays of lower-case words;
* result is returned as a set of String[], which represent a property
* component split on word boundaries.
*
* @param s
* @return
*/
private static List> splitIntoLowerCaseWords(String s) {
List> results = new ArrayList>();
for (String property: s.split("[.]")) {
List words = new LinkedList();
results.add(words);
for (String word : property.split(WORD_SPLITTER)) {
if (word != null && word.trim().length() > 0) {
words.add(word.toLowerCase());
}
}
}
return results;
}
/*
 * (non-Javadoc)
 *
 * Orders by descending score: a better (higher) score compares as
 * "lower", so the best match is ordered first.
 *
 * @see java.lang.Comparable#compareTo(java.lang.Object)
 */
public int compareTo(FieldMatchScore that) {
    if (this.score > that.score) {
        return -1;
    }
    return (this.score < that.score) ? 1 : 0;
}
/**
 * Combines the hash codes of the two properties (0 for a null property)
 * using the usual 31-multiplier scheme.
 *
 * @return the hash code derived from propertyA and propertyB
 */
private int computeHashCode() {
    final int hashOfA = (propertyA != null) ? propertyA.hashCode() : 0;
    final int hashOfB = (propertyB != null) ? propertyB.hashCode() : 0;
    int combined = 31 + hashOfA;
    combined = 31 * combined + hashOfB;
    return combined;
}
/**
 * @return the hash code that was computed once in the constructor
 */
@Override
public int hashCode() {
    return this.hashCode;
}
/*
 * (non-Javadoc)
 *
 * Two scores are equal when they are of the same class and refer to
 * equal propertyA and equal propertyB; the score value itself is not
 * compared.
 *
 * @see java.lang.Object#equals(java.lang.Object)
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }
    final FieldMatchScore other = (FieldMatchScore) obj;
    final boolean sameA = (propertyA == null) ? other.propertyA == null : propertyA.equals(other.propertyA);
    if (!sameA) {
        return false;
    }
    return (propertyB == null) ? other.propertyB == null : propertyB.equals(other.propertyB);
}
}
}