org.datacleaner.metadata.DefaultColumnMeaningCollection Maven / Gradle / Ivy
/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.metadata;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * {@link ColumnMeaningCollection} backed by the constants of the
 * {@link ColumnMeaning} enum.
 *
 * Every constant is registered under its name and under each of its aliases.
 * Lookups are tolerant: both the registered keys and the looked-up names are
 * normalized by {@link #standardizeForMatching(String)} before comparison.
 */
public class DefaultColumnMeaningCollection implements ColumnMeaningCollection {

    /** Characters that carry no meaning when matching names. */
    private static final Pattern IGNORED_CHARACTERS = Pattern.compile("[.,' _-]");

    /** A run of digits at the very end of a name, e.g. the "1" in "addressline1". */
    private static final Pattern TRAILING_DIGITS = Pattern.compile("\\d+$");

    /** Marker found in names such as FLD_FIRSTNAME; stripped before matching. */
    private static final String FIELD_MARKER = "fld";

    /**
     * Lazily built lookup table from normalized name/alias to meaning.
     * Volatile so that a fully populated map is what other threads observe.
     */
    private static volatile Map<String, HasColumnMeaning> _matchingMap;

    /**
     * Returns the shared lookup table, building it on first use.
     *
     * The table is populated in a local variable and only published once it
     * is complete, so callers never see a partially filled map and a failed
     * build is not cached. Under a race the table may be built more than
     * once; each build produces the same content.
     *
     * @return map from normalized name/alias to column meaning
     * @throws IllegalStateException
     *             if two names/aliases normalize to the same key
     */
    private static Map<String, HasColumnMeaning> getMatchingMap() {
        Map<String, HasColumnMeaning> map = _matchingMap;
        if (map == null) {
            map = buildMatchingMap();
            _matchingMap = map;
        }
        return map;
    }

    /**
     * Builds a fresh lookup table from all {@link ColumnMeaning} constants.
     *
     * @return a new, fully populated map
     * @throws IllegalStateException
     *             if two names/aliases normalize to the same key
     */
    private static Map<String, HasColumnMeaning> buildMatchingMap() {
        final Map<String, HasColumnMeaning> map = new HashMap<>();
        final EnumSet<ColumnMeaning> meanings = EnumSet.allOf(ColumnMeaning.class);
        for (final HasColumnMeaning columnMeaning : meanings) {
            populateMatchMap(map, columnMeaning.getName(), columnMeaning);
            for (final String alias : columnMeaning.getAliases()) {
                populateMatchMap(map, alias, columnMeaning);
            }
        }
        return map;
    }

    /**
     * Registers a single name or alias for a meaning.
     *
     * @param map
     *            the table being built
     * @param key
     *            raw name or alias; it is normalized before being stored
     * @param columnMeaning
     *            the meaning to register
     * @throws IllegalStateException
     *             if the normalized key is already registered
     */
    private static void populateMatchMap(final Map<String, HasColumnMeaning> map, final String key,
            final HasColumnMeaning columnMeaning) {
        final String standardizedKey = standardizeForMatching(key);
        final HasColumnMeaning oldValue = map.put(standardizedKey, columnMeaning);
        if (oldValue != null) {
            throw new IllegalStateException("Multiple ColumnMeanings with name/alias: " + standardizedKey);
        }
    }

    /**
     * Normalizes a name for matching: trims and lower-cases it, drops the
     * characters {@code . , ' space _ -}, drops digits at the end and removes
     * the "fld" marker.
     *
     * @param key
     *            the raw name, must not be null
     * @return the normalized name, possibly empty
     */
    private static String standardizeForMatching(final String key) {
        // Locale.ROOT keeps the result independent of the JVM's default
        // locale (e.g. a Turkish locale would lower-case 'I' to a dotless i).
        String result = key.trim().toLowerCase(Locale.ROOT);
        result = IGNORED_CHARACTERS.matcher(result).replaceAll("");

        // remove all the numbers at the end of a string to avoid words like
        // ADDRESSLINE1 being mapped to OTHER
        result = TRAILING_DIGITS.matcher(result).replaceAll("");

        // remove the 'FLD' marker of fields such as FLD_FIRSTNAME so they can
        // be mapped properly. Note: every occurrence is removed, not only a
        // leading one, and removal repeats until none is left.
        while (result.contains(FIELD_MARKER)) {
            result = result.replace(FIELD_MARKER, "");
        }
        return result;
    }

    /**
     * Returns the distinct meanings known to this collection.
     *
     * @return a new set holding every registered meaning once
     */
    @Override
    public Collection<HasColumnMeaning> getColumnMeanings() {
        // several keys (name + aliases) point at the same meaning, hence the set
        return new HashSet<>(getMatchingMap().values());
    }

    /**
     * Looks up a meaning by name or alias, ignoring case, the characters
     * {@code . , ' space _ -}, trailing digits and the "fld" marker.
     *
     * @param name
     *            the name to look up, may be null
     * @return the matching meaning, or null if the name is null or unknown
     */
    @Override
    public HasColumnMeaning find(final String name) {
        if (name == null) {
            return null;
        }
        return getMatchingMap().get(standardizeForMatching(name));
    }

    /**
     * @return {@link ColumnMeaning#OTHER}
     */
    @Override
    public HasColumnMeaning getDefault() {
        return ColumnMeaning.OTHER;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy