All downloads are free. Search and download functionality uses the official Maven repository.

org.datacleaner.metadata.DefaultColumnMeaningCollection Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.metadata;

import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * {@link ColumnMeaningCollection} backed by the constants of the
 * {@link ColumnMeaning} enum.
 *
 * Every meaning is indexed under its name and under each of its aliases, after
 * those strings have been normalized by {@link #standardizeForMatching(String)}.
 * Lookups normalize the requested name the same way, so differences in case,
 * punctuation, spacing and trailing digits do not prevent a match.
 */
public class DefaultColumnMeaningCollection implements ColumnMeaningCollection {

    /** Matches the (possibly empty) run of digits at the very end of a key. */
    private static final Pattern TRAILING_DIGITS = Pattern.compile("\\d*$");

    /**
     * Lazily built index from standardized name/alias to column meaning;
     * {@code null} until the first successful call to {@link #getMatchingMap()}.
     */
    private static Map<String, HasColumnMeaning> _matchingMap;

    /**
     * Returns the shared name/alias index, building it on first use.
     *
     * The index is assembled in a local map and only stored in the static
     * field once it is complete, so a failure while populating it does not
     * leave a partially filled index behind for later callers.
     *
     * @return the index from standardized key to column meaning
     * @throws IllegalStateException
     *             if two names/aliases standardize to the same key
     */
    private static Map<String, HasColumnMeaning> getMatchingMap() {
        if (_matchingMap == null) {
            final Map<String, HasColumnMeaning> map = new HashMap<>();
            final EnumSet<ColumnMeaning> meanings = EnumSet.allOf(ColumnMeaning.class);

            for (final HasColumnMeaning columnMeaning : meanings) {
                populateMatchMap(map, columnMeaning.getName(), columnMeaning);

                for (final String alias : columnMeaning.getAliases()) {
                    populateMatchMap(map, alias, columnMeaning);
                }
            }

            _matchingMap = map;
        }

        return _matchingMap;
    }

    /**
     * Registers {@code columnMeaning} in {@code map} under the standardized
     * form of {@code key}.
     *
     * @param map
     *            the index being built
     * @param key
     *            a name or alias of the meaning, not yet standardized
     * @param columnMeaning
     *            the meaning to register
     * @throws IllegalStateException
     *             if the standardized key is already present in {@code map}
     */
    private static void populateMatchMap(final Map<String, HasColumnMeaning> map, final String key,
            final HasColumnMeaning columnMeaning) {
        final String standardizedKey = standardizeForMatching(key);
        final HasColumnMeaning oldValue = map.put(standardizedKey, columnMeaning);

        if (oldValue != null) {
            throw new IllegalStateException("Multiple ColumnMeanings with name/alias: " + standardizedKey);
        }
    }

    /**
     * Normalizes a name so that superficially different spellings compare
     * equal: trims it, lower-cases it, and strips separators, trailing digits
     * and the "fld" marker.
     *
     * @param key
     *            the name to normalize; must not be {@code null}
     * @return the normalized key
     */
    private static String standardizeForMatching(String key) {
        // Locale.ROOT keeps the result independent of the JVM's default
        // locale (e.g. under a Turkish locale "ID" would not become "id").
        key = key.trim().toLowerCase(Locale.ROOT);
        key = replaceAll(key, ".", "");
        key = replaceAll(key, ",", "");
        key = replaceAll(key, "'", "");
        key = replaceAll(key, " ", "");
        key = replaceAll(key, "_", "");
        key = replaceAll(key, "-", "");
        // remove all the numbers at the end of a string to avoid words like
        // ADDRESSLINE1 being mapped to OTHER
        key = TRAILING_DIGITS.matcher(key).replaceAll("");
        // remove the 'FLD' marker of some fields such as FLD_FIRSTNAME so it
        // can be mapped properly; note that every occurrence of "fld" is
        // removed, not only a leading one
        key = replaceAll(key, "fld", "");

        return key;
    }

    /**
     * Removes/replaces {@code searchFor} in {@code str} repeatedly until no
     * occurrence is left, including occurrences that only appear as a result
     * of an earlier replacement.
     *
     * @param str
     *            the string to process
     * @param searchFor
     *            the literal text to look for
     * @param replaceWith
     *            the literal replacement; must not itself contain
     *            {@code searchFor}, otherwise the loop would never end
     * @return the string without any occurrence of {@code searchFor}
     */
    private static String replaceAll(String str, final String searchFor, final String replaceWith) {
        while (str.contains(searchFor)) {
            str = str.replace(searchFor, replaceWith);
        }

        return str;
    }

    /**
     * Returns the distinct column meanings known to this collection.
     *
     * @return a new set holding every meaning registered in the index
     */
    @Override
    public Collection<HasColumnMeaning> getColumnMeanings() {
        // Several keys (name plus aliases) point at the same meaning, so the
        // values are collected into a set to drop the duplicates.
        return new HashSet<>(getMatchingMap().values());
    }

    /**
     * Looks up the column meaning whose name or alias matches {@code name}
     * after normalization.
     *
     * @param name
     *            the column name to look up; may be {@code null}
     * @return the matching meaning, or {@code null} if {@code name} is
     *         {@code null} or nothing matches
     */
    @Override
    public HasColumnMeaning find(final String name) {
        if (name == null) {
            return null;
        }

        return getMatchingMap().get(standardizeForMatching(name));
    }

    /**
     * Returns the default meaning, which is {@link ColumnMeaning#OTHER}.
     *
     * @return {@link ColumnMeaning#OTHER}
     */
    @Override
    public HasColumnMeaning getDefault() {
        return ColumnMeaning.OTHER;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy