All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datacleaner.util.DefaultEnumMatcher Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Free Software Foundation, Inc.
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.util;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.metamodel.util.HasName;
import org.datacleaner.descriptors.EnumerationProvider;
import org.datacleaner.descriptors.EnumerationValue;

import com.google.common.base.Splitter;

/**
 * Default {@link EnumMatcher} implementation that uses a normalized/trimmed
 * version of the following values for exact matching:
 *
 * 
    *
  • The constant name of the enum
  • *
  • The name of the enum, if it implements {@link HasName}
  • *
  • The alias(es) of the enum, if it implements {@link HasAliases}
  • *
* */ public class DefaultEnumMatcher implements EnumMatcher { private final Map _exactMatchesMap; public DefaultEnumMatcher(final Class> enumClass) { this(EnumerationValue.providerFromEnumClass(enumClass)); } public DefaultEnumMatcher(final EnumerationProvider enumProvider) { _exactMatchesMap = new HashMap<>(); final EnumerationValue[] enumConstants = enumProvider.values(); for (final EnumerationValue e : enumConstants) { final String[] aliases = e.getAliases(); if (aliases != null) { for (final String alias : aliases) { putMatch(alias, e); } } } for (final EnumerationValue e : enumConstants) { final HasName hasName = (HasName) e; final String name = hasName.getName(); putMatch(name, e); } for (final EnumerationValue e : enumConstants) { final String constantName = e.getValue(); putMatch(constantName, e); } } private void putMatch(final String string, final EnumerationValue e) { final Collection normalizedStrings = normalize(string, false); for (final String normalizedString : normalizedStrings) { _exactMatchesMap.put(normalizedString, e); } } @Override public EnumerationValue suggestMatch(final String string) { final Collection normalizedStrings = normalize(string, true); for (final String normalizedString : normalizedStrings) { final EnumerationValue exactMatchResult = _exactMatchesMap.get(normalizedString); if (exactMatchResult != null) { return exactMatchResult; } } return null; } /** * Normalizes the incoming string before doing matching * * @param string * @param tokenize * @return */ protected Collection normalize(String string, final boolean tokenize) { if (string == null) { return Collections.emptyList(); } if (tokenize) { final Collection result = new LinkedHashSet<>(); result.addAll(normalize(string, false)); final Splitter splitter = Splitter.on(' ').omitEmptyStrings(); final List tokens = splitter.splitToList(string); for (final String token : tokens) { final Collection normalizedTokens = normalize(token, false); result.addAll(normalizedTokens); } 
return result; } else { string = StringUtils.replaceWhitespaces(string, ""); string = StringUtils.replaceAll(string, "-", ""); string = StringUtils.replaceAll(string, "_", ""); string = StringUtils.replaceAll(string, "|", ""); string = StringUtils.replaceAll(string, "*", ""); string = string.toUpperCase(); if (string.isEmpty()) { return Collections.emptyList(); } final String withoutNumbers = string.replaceAll("[0-9]", ""); if (withoutNumbers.equals(string) || withoutNumbers.isEmpty()) { return Arrays.asList(string); } return Arrays.asList(string, withoutNumbers); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy