All downloads are free. The search and download functions use the official Maven repository.

org.databene.text.DelocalizingConverter Maven / Gradle / Ivy

Go to download

'databene webdecs' is an open source software library for WEB Data Extraction, Conversion and Scripting, written by Volker Bergmann.

The newest version!
/*
 * (c) Copyright 2007-2010 by Volker Bergmann. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, is permitted under the terms of the
 * GNU General Public License.
 *
 * For redistributing this software or a derivative work under a license other
 * than the GPL-compatible Free Software License as defined by the Free
 * Software Foundation or approved by OSI, you must first obtain a commercial
 * license to this software product from Volker Bergmann.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
 * REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
 * HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package org.databene.text;

import org.databene.document.csv.CSVLineIterator;
import org.databene.webdecs.DataContainer;
import org.databene.commons.ConfigurationError;
import org.databene.commons.Converter;
import org.databene.commons.Encodings;
import org.databene.commons.converter.ThreadSafeConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Map;
import java.util.HashMap;

/**
 * Delocalizes a String bye replacing local characters by international latin characters.
 * For example the umlaut 'ä' is replaced with 'ae'.
*
* Created: 12.06.2006 18:53:55 * @author Volker Bergmann */ public class DelocalizingConverter extends ThreadSafeConverter { /** File that contains the character mapping */ private static final String CONFIG_FILENAME = "org/databene/text/DelocalizingConverter.csv"; /** The logger */ private static Logger logger = LoggerFactory.getLogger(DelocalizingConverter.class); /** a Map of replacements. The key indicates the character to replace, * the value the character to use for replacement*/ private Map replacements; /** Default constructor */ public DelocalizingConverter() throws IOException { super(String.class, String.class); init(); } // Converter implementation ---------------------------------------------------------------------------------------- /** * Implementation of the Converter interface. * @see Converter */ public String convert(String source) { String product = source; for (Map.Entry entry : replacements.entrySet()) product = product.replace(String.valueOf(entry.getKey()), entry.getValue()); return product; } // private initializers -------------------------------------------------------------------------------------------- /** * Initializes the instance by reading the definition file of replacements * @throws IOException when file access fails. */ private void init() throws IOException { replacements = new HashMap(); CSVLineIterator iterator = new CSVLineIterator(CONFIG_FILENAME, ',', true, Encodings.UTF_8); DataContainer tokens = new DataContainer(); while ((tokens = iterator.next(tokens)) != null) addReplacements(tokens.getData()); } /** * adds a line from the replacement definition file to the replacement map. * @param tokens the tokens of one line in the file. * One line contains several replacement pairs. 
*/ private void addReplacements(String[] tokens) { if (tokens.length < 2) throw new ConfigurationError("At least two tokens needed to define a replacement"); String replacement = tokens[tokens.length - 1]; for (int i = 0; i < tokens.length - 1; i++) { String token = tokens[i]; if (token.length() != 1) throw new ConfigurationError("Source token length must be 1, wrong for token: " + token); addReplacement(token.charAt(0), replacement); } } /** * Adds one replacement pair to the replacement map. * @param original the character to replace * @param replacement the String to use as replacement */ private void addReplacement(char original, String replacement) { String preset = replacements.get(original); if (preset != null) { if (preset.equals(replacement)) logger.warn("double definition of replacement: " + original + " -> " + replacement); else logger.error("ambiguous definition of replacement: " + original + " -> " + replacement + " / " + preset); } replacements.put(original, replacement); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy