All downloads are free. Search and download functionality uses the official Maven repository.

org.datacleaner.widgets.CharSetEncodingComboBox Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Free Software Foundation, Inc.
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.widgets;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

import org.datacleaner.connection.FixedWidthDatastore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;

/**
 * An editable combobox with a default set of available character set
 * encodings.
 *
 * <p>
 * The predefined entries are the UTF variants, ASCII, ISO-8859-1 to
 * ISO-8859-16, Windows-1250 to Windows-1258 and the EBCDIC code pages 1140 to
 * 1149. EBCDIC entries are displayed with a {@value #EBCDIC_POSTFIX} suffix,
 * which {@link #getSelectedItem()} removes again before returning the value.
 * Because the combobox is editable, the selected value is not limited to the
 * predefined entries.
 * </p>
 *
 * @author Kasper Sørensen
 */
public class CharSetEncodingComboBox extends DCComboBox<String> {

    private static final long serialVersionUID = 1L;
    private static final Logger logger = LoggerFactory.getLogger(CharSetEncodingComboBox.class);

    /** Display-only suffix appended to the EBCDIC entries of the list. */
    private static final String EBCDIC_POSTFIX = " (EBCDIC)";

    /** The predefined encoding names offered by every instance. */
    private static final String[] encodings = createEncodings();

    /**
     * Builds the list of predefined encoding names shown in the combobox.
     *
     * @return the encoding names, in display order
     */
    private static String[] createEncodings() {
        final List<String> list = new ArrayList<>();
        list.add("UTF-8");
        list.add("UTF-16");
        list.add("UTF-16BE");
        list.add("UTF-16LE");
        list.add("ASCII");

        for (int i = 1; i <= 16; i++) {
            list.add("ISO-8859-" + i);
        }

        for (int i = 1250; i <= 1258; i++) {
            list.add("Windows-" + i);
        }

        for (int i = 1140; i <= 1149; i++) {
            list.add(FixedWidthDatastore.EBCDIC_PREFIX + i + EBCDIC_POSTFIX);
        }

        return list.toArray(new String[0]);
    }

    /**
     * Creates an editable combobox holding the predefined encodings, with the
     * JVM's default charset name as the initial selection.
     */
    public CharSetEncodingComboBox() {
        super(encodings);
        setEditable(true);

        final String defaultCharset = Charset.defaultCharset().name();
        setSelectedItem(defaultCharset);
    }

    /**
     * Detects the character set of the given bytes and, if one is found,
     * selects it in this combobox.
     *
     * @param bytes
     *            a sample of the content whose encoding should be detected
     * @return the name of the detected character set; if no character set
     *         could be detected the selection is left untouched and the
     *         currently selected value is returned instead
     */
    public String autoDetectEncoding(final byte[] bytes) {
        final CharsetDetector cd = new CharsetDetector();
        cd.setText(bytes);
        final CharsetMatch charsetMatch = cd.detect();
        if (charsetMatch == null) {
            // The detector yields null when no charset matches the input;
            // keep whatever is currently selected rather than failing.
            logger.warn("No charset could be detected, keeping current selection");
            return getSelectedItem();
        }

        final String charSet = charsetMatch.getName();
        final int confidence = charsetMatch.getConfidence();
        logger.info("CharsetMatch: {} ({}% confidence)", charSet, confidence);
        setSelectedItem(charSet);
        return charSet;
    }

    /**
     * Gets the selected encoding name with the display-only
     * {@value #EBCDIC_POSTFIX} suffix removed.
     *
     * @return the selected encoding name, or {@code null} if nothing is
     *         selected
     */
    @Override
    public String getSelectedItem() {
        final String selected = super.getSelectedItem();
        if (selected == null) {
            return null;
        }
        return selected.replace(EBCDIC_POSTFIX, "");
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy