All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datacleaner.windows.CsvDatastoreDialog Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.windows;

import java.awt.Image;
import java.awt.event.ItemEvent;
import java.awt.event.ItemListener;
import java.util.List;
import java.util.Map.Entry;

import javax.inject.Inject;
import javax.swing.JCheckBox;
import javax.swing.JComboBox;
import javax.swing.JComponent;
import javax.swing.SwingWorker;
import javax.swing.filechooser.FileFilter;

import org.apache.metamodel.csv.CsvConfiguration;
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.Resource;
import org.datacleaner.bootstrap.WindowContext;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.connection.CsvDatastore;
import org.datacleaner.guice.Nullable;
import org.datacleaner.user.MutableDatastoreCatalog;
import org.datacleaner.user.UserPreferences;
import org.datacleaner.util.CsvConfigurationDetection;
import org.datacleaner.util.ErrorUtils;
import org.datacleaner.util.FileFilters;
import org.datacleaner.util.IconUtils;
import org.datacleaner.util.ImmutableEntry;
import org.datacleaner.util.StringUtils;
import org.datacleaner.util.WidgetUtils;
import org.datacleaner.widgets.CharSetEncodingComboBox;
import org.datacleaner.widgets.DCComboBox;
import org.datacleaner.widgets.DCComboBox.Listener;
import org.datacleaner.widgets.HeaderLineComboBox;
import org.datacleaner.widgets.ResourceSelector;
import org.datacleaner.widgets.ResourceTypePresenter;

/**
 * Dialog for setting up CSV datastores.
 */
public final class CsvDatastoreDialog extends AbstractResourceBasedDatastoreDialog {

    private static final long serialVersionUID = 1L;

    private static final String SEPARATOR_TAB = "Tab (\\t)";
    private static final String SEPARATOR_COMMA = "Comma (,)";
    private static final String SEPARATOR_SEMICOLON = "Semicolon (;)";
    private static final String SEPARATOR_PIPE = "Pipe (|)";

    private static final String QUOTE_DOUBLE_QUOTE = "Double quote (\")";
    private static final String QUOTE_SINGLE_QUOTE = "Single quote (')";
    private static final String QUOTE_NONE = "(None)";

    private static final String ESCAPE_BACKSLASH = "Backslash (\\)";
    private static final String ESCAPE_NONE = "(None)";

    private final JComboBox _separatorCharField;
    private final JComboBox _quoteCharField;
    private final JComboBox _escapeCharField;
    private final HeaderLineComboBox _headerLineComboBox;
    private final CharSetEncodingComboBox _encodingComboBox;
    private final JCheckBox _failOnInconsistenciesCheckBox;
    private final JCheckBox _multilineValuesCheckBox;

    private volatile boolean showPreview = true;

    @Inject
    public CsvDatastoreDialog(@Nullable CsvDatastore originalDatastore,
            MutableDatastoreCatalog mutableDatastoreCatalog, WindowContext windowContext,
            DataCleanerConfiguration configuration, UserPreferences userPreferences) {
        super(originalDatastore, mutableDatastoreCatalog, windowContext, configuration, userPreferences);
        _separatorCharField = new JComboBox<>(new String[] { SEPARATOR_COMMA, SEPARATOR_TAB, SEPARATOR_SEMICOLON,
                SEPARATOR_PIPE });
        _separatorCharField.setEditable(true);

        _quoteCharField = new JComboBox<>(new String[] { QUOTE_NONE, QUOTE_DOUBLE_QUOTE, QUOTE_SINGLE_QUOTE });
        _quoteCharField.setEditable(true);

        _escapeCharField = new JComboBox<>(new String[] { ESCAPE_NONE, ESCAPE_BACKSLASH });
        _escapeCharField.setSelectedItem(ESCAPE_BACKSLASH);
        _escapeCharField.setEditable(true);

        _encodingComboBox = new CharSetEncodingComboBox();

        _headerLineComboBox = new HeaderLineComboBox();

        _failOnInconsistenciesCheckBox = new JCheckBox("Fail on inconsistent column count", true);
        _failOnInconsistenciesCheckBox.setOpaque(false);
        _failOnInconsistenciesCheckBox.setForeground(WidgetUtils.BG_COLOR_BRIGHTEST);
        _failOnInconsistenciesCheckBox
                .setToolTipText("Check this checkbox to fail fast in case of inconsistent record lengths found in the CSV file. If not checked, missing fields will be represented by  values.");

        _multilineValuesCheckBox = new JCheckBox("Enable multi-line values?", false);
        _multilineValuesCheckBox.setOpaque(false);
        _multilineValuesCheckBox.setForeground(WidgetUtils.BG_COLOR_BRIGHTEST);
        _multilineValuesCheckBox
                .setToolTipText("Check this checkbox if you want to allow CSV values to span multiple lines. Since this is rare, and comes at a performance penalty, we recommend turning multi-line values off.");

        setSaveButtonEnabled(false);
        showPreview = true;

        if (originalDatastore != null) {
            _failOnInconsistenciesCheckBox.setSelected(originalDatastore.isFailOnInconsistencies());
            _multilineValuesCheckBox.setSelected(originalDatastore.isMultilineValues());
            _encodingComboBox.setSelectedItem(originalDatastore.getEncoding());

            _headerLineComboBox.setSelectedItem(originalDatastore.getHeaderLineNumber());

            Character separatorChar = originalDatastore.getSeparatorChar();
            String separator = null;
            if (separatorChar != null) {
                if (separatorChar == ',') {
                    separator = SEPARATOR_COMMA;
                } else if (separatorChar == ';') {
                    separator = SEPARATOR_SEMICOLON;
                } else if (separatorChar == '|') {
                    separator = SEPARATOR_PIPE;
                } else if (separatorChar == '\t') {
                    separator = SEPARATOR_TAB;
                } else {
                    separator = separatorChar.toString();
                }
            }
            _separatorCharField.setSelectedItem(separator);

            Character quoteChar = originalDatastore.getQuoteChar();
            final String quote;
            if (quoteChar == null) {
                quote = QUOTE_NONE;
            } else {
                if (quoteChar == CsvDatastore.NOT_A_CHAR) {
                    quote = QUOTE_NONE;
                } else if (quoteChar == '"') {
                    quote = QUOTE_DOUBLE_QUOTE;
                } else if (quoteChar == '\'') {
                    quote = QUOTE_SINGLE_QUOTE;
                } else {
                    quote = quoteChar.toString();
                }
            }
            _quoteCharField.setSelectedItem(quote);

            Character escapeChar = originalDatastore.getEscapeChar();
            final String escape;
            if (escapeChar == null) {
                escape = ESCAPE_NONE;
            } else {
                if (escapeChar == CsvDatastore.NOT_A_CHAR) {
                    escape = ESCAPE_NONE;
                } else if (escapeChar == '\\') {
                    escape = ESCAPE_BACKSLASH;
                } else {
                    escape = escapeChar.toString();
                }
            }
            _escapeCharField.setSelectedItem(escape);

            onSettingsUpdated(false, false, getResource());
        }

        // add listeners
        _separatorCharField.addItemListener(new ItemListener() {
            @Override
            public void itemStateChanged(ItemEvent e) {
                onSettingsUpdated(false, false, getResource());
            }
        });
        _quoteCharField.addItemListener(new ItemListener() {
            @Override
            public void itemStateChanged(ItemEvent e) {
                onSettingsUpdated(false, false, getResource());
            }
        });
        _escapeCharField.addItemListener(new ItemListener() {
            @Override
            public void itemStateChanged(ItemEvent e) {
                onSettingsUpdated(false, false, getResource());
            }
        });
        _encodingComboBox.addListener(new Listener() {
            @Override
            public void onItemSelected(String item) {
                onSettingsUpdated(true, false, getResource());
            }
        });
        _headerLineComboBox.addListener(new DCComboBox.Listener() {
            @Override
            public void onItemSelected(Integer item) {
                onSettingsUpdated(false, false, getResource());
            }
        });
    }

    @Override
    protected String getBannerTitle() {
        return "Comma-separated file";
    }

    @Override
    protected void onSelected(Resource resource) {
        onSettingsUpdated(true, true, resource);
    }

    private void onSettingsUpdated(final boolean autoDetectSeparatorAndQuote, final boolean autoDetectEncoding, final Resource resource) {
        if (!validateForm()) {
            return;
        }

        new SwingWorker() {
            @Override
            protected CsvConfiguration doInBackground() throws Exception {
                if (resource == null || !resource.isExists()) {
                    throw new NullPointerException("No source selected, or source does not exist");
                }

                final CsvConfigurationDetection detection = new CsvConfigurationDetection(resource);
                final CsvConfiguration configuration;

                try {
                    if (autoDetectEncoding) {
                        configuration = detection.suggestCsvConfiguration();
                    } else {
                        final String charSet = _encodingComboBox.getSelectedItem();
                        configuration = detection.suggestCsvConfiguration(charSet);
                    }
                } catch (Exception e) {
                    logger.debug("Failed to auto detect CSV configuration", e);
                    throw e;
                }
                return configuration;
            }

            @Override
            protected void done() {
                final CsvConfiguration configuration;
                try {
                    configuration = get();
                } catch (Exception e) {
                    final Throwable error = ErrorUtils.unwrapForPresentation(e);
                    setStatusError(error.getMessage());
                    showPreview = false;
                    return;
                }

                if (autoDetectEncoding) {
                    _encodingComboBox.setSelectedItem(configuration.getEncoding());
                }

                if (autoDetectSeparatorAndQuote) {
                    // set the separator
                    final char separatorChar = configuration.getSeparatorChar();
                    if (separatorChar == ',') {
                        _separatorCharField.setSelectedItem(SEPARATOR_COMMA);
                    } else if (separatorChar == ';') {
                        _separatorCharField.setSelectedItem(SEPARATOR_SEMICOLON);
                    } else if (separatorChar == '\t') {
                        _separatorCharField.setSelectedItem(SEPARATOR_TAB);
                    } else if (separatorChar == '|') {
                        _separatorCharField.setSelectedItem(SEPARATOR_PIPE);
                    } else {
                        _separatorCharField.setSelectedItem(separatorChar + "");
                    }

                    // set the quote
                    final char quoteChar = configuration.getQuoteChar();
                    if (quoteChar == CsvConfiguration.NOT_A_CHAR) {
                        _quoteCharField.setSelectedItem(QUOTE_NONE);
                    } else if (quoteChar == '\'') {
                        _quoteCharField.setSelectedItem(QUOTE_SINGLE_QUOTE);
                    } else if (quoteChar == '"') {
                        _quoteCharField.setSelectedItem(QUOTE_DOUBLE_QUOTE);
                    } else {
                        _quoteCharField.setSelectedItem(quoteChar + "");
                    }

                    // set the escape char
                    char escapeChar = configuration.getEscapeChar();
                    if (escapeChar == '\\') {
                        _escapeCharField.setSelectedItem(ESCAPE_BACKSLASH);
                    }
                }

                showPreview = true;
                validateAndUpdate();
            }
        }.execute();
    }

    @Override
    protected boolean validateForm() {
        Object selectedEncoding = _encodingComboBox.getSelectedItem();
        if (selectedEncoding == null || selectedEncoding.toString().length() == 0) {
            setStatusError("Please select a character encoding!");
            return false;
        }
        return super.validateForm();
    }

    @Override
    protected boolean isPreviewTableEnabled() {
        return true;
    }

    @Override
    protected boolean isPreviewDataAvailable() {
        return showPreview;
    }

    @Override
    protected List> getFormElements() {
        List> result = super.getFormElements();
        result.add(new ImmutableEntry("Character encoding", _encodingComboBox));
        result.add(new ImmutableEntry("Separator", _separatorCharField));
        result.add(new ImmutableEntry("Quote char", _quoteCharField));
        result.add(new ImmutableEntry("Escape char", _escapeCharField));
        result.add(new ImmutableEntry("Header line", _headerLineComboBox));
        result.add(new ImmutableEntry("", _failOnInconsistenciesCheckBox));
        result.add(new ImmutableEntry("", _multilineValuesCheckBox));
        return result;
    }

    public int getHeaderLine() {
        Number headerLineComboValue = _headerLineComboBox.getSelectedItem();
        if (headerLineComboValue != null) {
            int intComboValue = headerLineComboValue.intValue();
            if (intComboValue < 0) {
                return CsvConfiguration.NO_COLUMN_NAME_LINE;
            } else {
                // MetaModel's headerline number is 0-based
                return intComboValue;
            }
        } else {
            return CsvConfiguration.DEFAULT_COLUMN_NAME_LINE;
        }
    }

    public String getEncoding() {
        String encoding = _encodingComboBox.getSelectedItem();
        if (StringUtils.isNullOrEmpty(encoding)) {
            encoding = FileHelper.UTF_8_ENCODING;
        }
        return encoding;
    }

    public Character getSeparatorChar() {
        Object separatorItem = _separatorCharField.getSelectedItem();
        if (SEPARATOR_COMMA.equals(separatorItem)) {
            return ',';
        } else if (SEPARATOR_SEMICOLON.equals(separatorItem)) {
            return ';';
        } else if (SEPARATOR_TAB.equals(separatorItem)) {
            return '\t';
        } else if (SEPARATOR_PIPE.equals(separatorItem)) {
            return '|';
        } else {
            return separatorItem.toString().charAt(0);
        }
    }

    public Character getQuoteChar() {
        Object quoteItem = _quoteCharField.getSelectedItem();
        if (QUOTE_NONE.equals(quoteItem)) {
            return CsvDatastore.NOT_A_CHAR;
        } else if (QUOTE_DOUBLE_QUOTE.equals(quoteItem)) {
            return '"';
        } else if (QUOTE_SINGLE_QUOTE.equals(quoteItem)) {
            return '\'';
        } else {
            return quoteItem.toString().charAt(0);
        }
    }

    public Character getEscapeChar() {
        Object escapeItem = _escapeCharField.getSelectedItem();
        if (ESCAPE_NONE.equals(escapeItem)) {
            return CsvDatastore.NOT_A_CHAR;
        } else if (ESCAPE_BACKSLASH.equals(escapeItem)) {
            return '\\';
        } else {
            return escapeItem.toString().charAt(0);
        }
    }

    @Override
    public Image getWindowIcon() {
        return imageManager.getImage(IconUtils.CSV_IMAGEPATH);
    }

    @Override
    public String getWindowTitle() {
        return "CSV file datastore";
    }

    @Override
    protected CsvDatastore getPreviewDatastore(Resource resource) {
        return createDatastore("Preview", resource, false);
    }

    @Override
    protected CsvDatastore createDatastore(String name, Resource resource) {
        boolean failOnInconsistentRecords = _failOnInconsistenciesCheckBox.isSelected();
        return createDatastore(name, resource, failOnInconsistentRecords);
    }

    private CsvDatastore createDatastore(String name, Resource resource, boolean failOnInconsistentRecords) {
        return new CsvDatastore(name, resource, resource.getQualifiedPath(), getQuoteChar(), getSeparatorChar(),
                getEscapeChar(), getEncoding(), failOnInconsistentRecords, isMultilineValues(), getHeaderLine());
    }

    public boolean isMultilineValues() {
        return _multilineValuesCheckBox.isSelected();
    }

    @Override
    protected String getDatastoreIconPath() {
        return IconUtils.CSV_IMAGEPATH;
    }

    @Override
    protected void initializeFileFilters(ResourceSelector resourceSelector) {
        final FileFilter combinedFilter = FileFilters.combined("Any raw data file (.csv, .tsv, .dat, .txt)",
                FileFilters.CSV, FileFilters.TSV, FileFilters.DAT, FileFilters.TXT);
        resourceSelector.addChoosableFileFilter(combinedFilter);
        resourceSelector.addChoosableFileFilter(FileFilters.CSV);
        resourceSelector.addChoosableFileFilter(FileFilters.TSV);
        resourceSelector.addChoosableFileFilter(FileFilters.DAT);
        resourceSelector.addChoosableFileFilter(FileFilters.TXT);
        resourceSelector.addChoosableFileFilter(FileFilters.ALL);
        resourceSelector.setSelectedFileFilter(combinedFilter);

        resourceSelector.addListener(new ResourceTypePresenter.Listener() {
            @Override
            public void onResourceSelected(ResourceTypePresenter presenter, Resource resource) {
                if (FileFilters.TSV.accept(resource)) {
                    _separatorCharField.setSelectedItem(SEPARATOR_TAB);
                }

                onSettingsUpdated(true, true, resource);
            }

            @Override
            public void onPathEntered(ResourceTypePresenter presenter, String path) {
            }
        });
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy