All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datacleaner.util.DatastoreCreationUtil Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.util;

import java.io.File;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Locale;

import org.apache.commons.io.FilenameUtils;
import org.apache.metamodel.csv.CsvConfiguration;
import org.apache.metamodel.util.FileResource;
import org.datacleaner.connection.AccessDatastore;
import org.datacleaner.connection.CsvDatastore;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreCatalog;
import org.datacleaner.connection.DbaseDatastore;
import org.datacleaner.connection.ExcelDatastore;
import org.datacleaner.connection.JsonDatastore;
import org.datacleaner.connection.OdbDatastore;
import org.datacleaner.connection.SasDatastore;
import org.datacleaner.connection.XmlDatastore;
import org.datacleaner.user.MutableDatastoreCatalog;

/**
 * Static helpers for turning a {@link File} into a {@link Datastore}: infer
 * the datastore type from the file, build the matching datastore instance, and
 * optionally register it in a {@link DatastoreCatalog} under a unique name.
 */
public class DatastoreCreationUtil {

    /**
     * The file-based datastore types this utility can create, each paired with
     * the (lower-case) file extensions that identify it.
     */
    public enum FileDatastoreEnum {
        CSV("csv", "tsv", "txt", "dat"), EXCEL("xls", "xlsx"), ACCESS("mdb"),

        // TODO: Add .sas7bdat file support
        // SAS has no extensions of its own: it is only selected for
        // directories, see inferDatastoreTypeFromFile(File).
        SAS(), DBASE("dbf"), XML("xml"), JSON("json"), OPENOFFICE("odb");

        /** Lower-case extensions (without the dot) recognized for this type. */
        private final List<String> _extensions;

        private FileDatastoreEnum(String... extensions) {
            _extensions = Arrays.asList(extensions);
        }

        /**
         * Looks up the datastore type whose extension list contains the
         * extension of the given file's name. The comparison is
         * case-insensitive and independent of the JVM's default locale.
         *
         * @param file
         *            the file whose name is inspected
         * @return the first matching type in declaration order, or
         *         {@code null} if no type declares the extension
         */
        protected static FileDatastoreEnum getDatastoreTypeFromFile(File file) {
            // Normalize once, outside the loop, and with a fixed locale so
            // that matching never depends on the platform's casing rules.
            final String extension = FilenameUtils.getExtension(file.getName()).toLowerCase(Locale.ROOT);

            for (FileDatastoreEnum datastoreType : EnumSet.allOf(FileDatastoreEnum.class)) {
                if (datastoreType._extensions.contains(extension)) {
                    return datastoreType;
                }
            }

            return null;
        }
    }

    /**
     * Infers which datastore type fits the given file or directory.
     *
     * @param file
     *            the file or directory to inspect
     * @return {@link FileDatastoreEnum#SAS} for a directory; otherwise the
     *         type matching the file's extension, or {@code null} if the
     *         extension is not recognized
     */
    public static FileDatastoreEnum inferDatastoreTypeFromFile(File file) {
        if (file.isDirectory()) {
            return FileDatastoreEnum.SAS;
        }

        return FileDatastoreEnum.getDatastoreTypeFromFile(file);
    }

    /**
     * Creates a datastore for the given file under a name that is not yet
     * present in the catalog, and adds it to the catalog when the catalog is a
     * {@link MutableDatastoreCatalog}.
     * <p>
     * The name is the file's name; if that is already taken, {@code _1},
     * {@code _2}, ... is appended until an unused name is found.
     *
     * @param catalog
     *            the catalog used for the uniqueness check (and for
     *            registration, if it is mutable)
     * @param file
     *            the file or directory to create a datastore for
     * @return the created datastore; it is returned even when the catalog is
     *         not mutable and the datastore therefore was not added
     * @throws IllegalArgumentException
     *             if no datastore type can be inferred from the file
     */
    public static Datastore createAndAddUniqueDatastoreFromFile(DatastoreCatalog catalog, File file) {
        final String baseName = file.getName();
        String name = baseName;
        int suffix = 1;
        while (catalog.containsDatastore(name)) {
            name = baseName + "_" + suffix++;
        }

        final Datastore datastore = createDatastoreFromFile(file, name);
        if (catalog instanceof MutableDatastoreCatalog) {
            ((MutableDatastoreCatalog) catalog).addDatastore(datastore);
        }
        return datastore;
    }

    /**
     * Creates a datastore for the given file, inferring the datastore type via
     * {@link #inferDatastoreTypeFromFile(File)}.
     *
     * @param file
     *            the file or directory to create a datastore for
     * @param datastoreName
     *            the name to give the datastore
     * @return the created datastore
     * @throws IllegalArgumentException
     *             if no datastore type can be inferred from the file
     */
    public static Datastore createDatastoreFromFile(File file, String datastoreName) {
        return createDatastoreFromEnum(inferDatastoreTypeFromFile(file), file, datastoreName);
    }

    /**
     * Creates a datastore of an explicitly given type for the given file.
     *
     * @param fileDatastore
     *            the datastore type to create
     * @param file
     *            the file or directory backing the datastore
     * @param datastoreName
     *            the name to give the datastore
     * @return the created datastore
     * @throws IllegalArgumentException
     *             if {@code fileDatastore} is {@code null} (unrecognized file
     *             type) or is a type this method does not handle
     */
    public static Datastore createDatastoreFromEnum(FileDatastoreEnum fileDatastore, File file, String datastoreName) {
        final String filename = file.getAbsolutePath();
        if (fileDatastore == null) {
            throw new IllegalArgumentException("Unrecognized file type for: " + filename);
        }
        final FileResource resource = new FileResource(file);

        switch (fileDatastore) {
        case CSV:
            // Separator, quote char etc. are not known up front, so let the
            // detection helper suggest a configuration from the resource.
            final CsvConfigurationDetection detection = new CsvConfigurationDetection(resource);
            final CsvConfiguration csvConfiguration = detection.suggestCsvConfiguration();
            return new CsvDatastore(datastoreName, resource, csvConfiguration);
        case EXCEL:
            return new ExcelDatastore(datastoreName, new FileResource(filename), filename);
        case ACCESS:
            return new AccessDatastore(datastoreName, filename);
        case SAS:
            return new SasDatastore(datastoreName, file);
        case DBASE:
            return new DbaseDatastore(datastoreName, filename);
        case JSON:
            return new JsonDatastore(datastoreName, resource);
        case OPENOFFICE:
            return new OdbDatastore(datastoreName, filename);
        case XML:
            return new XmlDatastore(datastoreName, filename);
        }

        // Only reachable if a new enum constant is added without a case above.
        throw new IllegalArgumentException("No such datastore type");
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy