
org.h2gis.functions.io.tsv.TSVDriverFunction

/**
 * H2GIS is a library that brings spatial support to the H2 Database Engine
 * <http://www.h2database.com>. H2GIS is developed by CNRS
 * <http://www.cnrs.fr/>.
 *
 * This code is part of the H2GIS project. H2GIS is free software; 
 * you can redistribute it and/or modify it under the terms of the GNU
 * Lesser General Public License as published by the Free Software Foundation;
 * version 3.0 of the License.
 *
 * H2GIS is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details <http://www.gnu.org/licenses/>.
 *
 *
 * For more information, please consult: <http://www.h2gis.org/>
 * or contact directly: info_at_h2gis.org
 */


package org.h2gis.functions.io.tsv;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.channels.FileChannel;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import org.h2.tools.Csv;
import org.h2gis.functions.io.utility.FileUtil;
import org.h2gis.api.DriverFunction;
import org.h2gis.api.ProgressVisitor;
import org.h2gis.utilities.JDBCUtilities;
import org.h2gis.utilities.TableLocation;

/**
 * This driver allows importing and exporting Tab Separated Values (TSV), a
 * format for tabular data exchange.
 *
 * A file in TSV format consists of lines. Each line contains fields separated
 * from each other by TAB characters (horizontal tab, HT, ASCII control code 9).
 *
 * A "field" here is simply any string of characters that contains no TAB;
 * the TABs divide each line into its component fields.
 *
 * Each line must contain the same number of fields.
 *
 * The first line contains the names for the fields (on all lines), i.e. column
 * headers.
 *
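 * For illustration, a minimal TSV file (hypothetical data) could look like
 * this, with a single TAB character between the fields of each line:
 * <pre>
 * ID   NAME
 * 1    Paris
 * 2    Nantes
 * </pre>
 *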
 * Please read: http://www.cs.tut.fi/~jkorpela/TSV.html
 *
 * @author Erwan Bocher
 */
public class TSVDriverFunction implements DriverFunction {

    public static final String DESCRIPTION = "TSV file (Tab Separated Values)";
    private static final int BATCH_MAX_SIZE = 100;

    @Override
    public IMPORT_DRIVER_TYPE getImportDriverType() {
        return IMPORT_DRIVER_TYPE.COPY;
    }

    @Override
    public String[] getImportFormats() {
        return new String[]{"tsv"};
    }

    @Override
    public String[] getExportFormats() {
        return new String[]{"tsv"};
    }

    @Override
    public String getFormatDescription(String format) {
        if (format.equalsIgnoreCase("tsv")) {
            return DESCRIPTION;
        } else {
            return "";
        }
    }

    @Override
    public boolean isSpatialFormat(String extension) {
        return false;
    }

    @Override
    public void exportTable(Connection connection, String tableReference, File fileName, ProgressVisitor progress) throws SQLException, IOException {
        if (FileUtil.isExtensionWellFormated(fileName, "tsv")) {
            final boolean isH2 = JDBCUtilities.isH2DataBase(connection.getMetaData());
            TableLocation location = TableLocation.parse(tableReference, isH2);
            Statement st = null;
            try {
                st = connection.createStatement();
                Csv csv = new Csv();
                csv.setFieldDelimiter('\t');
                csv.setFieldSeparatorWrite("\t");
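                // Stream the entire table content to the target file as tab-separated values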
                csv.write(fileName.getPath(), st.executeQuery("SELECT * FROM " + location.toString()), null);
            } finally {
                if (st != null) {
                    st.close();
                }
            }
        } else {
            throw new SQLException("Only .tsv extension is supported");
        }

    }

    @Override
    public void importFile(Connection connection, String tableReference, File fileName, ProgressVisitor progress) throws SQLException, IOException {
        if (FileUtil.isFileImportable(fileName, "tsv")) {
            final boolean isH2 = JDBCUtilities.isH2DataBase(connection.getMetaData());
            TableLocation requestedTable = TableLocation.parse(tableReference, isH2);
            String table = requestedTable.getTable();
            
            // Assumed average size of one TSV row, in bytes, used only to estimate progress
            int AVERAGE_NODE_SIZE = 500;
            FileInputStream fis = new FileInputStream(fileName);
            FileChannel fc = fis.getChannel();
            long fileSize = fc.size();
            // Given the file size and the assumed average row size, compute how many
            // rows to skip between progress updates so progress advances in ~1% steps
            long readFileSizeEachNode = Math.max(1, (fileSize / AVERAGE_NODE_SIZE) / 100);
            // Row counter used to throttle progress updates
            int average_row_size = 0;
            
            Csv csv = new Csv();
            csv.setFieldDelimiter('\t');
            csv.setFieldSeparatorRead('\t');
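            // Read the whole file through H2's Csv tool; passing null for the column
            // names makes the first line of the file provide the column headers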
            ResultSet reader = csv.read(new BufferedReader(new InputStreamReader(fis)), null);
            ResultSetMetaData metadata = reader.getMetaData();
            int columnCount = metadata.getColumnCount();

            StringBuilder createTable = new StringBuilder("CREATE TABLE ");
            createTable.append(table).append("(");

            StringBuilder insertTable = new StringBuilder("INSERT INTO ");
            insertTable.append(table).append(" VALUES(");

            for (int i = 0; i < columnCount; i++) {
                if (i > 0) {
                    createTable.append(",");
                    insertTable.append(",");
                }
                createTable.append(metadata.getColumnName(i + 1)).append(" VARCHAR");
                insertTable.append("?");
            }
            createTable.append(")");
            insertTable.append(")");
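            // For a hypothetical two-column header "ID<TAB>NAME", the statements built
            // above read "CREATE TABLE <table>(ID VARCHAR,NAME VARCHAR)" and
            // "INSERT INTO <table> VALUES(?,?)"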

            Statement stmt = connection.createStatement();
            stmt.execute(createTable.toString());
            stmt.close();

            PreparedStatement pst = connection.prepareStatement(insertTable.toString());
            long batchSize = 0;
            try {
                while (reader.next()) {
                    if (progress.isCanceled()) {
                        throw new SQLException("Canceled by user");
                    }
                    
                    for (int i = 0; i < columnCount; i++) {
                        pst.setString(i + 1, reader.getString(i + 1));
                    }
                    pst.addBatch();
                    batchSize++;
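                    // Flush the pending inserts once the batch reaches BATCH_MAX_SIZE rows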
                    if (batchSize >= BATCH_MAX_SIZE) {
                        pst.executeBatch();
                        pst.clearBatch();
                        batchSize = 0;
                    }
                    
                    if (average_row_size++ % readFileSizeEachNode == 0) {
                        // Update Progress
                        try {
                            progress.setStep((int) (((double) fc.position() / fileSize) * 100));
                        } catch (IOException ex) {
                            // Ignore
                        }
                    }
                }
                if (batchSize > 0) {
                    pst.executeBatch();
                }
            } finally {
                pst.close();
                reader.close();
                fc.close();
                fis.close();
            }
        }
    }
}
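
For reference, here is a minimal usage sketch (not part of the library source) showing how this driver could be called directly against an H2 connection. It assumes that org.h2gis.api.EmptyProgressVisitor is available on the classpath; the database URL, table name and file names are purely illustrative.

import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import org.h2gis.api.EmptyProgressVisitor;
import org.h2gis.functions.io.tsv.TSVDriverFunction;

public class TSVDriverUsageSketch {

    public static void main(String[] args) throws Exception {
        // In-memory H2 database used only for this sketch
        try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:tsv_demo")) {
            TSVDriverFunction driver = new TSVDriverFunction();
            // Import cities.tsv into a new table named CITIES (every column is created as VARCHAR)
            driver.importFile(connection, "CITIES", new File("cities.tsv"), new EmptyProgressVisitor());
            // Export the same table back to another TSV file
            driver.exportTable(connection, "CITIES", new File("cities_copy.tsv"), new EmptyProgressVisitor());
        }
    }
}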



