All Downloads are FREE. Search and download functionalities are using the official Maven repository.

fuzzycsv.rdbms.FuzzyCSVDbExporter.groovy Maven / Gradle / Ivy

Go to download

A groovy/java tabular Data (from CSV,SQL,JSON) processing library that supports fuzzy column matching,tranformations/merging/querying etc

There is a newer version: 1.9.1-groovy4
Show newest version
package fuzzycsv.rdbms

import fuzzycsv.FuzzyCSVTable
import fuzzycsv.nav.Navigator
import fuzzycsv.rdbms.stmt.DefaultSqlRenderer
import fuzzycsv.rdbms.stmt.MySqlRenderer
import fuzzycsv.rdbms.stmt.SqlDialect
import fuzzycsv.rdbms.stmt.SqlRenderer
import groovy.sql.Sql
import groovy.transform.CompileStatic
import groovy.transform.ToString
import org.apache.commons.lang3.tuple.Pair
import org.slf4j.Logger
import org.slf4j.LoggerFactory

import java.sql.Connection
import java.util.concurrent.Callable

import static fuzzycsv.rdbms.FuzzyCsvDbInserter.inTicks

class FuzzyCSVDbExporter {

    private static Logger log = LoggerFactory.getLogger(FuzzyCSVDbExporter)

    Connection connection
    int defaultDecimals = 6

    SqlRenderer sqlRenderer
    ExportParams exportParams

    FuzzyCSVDbExporter(ExportParams params) {
        this.exportParams = params
        mayBeSetRenderer()
    }

    FuzzyCSVDbExporter(Connection c, ExportParams params) {
        this(params)
        this.connection = c
    }


    ExportResult dbExport(FuzzyCSVTable table) {
        assert table.name() != null

        ExportResult r = new ExportResult(createdTable: false)
        r.exportedData = table

        if (exportParams.exportFlags.contains(DbExportFlags.CREATE)) {
            createTable(table)
            r.createdTable = true
        }

        if (!r.createdTable && exportParams.exportFlags.contains(DbExportFlags.CREATE_IF_NOT_EXISTS)) {
            r.createdTable = createTableIfNotExists(table)
        }

        if (exportParams.exportFlags.contains(DbExportFlags.INSERT)) {
            def data = insertData(table)
            r.primaryKeys = data
        }

        return r

    }

    void mayBeSetRenderer() {
        sqlRenderer = resolveSqlRenderer(exportParams)
    }

    FuzzyCSVTable insertData(FuzzyCSVTable table) {

        def idList = doInsertData(table, exportParams.pageSize)

        return toPks(idList)

    }

    FuzzyCSVTable toPks(List> lists) {
        if (!lists || !lists[0]) {
            return FuzzyCSVTable.withHeader('pk')
        }

        def first = lists.first()

        def headers = (0..first.size() - 1).collect { "pk_$it".toString() }

        return FuzzyCSVTable.tbl([headers, *lists])
    }


    boolean createTableIfNotExists(FuzzyCSVTable table) {
        def exists = DDLUtils.tableExists(connection, table.tableName)
        if (!exists) {
            createTable(table)
            return true
        }
        return false
    }

    void createTable(FuzzyCSVTable table) {

        def ddl = createDDL(table)

        log.trace("creating table [$ddl]")
        sql().execute(ddl)
    }


    private Sql gSql

    private Sql sql() {
        assert connection != null
        if (gSql == null) gSql = new Sql(connection)
        return gSql
    }


    String createDDL(FuzzyCSVTable table) {
        def name = table.tableName
        assert name != null, "tables should contain name"

        def columns = createColumns(table)

        return sqlRenderer.createTable(name, columns)


    }

    List createColumns(FuzzyCSVTable table) {
        def header = table.header

        def start = Navigator.atTopLeft(table)

        def columns = header.collect { name ->
            def firstValue = start.to(name).downIter().skip()
                    .find { it.value() != null }?.value()

            if (exportParams.exportFlags.contains(DbExportFlags.USE_DECIMAL_FOR_INTS) && firstValue instanceof Number)
                firstValue = firstValue as BigDecimal

            def column = resolveType(name, firstValue)

            if (exportParams.primaryKeys?.contains(name))
                column.isPrimaryKey = true

            if (exportParams.autoIncrement?.contains(name))
                column.autoIncrement = true

            return column
        }

        return columns
    }


    def restructureTable(FuzzyCSVTable table) {
        def tableColumns = createColumns(table)
        def structureSync = new DbColumnSync(columns: tableColumns,
                gSql: sql(), tableName: table.tableName, table: table, sqlRenderer: sqlRenderer)

        structureSync.sync()


    }


    List> doInsertData(FuzzyCSVTable table, int pageSize) {

        def inserts = FuzzyCsvDbInserter.generateInserts(sqlRenderer,pageSize, table, table.tableName)

        def rt = []
        for (Pair> q in inserts) {
            doWithRestructure(table) {
                logQuery(q)
                def insert = sql().executeInsert(q.left, q.right)
                rt.addAll(insert)
            }
        }


        return rt
    }

    private logQuery(Pair> it) {
        log.trace("executing [$it.left] params $it.right")
    }

    @CompileStatic
    def updateData(FuzzyCSVTable table, String... identifiers) {
        def queries = FuzzyCsvDbInserter.generateUpdate(sqlRenderer, table, table.name(), identifiers)

        for (q in queries) {
            doWithRestructure(table) {
                logQuery(q)
                sql().executeUpdate(q.left, q.right)
            }
        }
    }

    @CompileStatic
    private  T doWithRestructure(FuzzyCSVTable table, Callable operation) {

        try {
            return operation.call()
        } catch (x) {
            if (!exportParams.exportFlags.contains(DbExportFlags.RESTRUCTURE)) throw x

            try {
                log.warn("error while exporting [${table.name()}] trying to restructure: $x")

                restructureTable(table)
                return operation.call()

            } catch (Exception x2) {
                x.addSuppressed(x2)
                throw x2
            }
        }

    }


    Column resolveType(String name, String data) {
        new Column(type: 'varchar', name: name, size: Math.max(data.size(), 255))
    }


    Column resolveType(String name, Integer data) {
        new Column(type: 'bigint', name: name, size: 11)
    }

    Column resolveType(String name, BigInteger data) {
        new Column(type: 'bigint', name: name, size: 11)
    }

    Column resolveType(String name, Long data) {
        new Column(type: 'bigint', name: name, size: 11)
    }

    Column resolveType(String name, BigDecimal data) {
        def decimals = Math.max(data.scale(), defaultDecimals)
        def wholeNumbers = data.precision() - data.scale()


        def scale = bigDecimalScale(wholeNumbers, decimals)

        new Column(type: 'decimal', name: name, size: scale.precision, decimals: scale.scale)
    }

    Column resolveType(String name, Number data) {
        resolveType(name, data as BigDecimal)
    }

    Column resolveType(String name, Boolean data) {
        new Column(type: 'boolean', name: name)
    }

    Column resolveType(String name, byte[] data) {
        new Column(type: 'boolean', name: name)
    }

    Column resolveType(String name, Object data) {
        new Column(type: 'varchar', name: name, size: 255)
    }

    static Map bigDecimalScale(int wholeNumbers, int decimals) {
        def precision = wholeNumbers + decimals
        [precision: precision, scale: decimals]
    }

    @CompileStatic
    SqlRenderer resolveSqlRenderer(ExportParams params) {
        assert params.dialect || params.sqlRenderer, "a sql dialect or sql renderer should be set "

        if (params.sqlRenderer) return params.sqlRenderer

        switch (params.dialect) {
            case SqlDialect.DEFAULT: return DefaultSqlRenderer.instance
            case SqlDialect.MYSQL: return MySqlRenderer.instance
            default: throw new UnsupportedOperationException("dialect $params.dialect not yet supported")
        }

    }

    @ToString(includePackage = false)
    static class Column {
        String name
        String type
        int size
        int decimals
        boolean isPrimaryKey
        boolean autoIncrement

        @Override
        String toString() {
            def primaryKeyStr = isPrimaryKey ? 'primary key' : ''

            if (autoIncrement) {
                primaryKeyStr = "$primaryKeyStr AUTO_INCREMENT"
            }

            if (decimals > 0)
                return "${inTicks(name)} $type($size, $decimals) ${primaryKeyStr}"

            if (size > 0)
                return "${inTicks(name)} $type($size) ${primaryKeyStr}"

            return "${inTicks(name)} $type ${primaryKeyStr}"


        }

        String sqlString() {
            return toString()
        }
    }

    static class ExportResult {
        boolean createdTable
        FuzzyCSVTable primaryKeys
        FuzzyCSVTable exportedData

        FuzzyCSVTable mergeKeys() {
            primaryKeys.joinOnIdx(exportedData)
        }
    }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy