package org.embulk.input.tsurugidb;

import static java.util.Locale.ENGLISH;

import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigSource;
import org.embulk.config.TaskReport;
import org.embulk.config.TaskSource;
import org.embulk.input.tsurugidb.common.DbColumnOption;
import org.embulk.input.tsurugidb.common.ToStringMap;
import org.embulk.input.tsurugidb.executor.PreparedQuery;
import org.embulk.input.tsurugidb.getter.ColumnGetter;
import org.embulk.input.tsurugidb.getter.ColumnGetterFactory;
import org.embulk.input.tsurugidb.option.TsurugiCommitType;
import org.embulk.input.tsurugidb.option.TsurugiSessionShutdownType;
import org.embulk.input.tsurugidb.select.BatchSelect;
import org.embulk.input.tsurugidb.select.SelectMethod;
import org.embulk.spi.BufferAllocator;
import org.embulk.spi.Column;
import org.embulk.spi.DataException;
import org.embulk.spi.Exec;
import org.embulk.spi.InputPlugin;
import org.embulk.spi.PageBuilder;
import org.embulk.spi.PageOutput;
import org.embulk.spi.Schema;
import org.embulk.util.config.Config;
import org.embulk.util.config.ConfigDefault;
import org.embulk.util.config.ConfigMapper;
import org.embulk.util.config.ConfigMapperFactory;
import org.embulk.util.config.Task;
import org.embulk.util.config.TaskMapper;
import org.embulk.util.config.modules.ZoneIdModule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.JsonNode;
import com.tsurugidb.tsubakuro.exception.ServerException;

// https://github.com/embulk/embulk-input-jdbc/blob/master/embulk-input-jdbc/src/main/java/org/embulk/input/jdbc/AbstractJdbcInputPlugin.java
public class TsurugiInputPlugin implements InputPlugin {
    private static final Logger logger = LoggerFactory.getLogger(TsurugiInputPlugin.class);

    public static final String TYPE = "tsurugidb";

    protected static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().addModule(ZoneIdModule.withLegacyNames()).build();
    protected static final ConfigMapper CONFIG_MAPPER = CONFIG_MAPPER_FACTORY.createConfigMapper();
    protected static final TaskMapper TASK_MAPPER = CONFIG_MAPPER_FACTORY.createTaskMapper();

    public interface PluginTask extends Task {
        @Config("endpoint")
        public String getEndpoint();

        @Config("connection_label")
        @ConfigDefault("\"embulk-input-tsurugidb\"")
        public String getConnectionLabel();

        @Config("user")
        @ConfigDefault("null")
        public Optional<String> getUser();

        @Config("password")
        @ConfigDefault("null")
        public Optional<String> getPassword();

        @Config("method")
        @ConfigDefault("\"select\"")
        public SelectMethod getSelectMethod();

        @Config("tx_type")
        @ConfigDefault("\"RTX\"") // OCC, LTX, RTX
        public String getTxType();

        @Config("tx_label")
        @ConfigDefault("\"embulk-input-tsurugidb\"")
        public String getTxLabel();

        @Config("tx_write_preserve")
        @ConfigDefault("[]")
        public List<String> getTxWritePreserve();

        @Config("tx_inclusive_read_area")
        @ConfigDefault("[]")
        public List<String> getTxInclusiveReadArea();

        @Config("tx_exclusive_read_area")
        @ConfigDefault("[]")
        public List<String> getTxExclusiveReadArea();

        @Config("tx_priority")
        @ConfigDefault("null")
        public Optional<String> getTxPriority();

        @Config("commit_type")
        @ConfigDefault("\"default\"")
        public TsurugiCommitType getCommitType();

        @Config("session_keep_alive")
        @ConfigDefault("null")
        public Optional<Boolean> getSessionKeepAlive();

        @Config("session_shutdown_type")
        @ConfigDefault("\"nothing\"")
        public TsurugiSessionShutdownType getSessionShutdownType();

        @Config("options")
        @ConfigDefault("{}")
        public ToStringMap getOptions();

        @Config("table")
        @ConfigDefault("null")
        public Optional<String> getTable();

        public void setTable(Optional<String> normalizedTableName);

        @Config("query")
        @ConfigDefault("null")
        public Optional<String> getQuery();

        @Config("use_raw_query_with_incremental")
        @ConfigDefault("false")
        public boolean getUseRawQueryWithIncremental();

        @Config("select")
        @ConfigDefault("null")
        public Optional<String> getSelect();

        @Config("where")
        @ConfigDefault("null")
        public Optional<String> getWhere();

        @Config("order_by")
        @ConfigDefault("null")
        public Optional<String> getOrderBy();

        @Config("incremental")
        @ConfigDefault("false")
        public boolean getIncremental();

        @Config("incremental_columns")
        @ConfigDefault("[]")
        public List<String> getIncrementalColumns();

        public void setIncrementalColumns(List<String> indexes);

        @Config("last_record")
        @ConfigDefault("null")
        public Optional<List<JsonNode>> getLastRecord();

        // TODO limit_value is necessary to make sure repeated bulk load transactions
        // don't load the same record twice or miss records when the column
        // specified at the order_by parameter is not unique.
        // For example, if the order_by column is "timestamp created_at"
        // column whose precision is second, the table can include multiple
        // records with the same created_at time. At the first bulk load
        // transaction, it loads a record with created_at=2015-01-02 00:00:02.
        // Then next transaction will use WHERE created_at > '2015-01-02 00:00:02'.
        // However, if another record with created_at=2014-01-01 23:59:59 is
        // inserted between the 2 transactions, the new record will be skipped.
        // To prevent this scenario, we want to specify
        // limit_value=2015-01-02 00:00:00 (exclusive). With this way, as long as
        // a transaction runs after 2015-01-02 00:00:00 + some minutes, we don't
        // skip records. Ideally, to automate the scheduling, we want to set
        // limit_value="today".
        //
        // @Config("limit_value")
        // @ConfigDefault("null")
        // public Optional<String> getLimitValue();

        // TODO probably limit_rows is unnecessary as long as this plugin
        // supports parallel execution (partition_by option) and resuming.
        // @Config("limit_rows")
        // @ConfigDefault("null")
        // public Optional<Integer> getLimitRows();

        @Config("connect_timeout")
        @ConfigDefault("300")
        public int getConnectTimeout();

        @Config("begin_timeout")
        @ConfigDefault("300")
        public int getBeginTimeout();

        @Config("select_timeout")
        @ConfigDefault("300")
        public int getSelectTimeout();

        @Config("update_timeout")
        @ConfigDefault("300")
        public int getUpdateTimeout();

        @Config("commit_timeout")
        @ConfigDefault("300")
        public int getCommitTimeout();

        @Config("session_shutdown_timeout")
        @ConfigDefault("300")
        public int getSessionShutdownTimeout();

        @Config("socket_timeout")
        @ConfigDefault("1800")
        public int getSocketTimeout();

        @Config("fetch_rows")
        @ConfigDefault("10000")
        // TODO set minimum number
        public int getFetchRows();

        // TODO parallel execution using "partition_by" config

        @Config("column_options")
        @ConfigDefault("{}")
        public Map<String, DbColumnOption> getColumnOptions();

        @Config("default_timezone")
        @ConfigDefault("\"UTC\"")
        public ZoneId getDefaultTimeZone();

        @Config("default_column_options")
        @ConfigDefault("{}")
        public Map<String, DbColumnOption> getDefaultColumnOptions();

        @Config("before_setup")
        @ConfigDefault("null")
        public Optional<String> getBeforeSetup();

        @Config("before_select")
        @ConfigDefault("null")
        public Optional<String> getBeforeSelect();

        @Config("after_select")
        @ConfigDefault("null")
        public Optional<String> getAfterSelect();

        public void setBuiltQuery(PreparedQuery query);

        public PreparedQuery getBuiltQuery();

        public void setQuerySchema(TsurugiQuerySchema schema);

        public TsurugiQuerySchema getQuerySchema();
    }
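
    // The options above map one-to-one onto the plugin's YAML configuration. A minimal
    // illustrative sketch (endpoint and table values are hypothetical, not taken from this
    // source):
    //
    //   in:
    //     type: tsurugidb
    //     endpoint: tcp://localhost:12345
    //     method: select
    //     tx_type: RTX
    //     table: example_table
    //     incremental: true
    //     incremental_columns: [created_at]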

    protected TsurugiInputConnection newConnection(PluginTask task) throws ServerException {
        return TsurugiInputConnection.newConnection(task);
    }

    @Override
    public ConfigDiff transaction(ConfigSource config, Control control) {
        final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);

        if (task.getIncremental()) {
            if (task.getOrderBy().isPresent()) {
                throw new ConfigException("order_by option must not be set if incremental is true");
            }
        } else {
            if (!task.getIncrementalColumns().isEmpty()) {
                throw new ConfigException("'incremental: true' must be set if incremental_columns is set");
            }
        }

        Schema schema;
        try (var con = newConnection(task)) {
            if (task.getBeforeSetup().isPresent()) {
                var executor = con.getSqlExecutor();
                executor.executeUpdate(task.getBeforeSetup().get());
                executor.commit();
            }

            // TODO incremental_columns is not set => get primary key
            schema = setupTask(con, task);
        } catch (ServerException e) {
            throw new ServerRuntimeException(e);
        }

        return buildNextConfigDiff(task, control.run(task.toTaskSource(), schema, 1));
    }

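    // Derives the Embulk Schema from the Tsurugi-side query schema: each TsurugiColumn is mapped
    // through a ColumnGetter, and the getter's getToType() decides the Embulk column type.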
    protected Schema setupTask(TsurugiInputConnection con, PluginTask task) throws ServerException {
        if (task.getTable().isPresent()) {
            String actualTableName = normalizeTableNameCase(con, task.getTable().get());
            task.setTable(Optional.of(actualTableName));
        }

        var selectMethod = task.getSelectMethod();
        switch (selectMethod) {
        case SELECT:
            var sqlExecutor = con.getSqlExecutor();
            sqlExecutor.setupTask(task);
            break;
        case SCAN:
            var kvsExecutor = con.getKvsExecutor();
            kvsExecutor.setupTask(task);
            break;
        default:
            throw new AssertionError(selectMethod);
        }

        var querySchema = task.getQuerySchema();

        // validate column_options
        newColumnGetters(task, querySchema, null);

        ColumnGetterFactory factory = newColumnGetterFactory(null, task.getDefaultTimeZone());
        final var columns = new ArrayList<Column>();
        for (int i = 0; i < querySchema.getCount(); i++) {
            TsurugiColumn column = querySchema.getColumn(i);
            DbColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column));
            columns.add(new Column(i, column.getName(), factory.newColumnGetter(column, columnOption, column.getSqlType()).getToType()));
        }
        return new Schema(Collections.unmodifiableList(columns));
    }

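    // Resolves the configured table name's case against the server: the name is used as-is if it
    // exists; otherwise the all-upper-case or all-lower-case variant is used when exactly one of
    // them exists, and a ConfigException is thrown when both exist (ambiguous). For example, a
    // hypothetical config value "accounts" would resolve to "ACCOUNTS" if only that table exists.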
    private String normalizeTableNameCase(TsurugiInputConnection con, String tableName) throws ServerException {
        if (con.tableExists(tableName)) {
            return tableName;
        } else {
            String upperTableName = tableName.toUpperCase();
            String lowerTableName = tableName.toLowerCase();
            boolean upperExists = con.tableExists(upperTableName);
            boolean lowerExists = con.tableExists(lowerTableName);
            if (upperExists && lowerExists) {
                throw new ConfigException(String.format("Cannot specify table '%s' because both '%s' and '%s' exist.", tableName, upperTableName, lowerTableName));
            } else if (upperExists) {
                return upperTableName;
            } else if (lowerExists) {
                return lowerTableName;
            } else {
                // Fall back to the given table name. This may cause an error later at
                // getSchemaOfQuery.
                return tableName;
            }
        }
    }

    @Override
    public ConfigDiff resume(TaskSource taskSource, Schema schema, int taskCount, Control control) {
        final PluginTask task = TASK_MAPPER.map(taskSource, PluginTask.class);

        return buildNextConfigDiff(task, control.run(taskSource, schema, taskCount));
    }

    @Override
    public ConfigDiff guess(ConfigSource config) {
        return CONFIG_MAPPER_FACTORY.newConfigDiff();
    }

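    // For incremental loads, the returned ConfigDiff carries "last_record" into the next run's
    // configuration, so the next transaction can resume with a condition like
    // WHERE created_at > '2015-01-02 00:00:02' (see the limit_value TODO above). A sketch of the
    // resulting diff (illustrative value only):
    //
    //   in:
    //     last_record: ["2015-01-02 00:00:02"]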
    protected ConfigDiff buildNextConfigDiff(PluginTask task, List<TaskReport> reports) {
        final ConfigDiff next = CONFIG_MAPPER_FACTORY.newConfigDiff();
        if (reports.size() > 0 && reports.get(0).has("last_record")) {
            // |reports| are from embulk-core, so their backend is Jackson on the embulk-core side.
            // To render |JsonNode| (which is on the plugin side) from |reports|, they need to be
            // rebuilt.
            final TaskReport report = CONFIG_MAPPER_FACTORY.rebuildTaskReport(reports.get(0));
            next.set("last_record", report.get(JsonNode.class, "last_record"));
        } else if (task.getLastRecord().isPresent()) {
            next.set("last_record", task.getLastRecord().get());
        }
        return next;
    }

    @Override
    public void cleanup(TaskSource taskSource, Schema schema, int taskCount, List<TaskReport> successTaskReports) {
        // do nothing
    }

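    // Snapshot of the incremental-column values from the last fetched row. run() calls accept()
    // after the fetch loop, relying on each ColumnGetter retaining the last value it decoded;
    // getList() rejects nulls because a null value can't be used in the next run's WHERE condition.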
    private static class LastRecordStore {
        private final JsonNode[] lastValues;
        private final List<String> columnNames;

        public LastRecordStore(List<String> columnNames) {
            this.lastValues = new JsonNode[columnNames.size()];
            this.columnNames = columnNames;
        }

        public void accept(Map<String, ColumnGetter> getters) {
            int i = 0;
            for (String name : columnNames) {
                lastValues[i] = getters.get(name).encodeToJson();
                i++;
            }
        }

        public List<JsonNode> getList() {
            final var values = new ArrayList<JsonNode>();
            for (int i = 0; i < lastValues.length; i++) {
                if (lastValues[i] == null || lastValues[i].isNull()) {
                    throw new DataException(String.format(ENGLISH, "incremental_columns can't include null values but the last row is null at column '%s'", columnNames.get(i)));
                }
                values.add(lastValues[i]);
            }
            return Collections.unmodifiableList(values);
        }
    }

    @Override
    public TaskReport run(TaskSource taskSource, Schema schema, int taskIndex, PageOutput output) {
        final PluginTask task = TASK_MAPPER.map(taskSource, PluginTask.class);

        PreparedQuery builtQuery = task.getBuiltQuery();
        TsurugiQuerySchema querySchema = task.getQuerySchema();
        BufferAllocator allocator = Exec.getBufferAllocator();
        PageBuilder pageBuilder = Exec.getPageBuilder(allocator, schema, output);

        long totalRows = 0;

        LastRecordStore lastRecordStore = null;

        try (TsurugiInputConnection con = newConnection(task)) {
            var executor = con.getSqlExecutor();
            if (task.getBeforeSelect().isPresent()) {
                executor.executeUpdate(task.getBeforeSelect().get());
            }

            Map<String, ColumnGetter> getters = newColumnGetters(task, querySchema, pageBuilder);
            try (BatchSelect cursor = newSelectCursor(task, con, builtQuery, getters, task.getFetchRows(), task.getSocketTimeout())) {
                while (true) {
                    long rows = cursor.fetch(getters, pageBuilder, logger);
                    if (rows <= 0L) {
                        break;
                    }
                    totalRows += rows;
                }
            }

            if (task.getIncremental() && totalRows > 0) {
                lastRecordStore = new LastRecordStore(task.getIncrementalColumns());
                lastRecordStore.accept(getters);
            }

            pageBuilder.finish();

            // after_select runs after pageBuilder.finish because pageBuilder.finish may fail.
            // TODO The output plugin's transaction might still fail. In that case, after_select is
            // already done but the output plugin didn't commit the data to the target storage.
            // This means inconsistency between the data source and the destination. To avoid this
            // issue, we need another option like `after_commit` that runs after the output
            // plugin's commit. after_commit can't run in the same transaction as SELECT. So,
            // after_select gets values and stores them in TaskReport, and after_commit takes them
            // as placeholders. Or, after_select puts values into an intermediate table, and
            // after_commit moves those values to the actual table.
            if (task.getAfterSelect().isPresent()) {
                executor.executeUpdate(task.getAfterSelect().get());
            }
            executor.commit();

        } catch (ServerException e) {
            throw new ServerRuntimeException(e);
        }

        final TaskReport report = CONFIG_MAPPER_FACTORY.newTaskReport();
        if (lastRecordStore != null) {
            report.set("last_record", lastRecordStore.getList());
        }

        return report;
    }

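    // Dispatches on the configured method: "select" creates the cursor via the SQL executor, and
    // "scan" via the KVS executor. Both return a BatchSelect that run() drains with repeated
    // fetch() calls.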
    protected BatchSelect newSelectCursor(PluginTask task, TsurugiInputConnection con, PreparedQuery preparedQuery, Map<String, ColumnGetter> getters, int fetchRows, int queryTimeout)
            throws ServerException {
        var selectMethod = task.getSelectMethod();
        switch (selectMethod) {
        case SELECT:
            return con.getSqlExecutor().newBatchSelect(preparedQuery, getters, fetchRows, queryTimeout);
        case SCAN:
            return con.getKvsExecutor().newBatchSelect(preparedQuery, getters, fetchRows, queryTimeout);
        default:
            throw new AssertionError(selectMethod);
        }
    }

    protected ColumnGetterFactory newColumnGetterFactory(PageBuilder pageBuilder, ZoneId dateTimeZone) {
        return new ColumnGetterFactory(pageBuilder, dateTimeZone);
    }

    private Map<String, ColumnGetter> newColumnGetters(PluginTask task, TsurugiQuerySchema querySchema, PageBuilder pageBuilder) {
        ColumnGetterFactory factory = newColumnGetterFactory(pageBuilder, task.getDefaultTimeZone());
        final var getters = new LinkedHashMap<String, ColumnGetter>();
        for (TsurugiColumn column : querySchema.getColumns()) {
            DbColumnOption columnOption = columnOptionOf(task.getColumnOptions(), task.getDefaultColumnOptions(), column, factory.getJdbcType(column));
            getters.put(column.getName(), factory.newColumnGetter(column, columnOption, column.getSqlType()));
        }
        return Collections.unmodifiableMap(getters);
    }

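    // Resolution order for a column's DbColumnOption: an exact name match in column_options, then
    // a unique case-insensitive match (two case-insensitive matches are ambiguous and rejected),
    // then default_column_options keyed by the column's SQL type, and finally an empty option.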
    private static DbColumnOption columnOptionOf(Map<String, DbColumnOption> columnOptions, Map<String, DbColumnOption> defaultColumnOptions, TsurugiColumn targetColumn, String targetColumnSQLType) {
        DbColumnOption columnOption = columnOptions.get(targetColumn.getName());
        if (columnOption == null) {
            String foundName = null;
            for (Map.Entry<String, DbColumnOption> entry : columnOptions.entrySet()) {
                if (entry.getKey().equalsIgnoreCase(targetColumn.getName())) {
                    if (columnOption != null) {
                        throw new ConfigException(String.format("Cannot specify column '%s' because both '%s' and '%s' exist in column_options.", targetColumn.getName(), foundName, entry.getKey()));
                    }
                    foundName = entry.getKey();
                    columnOption = entry.getValue();
                }
            }
        }

        if (columnOption != null) {
            return columnOption;
        }
        final DbColumnOption defaultColumnOption = defaultColumnOptions.get(targetColumnSQLType);
        if (defaultColumnOption != null) {
            return defaultColumnOption;
        }
        return CONFIG_MAPPER.map(CONFIG_MAPPER_FACTORY.newConfigSource(), DbColumnOption.class);
    }
}



