All downloads are free. Search and download functionality uses the official Maven repository.

gobblin.publisher.JdbcPublisher Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.publisher;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.sql.DataSource;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.source.extractor.JobCommitPolicy;
import gobblin.util.ForkOperatorUtils;
import gobblin.util.jdbc.DataSourceBuilder;
import gobblin.writer.commands.JdbcWriterCommands;
import gobblin.writer.commands.JdbcWriterCommandsFactory;


/**
 * Publishes data into JDBC RDBMS. Expects all the data has been already in staging table.
 */
public class JdbcPublisher extends DataPublisher {
  public static final String JDBC_PUBLISHER_PREFIX = "jdbc.publisher.";
  public static final String JDBC_PUBLISHER_DATABASE_NAME = JDBC_PUBLISHER_PREFIX + "database_name";
  public static final String JDBC_PUBLISHER_FINAL_TABLE_NAME = JDBC_PUBLISHER_PREFIX + "table_name";
  public static final String JDBC_PUBLISHER_REPLACE_FINAL_TABLE = JDBC_PUBLISHER_PREFIX + "replace_table";
  public static final String JDBC_PUBLISHER_USERNAME = JDBC_PUBLISHER_PREFIX + "username";
  public static final String JDBC_PUBLISHER_PASSWORD = JDBC_PUBLISHER_PREFIX + "password";
  public static final String JDBC_PUBLISHER_ENCRYPTION_KEY_LOC = JDBC_PUBLISHER_PREFIX + "encrypt_key_loc";
  public static final String JDBC_PUBLISHER_URL = JDBC_PUBLISHER_PREFIX + "url";
  public static final String JDBC_PUBLISHER_TIMEOUT = JDBC_PUBLISHER_PREFIX + "timeout";
  public static final String JDBC_PUBLISHER_DRIVER = JDBC_PUBLISHER_PREFIX + "driver";

  private static final Logger LOG = LoggerFactory.getLogger(JdbcPublisher.class);
  private final JdbcWriterCommandsFactory jdbcWriterCommandsFactory;

  /**
   * Expects all data is in staging table ready to be published. To validate this, it checks COMMIT_ON_FULL_SUCCESS and PUBLISH_DATA_AT_JOB_LEVEL
   * @param state
   * @param jdbcWriterCommandsFactory
   * @param conn
   */
  @VisibleForTesting
  public JdbcPublisher(State state, JdbcWriterCommandsFactory jdbcWriterCommandsFactory) {
    super(state);
    this.jdbcWriterCommandsFactory = jdbcWriterCommandsFactory;
    validate(getState());
  }

  public JdbcPublisher(State state) {
    this(state, new JdbcWriterCommandsFactory());
    validate(getState());
  }

  /**
   * @param state
   * @throws IllegalArgumentException If job commit policy is not COMMIT_ON_FULL_SUCCESS or is not on PUBLISH_DATA_AT_JOB_LEVEL
   */
  private void validate(State state) {
    JobCommitPolicy jobCommitPolicy = JobCommitPolicy.getCommitPolicy(this.getState().getProperties());
    if (JobCommitPolicy.COMMIT_ON_FULL_SUCCESS != jobCommitPolicy) {
      throw new IllegalArgumentException(this.getClass().getSimpleName()
          + " won't publish as already commited by task. Job commit policy " + jobCommitPolicy);
    }

    if (!state.getPropAsBoolean(ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL,
        ConfigurationKeys.DEFAULT_PUBLISH_DATA_AT_JOB_LEVEL)) {
      throw new IllegalArgumentException(this.getClass().getSimpleName() + " won't publish as "
          + ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL + " is set as false");
    }
  }

  @VisibleForTesting
  public Connection createConnection() {
    DataSource dataSource = DataSourceBuilder.builder().url(this.state.getProp(JDBC_PUBLISHER_URL))
        .driver(this.state.getProp(JDBC_PUBLISHER_DRIVER)).userName(this.state.getProp(JDBC_PUBLISHER_USERNAME))
        .passWord(this.state.getProp(JDBC_PUBLISHER_PASSWORD))
        .cryptoKeyLocation(this.state.getProp(JDBC_PUBLISHER_ENCRYPTION_KEY_LOC)).maxActiveConnections(1)
        .maxIdleConnections(1).state(this.state).build();
    try {
      return dataSource.getConnection();
    } catch (SQLException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public void close() throws IOException {}

  @Override
  public void initialize() throws IOException {}

  /**
   * 1. Truncate destination table if requested
   * 2. Move data from staging to destination
   * 3. Update Workunit state
   *
   * TODO: Research on running this in parallel. While testing publishing it in parallel, it turns out delete all from the table locks the table
   * so that copying table threads wait until transaction lock times out and throwing exception(MySQL). Is there a way to avoid this?
   *
   * {@inheritDoc}
   * @see gobblin.publisher.DataPublisher#publishData(java.util.Collection)
   */
  @Override
  public void publishData(Collection states) throws IOException {
    LOG.info("Start publishing data");
    int branches = this.state.getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1);
    Set emptiedDestTables = Sets.newHashSet();

    final Connection conn = createConnection();
    final JdbcWriterCommands commands = this.jdbcWriterCommandsFactory.newInstance(this.state, conn);
    try {
      conn.setAutoCommit(false);

      for (int i = 0; i < branches; i++) {
        final String destinationTable = this.state
            .getProp(ForkOperatorUtils.getPropertyNameForBranch(JDBC_PUBLISHER_FINAL_TABLE_NAME, branches, i));
        final String databaseName =
            this.state.getProp(ForkOperatorUtils.getPropertyNameForBranch(JDBC_PUBLISHER_DATABASE_NAME, branches, i));
        Preconditions.checkNotNull(destinationTable);

        if (this.state.getPropAsBoolean(
            ForkOperatorUtils.getPropertyNameForBranch(JDBC_PUBLISHER_REPLACE_FINAL_TABLE, branches, i), false)
            && !emptiedDestTables.contains(destinationTable)) {
          LOG.info("Deleting table " + destinationTable);
          commands.deleteAll(databaseName, destinationTable);
          emptiedDestTables.add(destinationTable);
        }

        Map> stagingTables = getStagingTables(states, branches, i);
        for (Map.Entry> entry : stagingTables.entrySet()) {
          String stagingTable = entry.getKey();
          LOG.info("Copying data from staging table " + stagingTable + " into destination table " + destinationTable);
          commands.copyTable(databaseName, stagingTable, destinationTable);
          for (WorkUnitState workUnitState : entry.getValue()) {
            workUnitState.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
          }
        }
      }
      LOG.info("Commit publish data");
      conn.commit();
    } catch (Exception e) {
      try {
        LOG.error("Failed publishing. Rolling back.");
        conn.rollback();
      } catch (SQLException se) {
        LOG.error("Failed rolling back.", se);
      }
      throw new RuntimeException("Failed publishing", e);
    } finally {
      try {
        conn.close();
      } catch (SQLException e) {
        throw new RuntimeException(e);
      }
    }
  }

  private static Map> getStagingTables(Collection states,
      int branches, int i) {
    Map> stagingTables = Maps.newHashMap();
    for (WorkUnitState workUnitState : states) {
      String stagingTableKey =
          ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_TABLE, branches, i);
      String stagingTable = Preconditions.checkNotNull(workUnitState.getProp(stagingTableKey));
      List existing = stagingTables.get(stagingTable);
      if (existing == null) {
        existing = Lists.newArrayList();
        stagingTables.put(stagingTable, existing);
      }
      existing.add(workUnitState);
    }
    return stagingTables;
  }

  @Override
  public void publishMetadata(Collection states) throws IOException {}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy