All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.sqoop.manager.oracle.OraOopOutputFormatUpdate Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.sqoop.manager.oracle;

import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import com.cloudera.sqoop.lib.SqoopRecord;

import org.apache.sqoop.SqoopOptions;
import org.apache.sqoop.manager.oracle.OraOopOracleQueries.
           CreateExportChangesTableOptions;

/**
 * Update an Oracle table based on emitted keys.
 */
public class OraOopOutputFormatUpdate extends
    OraOopOutputFormatBase {

  private static final OraOopLog LOG = OraOopLogFactory
      .getLog(OraOopOutputFormatUpdate.class);

  /**
   * Type of export - straight update or merge (update-insert).
   */
  public enum UpdateMode {
    Update, Merge
  }

  private enum ExportTableUpdateTechnique {
    ReInsertUpdatedRows, ReInsertUpdatedRowsAndNewRows, UpdateSql, MergeSql
  }

  @Override
  public RecordWriter getRecordWriter(TaskAttemptContext context)
      throws IOException {
    OraOopUtilities.checkJavaSecurityEgd();
    Configuration conf = context.getConfiguration();

    // Get the unique JDBC URL to use for this mapper and update the
    // configuration property
    // so that the URL is actually used...
    int mapperId = this.getMapperId(context);
    applyMapperJdbcUrl(context, mapperId);

    UpdateMode updateMode = OraOopUtilities.getExportUpdateMode(conf);

    boolean useAppendValuesOracleHint = false;

    if (updateMode == UpdateMode.Merge || updateMode == UpdateMode.Update) {
      // Should we use the APPEND_VALUES Oracle hint?...
      useAppendValuesOracleHint = this.canUseOracleAppendValuesHint(context);
    }

    // Has the user forced the use of APPEND_VALUES either on or off?...
    useAppendValuesOracleHint =
        allowUserToOverrideUseOfTheOracleAppendValuesHint(context,
            useAppendValuesOracleHint);

    // If using APPEND_VALUES, check the batch size and commit frequency...
    if (useAppendValuesOracleHint) {
      updateBatchSizeInConfigurationToAllowOracleAppendValuesHint(context);
    }

    // Create the Record Writer...
    OraOopDBRecordWriterUpdate result = null;
    try {
      result =
          new OraOopDBRecordWriterUpdate(context, mapperId, updateMode,
              useAppendValuesOracleHint);
    } catch (NoClassDefFoundError ex) {
      throw new IOException(String.format(
          "Unable to create an instance of OraOopDBRecordWriterUpdate.\n"
              + "The classpath is:\n%s", OraOopUtilities.getJavaClassPath()),
          ex);
    } catch (Exception ex) {
      throw new IOException(ex);
    }

    try {
      result.getExportTableAndColumns(context);
    } catch (SQLException ex) {
      throw new IOException(ex);
    }

    return result;
  }

  /**
   * Update an Oracle table based on emitted keys.
   */
  public class OraOopDBRecordWriterUpdate extends OraOopDBRecordWriterBase {

    private String sqlStatement; // <- The SQL used when updating batches of
                                 // rows into the Oracle table
    private String[] updateColumnNames; // <- The name of the column(s) used to
                                        // match a row in the HDFS file to a row
                                        // in the Oracle table. i.e. What as
                                        // specified in the "--update-key" sqoop
                                        // argument.
    private UpdateMode updateMode; // <- The modus operandi of this class. i.e.
                                   // Whether we update the Oracle table
                                   // directly, or insert data into a separate
                                   // table and then apply a SQL MERGE
                                   // statement.
    private boolean useAppendValuesOracleHint; // <- Whether to use the
                                               // " /*+APPEND_VALUES*/ " hint
                                               // within the Oracle SQL
                                               // statement we generate
    private boolean tableHasOraOopPartitions; // <- Indicates whether the export
                                              // table has partitions that were
                                              // creted by OraOop
    private long numberOfRowsSkipped; // <- The number of rows encountered
                                      // during configurePreparedStatement()
                                      // that had a NULL value for (one of) the
                                      // update columns. This row was therefore
                                      // skipped.

    public OraOopDBRecordWriterUpdate(TaskAttemptContext context, int mapperId,
        UpdateMode updateMode, boolean useAppendValuesOracleHint)
        throws ClassNotFoundException, SQLException {

      super(context, mapperId);

      Configuration conf = context.getConfiguration();

      this.updateColumnNames =
          OraOopUtilities.getExportUpdateKeyColumnNames(conf);
      this.useAppendValuesOracleHint = useAppendValuesOracleHint;
      this.updateMode = updateMode;
      this.tableHasOraOopPartitions =
          conf.getBoolean(OraOopConstants.EXPORT_TABLE_HAS_ORAOOP_PARTITIONS,
              false);
    }

    @Override
    protected void getExportTableAndColumns(TaskAttemptContext context)
        throws SQLException {

      Configuration conf = context.getConfiguration();

      this.oracleTable = createUniqueMapperTable(context);
      setOracleTableColumns(OraOopOracleQueries.getTableColumns(
        this.getConnection(),
        this.oracleTable,
        OraOopUtilities.omitLobAndLongColumnsDuringImport(conf),
        OraOopUtilities.recallSqoopJobType(conf),
        true, // <- onlyOraOopSupportedTypes
        false, // <- omitOraOopPseudoColumns
        OracleUtils.isOracleEscapingDisabled(conf))
      );
    }

    @Override
    public void closeConnection(TaskAttemptContext context)
        throws SQLException {

      try {

        if (this.numberOfRowsSkipped > 0) {
          LOG.warn(String.format(
              "%d records were skipped due to a NULL value within one of the "
            + "update-key column(s).\nHaving a NULL value prevents a record "
            + "from being able to be matched to a row in the Oracle table.",
                  this.numberOfRowsSkipped));
        }

        // Now update the "main" export table with data that was inserted into
        // this mapper's table...
        updateMainExportTableFromUniqueMapperTable(context,
            this.updateColumnNames);

        LOG.debug(String.format("Dropping temporary mapper table %s",
            this.oracleTable.toString()));
        OraOopOracleQueries.dropTable(this.getConnection(), this.oracleTable);
      } finally {
        super.closeConnection(context);
      }
    }

    private ExportTableUpdateTechnique getExportTableUpdateTechnique() {

      ExportTableUpdateTechnique result;

      if (this.tableHasOraOopPartitions) {
        switch (this.updateMode) {

          case Update:
            result = ExportTableUpdateTechnique.ReInsertUpdatedRows;
            break;

          case Merge:
            result = ExportTableUpdateTechnique.ReInsertUpdatedRowsAndNewRows;
            break;

          default:
            throw new RuntimeException(String.format(
                "Update %s to cater for the updateMode \"%s\".",
                OraOopUtilities.getCurrentMethodName(), this.updateMode
                    .toString()));
        }
      } else {
        switch (this.updateMode) {

          case Update:
            result = ExportTableUpdateTechnique.UpdateSql;
            break;

          case Merge:
            result = ExportTableUpdateTechnique.MergeSql;
            break;

          default:
            throw new RuntimeException(String.format(
                "Update %s to cater for the updateMode \"%s\".",
                OraOopUtilities.getCurrentMethodName(), this.updateMode
                    .toString()));
        }
      }

      return result;
    }

    private void updateMainExportTableFromUniqueMapperTable(
        TaskAttemptContext context, String[] mergeColumnNames)
        throws SQLException {

      String schema =
          context.getConfiguration().get(OraOopConstants.ORAOOP_TABLE_OWNER);
      String localTableName =
          context.getConfiguration().get(OraOopConstants.ORAOOP_TABLE_NAME);
      OracleTable targetTable = new OracleTable(schema, localTableName);

      Object sysDateTime = getJobSysDate(context);
      OracleTable changesTable =
          OraOopUtilities.generateExportTableMapperTableName(Integer
              .toString(this.mapperId)
              + "_CHG", sysDateTime, null);

      OraOopOracleQueries.CreateExportChangesTableOptions changesTableOptions;
      boolean parallelizationEnabled =
          OraOopUtilities.enableOracleParallelProcessingDuringExport(context
              .getConfiguration());

      ExportTableUpdateTechnique exportTableUpdateTechnique =
          getExportTableUpdateTechnique();
      switch (exportTableUpdateTechnique) {

        case ReInsertUpdatedRows:
        case UpdateSql:
          changesTableOptions =
              CreateExportChangesTableOptions.OnlyRowsThatDiffer;
          break;

        case ReInsertUpdatedRowsAndNewRows:
        case MergeSql:
          changesTableOptions =
              CreateExportChangesTableOptions.RowsThatDifferPlusNewRows;
          break;

        default:
          throw new RuntimeException(String.format(
              "Update %s to cater for the ExportTableUpdateTechnique \"%s\".",
              OraOopUtilities.getCurrentMethodName(),
              exportTableUpdateTechnique.toString()));
      }

      String temporaryTableStorageClause =
          OraOopUtilities.getTemporaryTableStorageClause(context
              .getConfiguration());

      try {
        int changeTableRowCount =
            OraOopOracleQueries.createExportChangesTable(this.getConnection(),
                changesTable, temporaryTableStorageClause, this.oracleTable,
                targetTable, this.updateColumnNames, changesTableOptions,
                parallelizationEnabled, OracleUtils.isOracleEscapingDisabled(context.getConfiguration()));

        if (changeTableRowCount == 0) {
          LOG.debug(String.format(
              "The changes-table does not contain any rows. %s is now exiting.",
                  OraOopUtilities.getCurrentMethodName()));
          return;
        }

        switch (exportTableUpdateTechnique) {

          case ReInsertUpdatedRows:
          case ReInsertUpdatedRowsAndNewRows:

            OraOopOracleQueries.deleteRowsFromTable(this.getConnection(),
                targetTable, changesTable, this.updateColumnNames,
                parallelizationEnabled);

            OraOopOracleQueries.insertRowsIntoExportTable(this.getConnection(),
                targetTable, changesTable, sysDateTime, this.mapperId,
              parallelizationEnabled, OracleUtils.isOracleEscapingDisabled(context.getConfiguration()));
            break;

          case UpdateSql:

            long start = System.nanoTime();

            OraOopOracleQueries.updateTable(this.getConnection(), targetTable,
                changesTable, this.updateColumnNames, this
                    .getOracleTableColumns(), sysDateTime, this.mapperId,
                parallelizationEnabled, context.getConfiguration().getBoolean(SqoopOptions.ORACLE_ESCAPING_DISABLED,false));

            double timeInSec = (System.nanoTime() - start) / Math.pow(10, 9);
            LOG.info(String.format("Time spent performing an update: %f sec.",
                timeInSec));
            break;

          case MergeSql:

            long mergeStart = System.nanoTime();

            OraOopOracleQueries.mergeTable(this.getConnection(), targetTable,
                changesTable, this.updateColumnNames, this
                    .getOracleTableColumns(), sysDateTime, this.mapperId,
                parallelizationEnabled);

            double mergeTimeInSec = (System.nanoTime() - mergeStart)
                / Math.pow(10, 9);
            LOG.info(String.format("Time spent performing a merge: %f sec.",
                mergeTimeInSec));

            break;

          default:
            throw new RuntimeException(
              String.format(
                "Update %s to cater for the ExportTableUpdateTechnique \"%s\".",
                        OraOopUtilities.getCurrentMethodName(),
                        exportTableUpdateTechnique.toString()));
        }

        this.getConnection().commit();
      } catch (SQLException ex) {
        this.getConnection().rollback();
        throw ex;
      } finally {
        OraOopOracleQueries.dropTable(this.getConnection(), changesTable);
      }
    }

    @Override
    protected String getBatchSqlStatement() {

      if (sqlStatement == null) {
        this.sqlStatement =
            getBatchInsertSqlStatement(
                this.useAppendValuesOracleHint ? "/*+APPEND_VALUES*/"
                : "");
      }

      return this.sqlStatement;
    }

    @Override
    void configurePreparedStatement(PreparedStatement statement,
        List userRecords) throws SQLException {

      Map fieldMap;
      try {
        for (SqoopRecord record : userRecords) {
          fieldMap = record.getFieldMap();

          boolean updateKeyValueIsNull = false;
          for (int idx = 0; idx < this.updateColumnNames.length; idx++) {
            String updateColumnName = this.updateColumnNames[idx];
            Object updateKeyValue = fieldMap.get(updateColumnName);
            if (updateKeyValue == null) {
              this.numberOfRowsSkipped++;
              updateKeyValueIsNull = true;
              break;
            }
          }

          if (updateKeyValueIsNull) {
            continue;
          }

          configurePreparedStatementColumns(statement, fieldMap);
        }

      } catch (Exception ex) {
        if (ex instanceof SQLException) {
          throw (SQLException) ex;
        } else {
          LOG.error(String.format("The following error occurred during %s",
              OraOopUtilities.getCurrentMethodName()), ex);
          throw new SQLException(ex);
        }
      }

    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy