org.apache.sqoop.manager.oracle.OraOopOutputFormatUpdate Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sqoop Show documentation
Show all versions of sqoop Show documentation
Bandwidth controlled sqoop for network aware data migration
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.manager.oracle;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import com.cloudera.sqoop.lib.SqoopRecord;
import org.apache.sqoop.SqoopOptions;
import org.apache.sqoop.manager.oracle.OraOopOracleQueries.
CreateExportChangesTableOptions;
/**
* Update an Oracle table based on emitted keys.
*/
public class OraOopOutputFormatUpdate extends
OraOopOutputFormatBase {
private static final OraOopLog LOG = OraOopLogFactory
.getLog(OraOopOutputFormatUpdate.class);
/**
* Type of export - straight update or merge (update-insert).
*/
public enum UpdateMode {
Update, Merge
}
private enum ExportTableUpdateTechnique {
ReInsertUpdatedRows, ReInsertUpdatedRowsAndNewRows, UpdateSql, MergeSql
}
@Override
public RecordWriter getRecordWriter(TaskAttemptContext context)
throws IOException {
OraOopUtilities.checkJavaSecurityEgd();
Configuration conf = context.getConfiguration();
// Get the unique JDBC URL to use for this mapper and update the
// configuration property
// so that the URL is actually used...
int mapperId = this.getMapperId(context);
applyMapperJdbcUrl(context, mapperId);
UpdateMode updateMode = OraOopUtilities.getExportUpdateMode(conf);
boolean useAppendValuesOracleHint = false;
if (updateMode == UpdateMode.Merge || updateMode == UpdateMode.Update) {
// Should we use the APPEND_VALUES Oracle hint?...
useAppendValuesOracleHint = this.canUseOracleAppendValuesHint(context);
}
// Has the user forced the use of APPEND_VALUES either on or off?...
useAppendValuesOracleHint =
allowUserToOverrideUseOfTheOracleAppendValuesHint(context,
useAppendValuesOracleHint);
// If using APPEND_VALUES, check the batch size and commit frequency...
if (useAppendValuesOracleHint) {
updateBatchSizeInConfigurationToAllowOracleAppendValuesHint(context);
}
// Create the Record Writer...
OraOopDBRecordWriterUpdate result = null;
try {
result =
new OraOopDBRecordWriterUpdate(context, mapperId, updateMode,
useAppendValuesOracleHint);
} catch (NoClassDefFoundError ex) {
throw new IOException(String.format(
"Unable to create an instance of OraOopDBRecordWriterUpdate.\n"
+ "The classpath is:\n%s", OraOopUtilities.getJavaClassPath()),
ex);
} catch (Exception ex) {
throw new IOException(ex);
}
try {
result.getExportTableAndColumns(context);
} catch (SQLException ex) {
throw new IOException(ex);
}
return result;
}
/**
* Update an Oracle table based on emitted keys.
*/
public class OraOopDBRecordWriterUpdate extends OraOopDBRecordWriterBase {
private String sqlStatement; // <- The SQL used when updating batches of
// rows into the Oracle table
private String[] updateColumnNames; // <- The name of the column(s) used to
// match a row in the HDFS file to a row
// in the Oracle table. i.e. What as
// specified in the "--update-key" sqoop
// argument.
private UpdateMode updateMode; // <- The modus operandi of this class. i.e.
// Whether we update the Oracle table
// directly, or insert data into a separate
// table and then apply a SQL MERGE
// statement.
private boolean useAppendValuesOracleHint; // <- Whether to use the
// " /*+APPEND_VALUES*/ " hint
// within the Oracle SQL
// statement we generate
private boolean tableHasOraOopPartitions; // <- Indicates whether the export
// table has partitions that were
// creted by OraOop
private long numberOfRowsSkipped; // <- The number of rows encountered
// during configurePreparedStatement()
// that had a NULL value for (one of) the
// update columns. This row was therefore
// skipped.
public OraOopDBRecordWriterUpdate(TaskAttemptContext context, int mapperId,
UpdateMode updateMode, boolean useAppendValuesOracleHint)
throws ClassNotFoundException, SQLException {
super(context, mapperId);
Configuration conf = context.getConfiguration();
this.updateColumnNames =
OraOopUtilities.getExportUpdateKeyColumnNames(conf);
this.useAppendValuesOracleHint = useAppendValuesOracleHint;
this.updateMode = updateMode;
this.tableHasOraOopPartitions =
conf.getBoolean(OraOopConstants.EXPORT_TABLE_HAS_ORAOOP_PARTITIONS,
false);
}
@Override
protected void getExportTableAndColumns(TaskAttemptContext context)
throws SQLException {
Configuration conf = context.getConfiguration();
this.oracleTable = createUniqueMapperTable(context);
setOracleTableColumns(OraOopOracleQueries.getTableColumns(
this.getConnection(),
this.oracleTable,
OraOopUtilities.omitLobAndLongColumnsDuringImport(conf),
OraOopUtilities.recallSqoopJobType(conf),
true, // <- onlyOraOopSupportedTypes
false, // <- omitOraOopPseudoColumns
OracleUtils.isOracleEscapingDisabled(conf))
);
}
@Override
public void closeConnection(TaskAttemptContext context)
throws SQLException {
try {
if (this.numberOfRowsSkipped > 0) {
LOG.warn(String.format(
"%d records were skipped due to a NULL value within one of the "
+ "update-key column(s).\nHaving a NULL value prevents a record "
+ "from being able to be matched to a row in the Oracle table.",
this.numberOfRowsSkipped));
}
// Now update the "main" export table with data that was inserted into
// this mapper's table...
updateMainExportTableFromUniqueMapperTable(context,
this.updateColumnNames);
LOG.debug(String.format("Dropping temporary mapper table %s",
this.oracleTable.toString()));
OraOopOracleQueries.dropTable(this.getConnection(), this.oracleTable);
} finally {
super.closeConnection(context);
}
}
private ExportTableUpdateTechnique getExportTableUpdateTechnique() {
ExportTableUpdateTechnique result;
if (this.tableHasOraOopPartitions) {
switch (this.updateMode) {
case Update:
result = ExportTableUpdateTechnique.ReInsertUpdatedRows;
break;
case Merge:
result = ExportTableUpdateTechnique.ReInsertUpdatedRowsAndNewRows;
break;
default:
throw new RuntimeException(String.format(
"Update %s to cater for the updateMode \"%s\".",
OraOopUtilities.getCurrentMethodName(), this.updateMode
.toString()));
}
} else {
switch (this.updateMode) {
case Update:
result = ExportTableUpdateTechnique.UpdateSql;
break;
case Merge:
result = ExportTableUpdateTechnique.MergeSql;
break;
default:
throw new RuntimeException(String.format(
"Update %s to cater for the updateMode \"%s\".",
OraOopUtilities.getCurrentMethodName(), this.updateMode
.toString()));
}
}
return result;
}
private void updateMainExportTableFromUniqueMapperTable(
TaskAttemptContext context, String[] mergeColumnNames)
throws SQLException {
String schema =
context.getConfiguration().get(OraOopConstants.ORAOOP_TABLE_OWNER);
String localTableName =
context.getConfiguration().get(OraOopConstants.ORAOOP_TABLE_NAME);
OracleTable targetTable = new OracleTable(schema, localTableName);
Object sysDateTime = getJobSysDate(context);
OracleTable changesTable =
OraOopUtilities.generateExportTableMapperTableName(Integer
.toString(this.mapperId)
+ "_CHG", sysDateTime, null);
OraOopOracleQueries.CreateExportChangesTableOptions changesTableOptions;
boolean parallelizationEnabled =
OraOopUtilities.enableOracleParallelProcessingDuringExport(context
.getConfiguration());
ExportTableUpdateTechnique exportTableUpdateTechnique =
getExportTableUpdateTechnique();
switch (exportTableUpdateTechnique) {
case ReInsertUpdatedRows:
case UpdateSql:
changesTableOptions =
CreateExportChangesTableOptions.OnlyRowsThatDiffer;
break;
case ReInsertUpdatedRowsAndNewRows:
case MergeSql:
changesTableOptions =
CreateExportChangesTableOptions.RowsThatDifferPlusNewRows;
break;
default:
throw new RuntimeException(String.format(
"Update %s to cater for the ExportTableUpdateTechnique \"%s\".",
OraOopUtilities.getCurrentMethodName(),
exportTableUpdateTechnique.toString()));
}
String temporaryTableStorageClause =
OraOopUtilities.getTemporaryTableStorageClause(context
.getConfiguration());
try {
int changeTableRowCount =
OraOopOracleQueries.createExportChangesTable(this.getConnection(),
changesTable, temporaryTableStorageClause, this.oracleTable,
targetTable, this.updateColumnNames, changesTableOptions,
parallelizationEnabled, OracleUtils.isOracleEscapingDisabled(context.getConfiguration()));
if (changeTableRowCount == 0) {
LOG.debug(String.format(
"The changes-table does not contain any rows. %s is now exiting.",
OraOopUtilities.getCurrentMethodName()));
return;
}
switch (exportTableUpdateTechnique) {
case ReInsertUpdatedRows:
case ReInsertUpdatedRowsAndNewRows:
OraOopOracleQueries.deleteRowsFromTable(this.getConnection(),
targetTable, changesTable, this.updateColumnNames,
parallelizationEnabled);
OraOopOracleQueries.insertRowsIntoExportTable(this.getConnection(),
targetTable, changesTable, sysDateTime, this.mapperId,
parallelizationEnabled, OracleUtils.isOracleEscapingDisabled(context.getConfiguration()));
break;
case UpdateSql:
long start = System.nanoTime();
OraOopOracleQueries.updateTable(this.getConnection(), targetTable,
changesTable, this.updateColumnNames, this
.getOracleTableColumns(), sysDateTime, this.mapperId,
parallelizationEnabled, context.getConfiguration().getBoolean(SqoopOptions.ORACLE_ESCAPING_DISABLED,false));
double timeInSec = (System.nanoTime() - start) / Math.pow(10, 9);
LOG.info(String.format("Time spent performing an update: %f sec.",
timeInSec));
break;
case MergeSql:
long mergeStart = System.nanoTime();
OraOopOracleQueries.mergeTable(this.getConnection(), targetTable,
changesTable, this.updateColumnNames, this
.getOracleTableColumns(), sysDateTime, this.mapperId,
parallelizationEnabled);
double mergeTimeInSec = (System.nanoTime() - mergeStart)
/ Math.pow(10, 9);
LOG.info(String.format("Time spent performing a merge: %f sec.",
mergeTimeInSec));
break;
default:
throw new RuntimeException(
String.format(
"Update %s to cater for the ExportTableUpdateTechnique \"%s\".",
OraOopUtilities.getCurrentMethodName(),
exportTableUpdateTechnique.toString()));
}
this.getConnection().commit();
} catch (SQLException ex) {
this.getConnection().rollback();
throw ex;
} finally {
OraOopOracleQueries.dropTable(this.getConnection(), changesTable);
}
}
@Override
protected String getBatchSqlStatement() {
if (sqlStatement == null) {
this.sqlStatement =
getBatchInsertSqlStatement(
this.useAppendValuesOracleHint ? "/*+APPEND_VALUES*/"
: "");
}
return this.sqlStatement;
}
@Override
void configurePreparedStatement(PreparedStatement statement,
List userRecords) throws SQLException {
Map fieldMap;
try {
for (SqoopRecord record : userRecords) {
fieldMap = record.getFieldMap();
boolean updateKeyValueIsNull = false;
for (int idx = 0; idx < this.updateColumnNames.length; idx++) {
String updateColumnName = this.updateColumnNames[idx];
Object updateKeyValue = fieldMap.get(updateColumnName);
if (updateKeyValue == null) {
this.numberOfRowsSkipped++;
updateKeyValueIsNull = true;
break;
}
}
if (updateKeyValueIsNull) {
continue;
}
configurePreparedStatementColumns(statement, fieldMap);
}
} catch (Exception ex) {
if (ex instanceof SQLException) {
throw (SQLException) ex;
} else {
LOG.error(String.format("The following error occurred during %s",
OraOopUtilities.getCurrentMethodName()), ex);
throw new SQLException(ex);
}
}
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy