org.apache.sqoop.mapreduce.SQLServerResilientExportOutputFormat Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sqoop Show documentation
Show all versions of sqoop Show documentation
Bandwidth controlled sqoop for network aware data migration
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.mapreduce;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.sqoop.mapreduce.db.DBConfiguration;
import org.apache.sqoop.lib.SqoopRecord;
/**
* Insert the emitted keys as records into a database table.
* Insert failures are handled by the registered Failure Handler class which
* allows for recovering from certain failures like intermittent connection
* or database throttling, etc.
*
* The number of records per transaction is governed by the
* sqoop.export.records.per.statement configuration value or else default
* value is used
*
* Record objects are buffered before actually performing the INSERT
* statements; this requires that the key implement the
* SqoopRecord interface.
*/
public class SQLServerResilientExportOutputFormat
extends OutputFormat {
private static final Log LOG = LogFactory.getLog(
SQLServerResilientExportOutputFormat.class);
public static final String EXPORT_FAILURE_HANDLER_CLASS =
"sqoop.export.failure.handler.class";
public static final int DEFAULT_RECORDS_PER_STATEMENT = 1000;
private int curListIdx = 0;
@Override
/** {@inheritDoc} */
public void checkOutputSpecs(JobContext context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
DBConfiguration dbConf = new DBConfiguration(conf);
// Sanity check all the configuration values we need.
if (null == conf.get(DBConfiguration.URL_PROPERTY)) {
throw new IOException("Database connection URL is not set.");
} else if (null == dbConf.getOutputTableName()) {
throw new IOException("Table name is not set for export");
} else if (null == dbConf.getOutputFieldNames()
&& 0 == dbConf.getOutputFieldCount()) {
throw new IOException(
"Output field names are null and zero output field count set.");
}
}
@Override
/** {@inheritDoc} */
public RecordWriter getRecordWriter(TaskAttemptContext context)
throws IOException {
try {
return new SQLServerExportRecordWriter(context);
} catch (Exception e) {
throw new IOException(e);
}
}
@Override
/** {@inheritDoc} */
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
throws IOException, InterruptedException {
return new NullOutputCommitter();
}
/**
* RecordWriter to write the output to a row in a database table.
* The actual database updates are executed in a parallel thread in a
* resilient fashion which attempts to recover failed operations
*/
public class SQLServerExportRecordWriter
extends RecordWriter {
private final Log LOG = LogFactory.getLog(
SQLServerExportRecordWriter.class);
private final int LIST_COUNT = 2;
protected Configuration conf;
protected SQLServerAsyncDBExecThread execThread;
// Number of records to buffer before sending as a batch
protected int recordsPerStmt;
// We alternate between 2 lists of records as we go, as one is sent to the
// target database the other gets asynchronously filled
protected List> recordsLists = new ArrayList>();
protected List currentList;
public SQLServerExportRecordWriter(TaskAttemptContext context)
throws IOException {
conf = context.getConfiguration();
recordsPerStmt = conf.getInt(
AsyncSqlOutputFormat.RECORDS_PER_STATEMENT_KEY,
DEFAULT_RECORDS_PER_STATEMENT);
// Create the lists to host incoming records
List newList;
for (int i = 0; i < LIST_COUNT; ++i) {
newList = new ArrayList(recordsPerStmt);
recordsLists.add(newList);
}
currentList = recordsLists.get(0);
// Initialize the DB exec Thread
initializeExecThread();
// Start the DB exec thread
execThread.start();
}
/**
* Initialize the thread used to perform the asynchronous DB operation
*/
protected void initializeExecThread() throws IOException {
execThread = new SQLServerExportDBExecThread();
execThread.initialize(conf);
}
@Override
/** {@inheritDoc} */
public void write(K key, V value)
throws InterruptedException, IOException {
try {
currentList.add((SqoopRecord) key.clone());
if (currentList.size() >= this.recordsPerStmt) {
// Schedule the current list for asynchronous transfer
// This will block if the previous operation is still in progress
execThread.put(currentList);
// Switch to the other list for receiving incoming records
curListIdx = (curListIdx + 1) % recordsLists.size();
// Clear the list to be used in case it has previous records
currentList = recordsLists.get(curListIdx);
currentList.clear();
}
} catch (CloneNotSupportedException cnse) {
throw new IOException("Could not buffer record", cnse);
}
}
@Override
public void close(TaskAttemptContext context) throws IOException,
InterruptedException {
try {
// Ensure we flush the list of records to the database
if (currentList.size() > 0) {
execThread.put(currentList);
}
}
finally {
execThread.close();
execThread.join();
}
// Final check for any exceptions raised when writing to the database
Exception lastException = execThread.getLastError();
if (lastException != null) {
LOG.error("Asynchronous writer thread encountered the following " +
"exception: " + lastException.toString());
throw new IOException(lastException);
}
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy