
com.google.cloud.hadoop.io.bigquery.mapred.BigQueryMapredOutputCommitter Maven / Gradle / Ivy
package com.google.cloud.hadoop.io.bigquery.mapred;
import com.google.cloud.hadoop.io.bigquery.BigQueryOutputFormat;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.gson.JsonObject;
import java.io.IOException;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapreduce.JobStatus.State;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* OutputCommitter that uses the old mapred API so that we can do
* streaming output with our BigQuery connector.
*/
class BigQueryMapredOutputCommitter extends OutputCommitter {
/**
 * Mapping between the integer run-state codes defined on the old-API
 * {@link JobStatus} and the matching {@link State} enum members.
 *
 * <p>The map is typed as {@code Integer -> State} so that lookups return a
 * {@link State} directly instead of a raw {@code Object}.
 */
public static final ImmutableMap<Integer, State> STATUS_TO_STATE =
    ImmutableMap.<Integer, State>builder()
        .put(JobStatus.RUNNING, State.RUNNING)
        .put(JobStatus.SUCCEEDED, State.SUCCEEDED)
        .put(JobStatus.FAILED, State.FAILED)
        .put(JobStatus.PREP, State.PREP)
        .put(JobStatus.KILLED, State.KILLED)
        .build();
// Logger for this class; the committer methods below use it for debug tracing.
protected static final Logger LOG =
LoggerFactory.getLogger(BigQueryMapredOutputCommitter.class);
// New-API (mapreduce) committer that the methods of this class forward to.
// It is not assigned by the constructor: task-level methods call
// initMapreduceOutputCommitter before using it, while job-level methods
// require it to be non-null already and fail a checkState otherwise.
private org.apache.hadoop.mapreduce.OutputCommitter mapreduceOutputCommitter;
/**
 * Creates the committer without a wrapped mapreduce committer; this
 * constructor only emits a debug log line.
 */
public BigQueryMapredOutputCommitter() {
// We need to create a BigQueryOutputCommitter, but we don't have
// enough info to do that until we have the TaskAttemptContext,
// so wait until then to create it.
LOG.debug("BigQueryMapredOutputCommitter created");
}
// OutputCommitter methods
/**
 * Aborts the job by forwarding to the wrapped mapreduce committer, after
 * translating the old-API integer status into a {@link State}.
 *
 * @param jobContext context of the job being aborted
 * @param status integer job status; must be a key of {@code STATUS_TO_STATE}
 * @throws IllegalStateException if the wrapped committer is still null
 * @throws IllegalArgumentException if {@code status} has no mapped State
 * @throws IOException propagated from the wrapped committer
 */
@Override
public void abortJob(JobContext jobContext, int status) throws IOException {
  LOG.debug("abortJob");
  final boolean delegateReady = this.mapreduceOutputCommitter != null;
  Preconditions.checkState(
      delegateReady, "mapreduceOutputCommitter must be initialized before abortJob");

  // Reject unknown codes up front so the lookup below always finds an entry.
  if (!STATUS_TO_STATE.containsKey(status)) {
    throw new IllegalArgumentException("Bad value for status: " + status);
  }
  State jobState = STATUS_TO_STATE.get(status);
  this.mapreduceOutputCommitter.abortJob(jobContext, jobState);
}
/**
 * Aborts a task attempt by forwarding to the wrapped mapreduce committer,
 * running the committer initialization step first.
 *
 * @param taskContext context of the task attempt being aborted
 * @throws IOException propagated from initialization or the wrapped committer
 */
@Override
public void abortTask(TaskAttemptContext taskContext) throws IOException {
  LOG.debug("abortTask");
  // Task-level entry point: make sure the delegate exists before using it.
  this.initMapreduceOutputCommitter(taskContext);
  this.mapreduceOutputCommitter.abortTask(taskContext);
}
/**
 * Cleans up after the job by forwarding to the wrapped mapreduce committer.
 *
 * @param jobContext context of the job being cleaned up
 * @throws IllegalStateException if the wrapped committer is still null
 * @throws IOException propagated from the wrapped committer
 */
@Override
public void cleanupJob(JobContext jobContext) throws IOException {
  LOG.debug("cleanupJob");
  final boolean delegateReady = this.mapreduceOutputCommitter != null;
  Preconditions.checkState(
      delegateReady, "mapreduceOutputCommitter must be initialized before cleanupJob");
  this.mapreduceOutputCommitter.cleanupJob(jobContext);
}
/**
 * Commits the job by forwarding to the wrapped mapreduce committer.
 *
 * @param jobContext context of the job being committed
 * @throws IllegalStateException if the wrapped committer is still null
 * @throws IOException propagated from the wrapped committer
 */
@Override
public void commitJob(JobContext jobContext) throws IOException {
  LOG.debug("commitJob");
  final boolean delegateReady = this.mapreduceOutputCommitter != null;
  Preconditions.checkState(
      delegateReady, "mapreduceOutputCommitter must be initialized before commitJob");
  this.mapreduceOutputCommitter.commitJob(jobContext);
}
/**
 * Commits a task attempt by forwarding to the wrapped mapreduce committer,
 * running the committer initialization step first.
 *
 * @param taskContext context of the task attempt being committed
 * @throws IOException propagated from initialization or the wrapped committer
 */
@Override
public void commitTask(TaskAttemptContext taskContext) throws IOException {
  LOG.debug("commitTask");
  // Task-level entry point: make sure the delegate exists before using it.
  this.initMapreduceOutputCommitter(taskContext);
  this.mapreduceOutputCommitter.commitTask(taskContext);
}
/**
 * Asks the wrapped mapreduce committer whether the task attempt needs a
 * commit, running the committer initialization step first.
 *
 * @param taskContext context of the task attempt in question
 * @return whatever the wrapped committer's {@code needsTaskCommit} returns
 * @throws IOException propagated from initialization or the wrapped committer
 */
@Override
public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
  LOG.debug("needsTaskCommit");
  this.initMapreduceOutputCommitter(taskContext);
  final boolean commitRequired = this.mapreduceOutputCommitter.needsTaskCommit(taskContext);
  return commitRequired;
}
/**
 * Sets up the job by forwarding to the wrapped mapreduce committer.
 *
 * @param jobContext context of the job being set up
 * @throws IllegalStateException if the wrapped committer is still null
 * @throws IOException propagated from the wrapped committer
 */
@Override
public void setupJob(JobContext jobContext) throws IOException {
  LOG.debug("setupJob");
  final boolean delegateReady = this.mapreduceOutputCommitter != null;
  Preconditions.checkState(
      delegateReady, "mapreduceOutputCommitter must be initialized before setupJob");
  this.mapreduceOutputCommitter.setupJob(jobContext);
}
/**
 * Sets up a task attempt by forwarding to the wrapped mapreduce committer,
 * running the committer initialization step first.
 *
 * @param taskContext context of the task attempt being set up
 * @throws IOException propagated from initialization or the wrapped committer
 */
@Override
public void setupTask(TaskAttemptContext taskContext) throws IOException {
  LOG.debug("setupTask");
  // Task-level entry point: make sure the delegate exists before using it.
  this.initMapreduceOutputCommitter(taskContext);
  this.mapreduceOutputCommitter.setupTask(taskContext);
}
// Initialize our mapreduce OutputCommitter the first time we are called.
private void initMapreduceOutputCommitter(TaskAttemptContext taskContext)
throws IOException {
if (mapreduceOutputCommitter != null) {
LOG.debug("Using existing mapreduceOutputCommitter");
return;
}
// It would be nice to use the BigQueryOutputFormat that already exists
// (there is one wrapped inside our BigQueryMapredOutputFormat), but
// there does not seem to be an easy way to do that. So make another one.
LOG.debug("Creating BigQueryOutputFormat");
BigQueryOutputFormat
© 2015 - 2025 Weber Informatics LLC | Privacy Policy