package com.google.cloud.hadoop.io.bigquery;

import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.TableReference;
import com.google.cloud.hadoop.util.LogUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.util.Progressable;

import java.io.IOException;
import java.util.List;

/**
 * An Export decorator that will attempt to perform a query during the export prepare phase.
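 *
 * <p>A rough usage sketch (the {@code delegate}, {@code helper}, and {@code tempTable} names
 * below are illustrative placeholders, not defined by this class):
 *
 * <pre>{@code
 * Export export = new QueryBasedExport(
 *     delegate,            // the wrapped Export that performs the actual extraction
 *     "SELECT word, word_count FROM [publicdata:samples.shakespeare]",
 *     "my-project-id",
 *     helper,              // a configured BigQueryHelper
 *     tempTable,           // TableReference naming the intermediate results table
 *     true);               // delete the intermediate table during cleanupExport()
 * export.prepare();                      // runs the query into tempTable, then prepares the delegate
 * export.beginExport();
 * export.waitForUsableMapReduceInput();
 * // ... run the MapReduce job over the exported data ...
 * export.cleanupExport();                // drops tempTable (since true was passed), then cleans up the delegate
 * }</pre>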
 */
public class QueryBasedExport implements Export {
  protected static final LogUtil log = new LogUtil(QueryBasedExport.class);

  private final String query;
  private final BigQueryHelper bigQueryHelper;
  private final String projectId;
  private final TableReference tableToExport;
  private final Export delegate;
  private final boolean deleteIntermediateTable;

  public QueryBasedExport(
      Export delegate,
      String query,
      String projectId,
      BigQueryHelper bigQueryHelper,
      TableReference tableToExport,
      boolean deleteIntermediateTable) {
    this.query = query;
    this.bigQueryHelper = bigQueryHelper;
    this.projectId = projectId;
    this.tableToExport = tableToExport;
    this.delegate = delegate;
    this.deleteIntermediateTable = deleteIntermediateTable;
  }

  @Override
  public List<InputSplit> getSplits(JobContext context)
      throws IOException, InterruptedException {
    return delegate.getSplits(context);
  }

  @Override
  public List<String> getExportPaths() throws IOException {
    return delegate.getExportPaths();
  }

  @Override
  public void beginExport() throws IOException {
    delegate.beginExport();
  }

  @Override
  public void waitForUsableMapReduceInput() throws IOException, InterruptedException {
    delegate.waitForUsableMapReduceInput();
  }

  @Override
  public void prepare() throws IOException {
    if (!Strings.isNullOrEmpty(query)) {
      log.info("Invoking query '%s' and saving to '%s' before beginning export/read.",
          query, BigQueryStrings.toString(tableToExport));
      try {
        runQuery(bigQueryHelper, projectId, tableToExport, query);
      } catch (InterruptedException ie) {
        throw new IOException(
            String.format("Interrupted during query '%s' into table '%s'",
                query, BigQueryStrings.toString(tableToExport)), ie);
      }
    }
    delegate.prepare();
  }

  @Override
  public void cleanupExport() throws IOException {
    if (deleteIntermediateTable) {
      log.info(
          "Deleting input intermediate table: %s:%s.%s",
          tableToExport.getProjectId(),
          tableToExport.getDatasetId(),
          tableToExport.getTableId());
      Bigquery.Tables tables = bigQueryHelper.getRawBigquery().tables();
      Bigquery.Tables.Delete delete = tables.delete(
          tableToExport.getProjectId(), tableToExport.getDatasetId(), tableToExport.getTableId());
      delete.execute();
    }
    delegate.cleanupExport();
  }

  /**
   * Runs the query in BigQuery and writes results to a temporary table.
   *
   * @param bigQueryHelper the BigQueryHelper instance to use.
   * @param projectId the project on whose behalf the query will be run.
   * @param tableRef the table to write the results to.
   * @param query the query to run.
   * @throws IOException on IO error.
   * @throws InterruptedException on interrupt.
   */
  @VisibleForTesting
  static void runQuery(
      BigQueryHelper bigQueryHelper, String projectId, TableReference tableRef, String query)
      throws IOException, InterruptedException {
    log.debug("runQuery(bigquery, '%s', '%s', '%s')",
        projectId, BigQueryStrings.toString(tableRef), query);

    // Create a query statement and query request object.
    JobConfigurationQuery queryConfig = new JobConfigurationQuery();
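    // Allowing large (legacy SQL) query results lifts the response-size cap, but requires an
    // explicit destination table, which is set just below.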
    queryConfig.setAllowLargeResults(true);
    queryConfig.setQuery(query);

    // Set the table to put results into.
    queryConfig.setDestinationTable(tableRef);

    // Require table to be empty.
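    // WRITE_EMPTY fails the job if the destination table already contains data; the other
    // BigQuery write dispositions are WRITE_TRUNCATE and WRITE_APPEND.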
    queryConfig.setWriteDisposition("WRITE_EMPTY");

    JobConfiguration config = new JobConfiguration();
    config.setQuery(queryConfig);

    JobReference jobReference = bigQueryHelper.createJobReference(projectId, "querybasedexport");

    Job job = new Job();
    job.setConfiguration(config);
    job.setJobReference(jobReference);

    // Run the job.
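    // If a job with this JobReference was already inserted (for example by a retried attempt),
    // the helper is expected to return that existing job rather than fail on the duplicate.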
    Job response = bigQueryHelper.insertJobOrFetchDuplicate(projectId, job);
    log.debug("Got response '%s'", response);

    // Create an anonymous Progressable object.
    Progressable progressable = new Progressable() {
      @Override
      public void progress() {
        // TODO(user): ensure task doesn't time out
      }
    };

    // Poll until job is complete.
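    // waitForJobCompletion presumably polls the job status, calling progressable.progress()
    // while waiting, and surfaces any job errors as an IOException.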
    BigQueryUtils.waitForJobCompletion(
        bigQueryHelper.getRawBigquery(), projectId, jobReference, progressable);
  }
}