All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.dataflow.sdk.runners.worker.BigQueryReader Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 ******************************************************************************/

package com.google.cloud.dataflow.sdk.runners.worker;

import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.cloud.dataflow.sdk.options.BigQueryOptions;
import com.google.cloud.dataflow.sdk.util.BigQueryTableRowIterator;
import com.google.cloud.dataflow.sdk.util.Transport;
import com.google.cloud.dataflow.sdk.util.WindowedValue;
import com.google.cloud.dataflow.sdk.util.common.worker.AbstractBoundedReaderIterator;
import com.google.cloud.dataflow.sdk.util.common.worker.Reader;

import java.io.IOException;

/**
 * A source that reads a BigQuery table and yields TableRow objects.
 *
 * 

The source is a wrapper over the {@code BigQueryTableRowIterator} class, which issues a * query for all rows of a table and then iterates over the result. There is no support for * progress reporting because the source is used only in situations where the entire table must be * read by each worker (i.e. the source is used as a side input). */ public class BigQueryReader extends Reader> { final TableReference tableRef; final BigQueryOptions bigQueryOptions; final Bigquery bigQueryClient; final String query; final String projectId; /** Builds a BigQuery source using pipeline options to instantiate a Bigquery client. */ public BigQueryReader(BigQueryOptions bigQueryOptions, TableReference tableRef) { // Save pipeline options so that we can construct the BigQuery client on-demand whenever an // iterator gets created. this.bigQueryOptions = bigQueryOptions; this.tableRef = tableRef; this.bigQueryClient = null; this.query = null; this.projectId = null; } public BigQueryReader(BigQueryOptions bigQueryOptions, String query, String projectId) { this.bigQueryOptions = bigQueryOptions; this.tableRef = null; this.bigQueryClient = null; this.query = query; this.projectId = projectId; } /** Builds a BigQueryReader directly using a BigQuery client. */ public BigQueryReader(Bigquery bigQueryClient, TableReference tableRef) { this.bigQueryOptions = null; this.tableRef = tableRef; this.bigQueryClient = bigQueryClient; this.query = null; this.projectId = null; } public BigQueryReader(Bigquery bigQueryClient, String query, String projectId) { this.bigQueryOptions = null; this.tableRef = null; this.bigQueryClient = bigQueryClient; this.query = query; this.projectId = projectId; } @Override public ReaderIterator> iterator() throws IOException { if (tableRef != null) { return new BigQueryReaderIterator( bigQueryClient != null ? bigQueryClient : Transport.newBigQueryClient(bigQueryOptions).build(), tableRef); } else { return new BigQueryReaderIterator( bigQueryClient != null ? 
bigQueryClient : Transport.newBigQueryClient(bigQueryOptions).build(), query, projectId); } } /** * A ReaderIterator that yields TableRow objects for each row of a BigQuery table. */ class BigQueryReaderIterator extends AbstractBoundedReaderIterator> { private BigQueryTableRowIterator rowIterator; public BigQueryReaderIterator(Bigquery bigQueryClient, TableReference tableRef) { rowIterator = new BigQueryTableRowIterator(bigQueryClient, tableRef); } public BigQueryReaderIterator(Bigquery bigQueryClient, String query, String projectId) { rowIterator = new BigQueryTableRowIterator(bigQueryClient, query, projectId); } @Override protected boolean hasNextImpl() { return rowIterator.hasNext(); } @Override protected WindowedValue nextImpl() throws IOException { return WindowedValue.valueInGlobalWindow(rowIterator.next()); } @Override public Progress getProgress() { // For now reporting progress is not supported because this source is used only when // an entire table needs to be read by each worker (used as a side input for instance). return null; } @Override public DynamicSplitResult requestDynamicSplit(DynamicSplitRequest splitRequest) { // For now dynamic splitting is not supported because this source // is used only when an entire table needs to be read by each worker (used // as a side input for instance). return null; } @Override public void close() throws IOException { rowIterator.close(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy