/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.bigquery;
import static com.google.cloud.RetryHelper.runWithRetries;
import static java.net.HttpURLConnection.HTTP_NOT_FOUND;
import com.google.api.core.BetaApi;
import com.google.api.core.InternalApi;
import com.google.api.gax.core.FixedCredentialsProvider;
import com.google.api.services.bigquery.model.GetQueryResultsResponse;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.QueryParameter;
import com.google.api.services.bigquery.model.QueryRequest;
import com.google.api.services.bigquery.model.TableDataList;
import com.google.api.services.bigquery.model.TableRow;
import com.google.cloud.RetryHelper;
import com.google.cloud.Tuple;
import com.google.cloud.bigquery.JobStatistics.QueryStatistics;
import com.google.cloud.bigquery.JobStatistics.SessionInfo;
import com.google.cloud.bigquery.spi.v2.BigQueryRpc;
import com.google.cloud.bigquery.storage.v1.ArrowRecordBatch;
import com.google.cloud.bigquery.storage.v1.ArrowSchema;
import com.google.cloud.bigquery.storage.v1.BigQueryReadClient;
import com.google.cloud.bigquery.storage.v1.BigQueryReadSettings;
import com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest;
import com.google.cloud.bigquery.storage.v1.DataFormat;
import com.google.cloud.bigquery.storage.v1.ReadRowsRequest;
import com.google.cloud.bigquery.storage.v1.ReadRowsResponse;
import com.google.cloud.bigquery.storage.v1.ReadSession;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import java.io.IOException;
import java.math.BigInteger;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.UUID;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.VectorLoader;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ReadChannel;
import org.apache.arrow.vector.ipc.message.MessageSerializer;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
/** Implementation for {@link Connection}, the generic BigQuery connection API (not JDBC). */
class ConnectionImpl implements Connection {
private final ConnectionSettings connectionSettings;
private final BigQueryOptions bigQueryOptions;
private final BigQueryRpc bigQueryRpc;
private final BigQueryRetryConfig retryConfig;
private final int bufferSize; // buffer size in Producer Thread
private final int MAX_PROCESS_QUERY_THREADS_CNT = 5;
private final ExecutorService queryTaskExecutor =
Executors.newFixedThreadPool(MAX_PROCESS_QUERY_THREADS_CNT);
private final Logger logger = Logger.getLogger(this.getClass().getName());
private BigQueryReadClient bqReadClient;
private static final long EXECUTOR_TIMEOUT_SEC = 10;
private BlockingQueue<AbstractList<FieldValue>>
bufferFvl; // initialized lazily iff we end up using the tabledata.list end point
private BlockingQueue<BigQueryResultImpl.Row>
bufferRow; // initialized lazily iff we end up using Read API
ConnectionImpl(
ConnectionSettings connectionSettings,
BigQueryOptions bigQueryOptions,
BigQueryRpc bigQueryRpc,
BigQueryRetryConfig retryConfig) {
this.connectionSettings = connectionSettings;
this.bigQueryOptions = bigQueryOptions;
this.bigQueryRpc = bigQueryRpc;
this.retryConfig = retryConfig;
// Sets a reasonable buffer size (a blocking queue) if user input is suboptimal
this.bufferSize =
(connectionSettings == null
|| connectionSettings.getNumBufferedRows() == null
|| connectionSettings.getNumBufferedRows() < 10000
? 20000
: Math.min(connectionSettings.getNumBufferedRows() * 2, 100000));
}
/**
* This method returns the number of records to be stored in the buffer and ensures that it stays
* within a reasonable range
*
* @return The max number of records to be stored in the buffer
*/
private int getBufferSize() {
return (connectionSettings == null
|| connectionSettings.getNumBufferedRows() == null
|| connectionSettings.getNumBufferedRows() < 10000
? 20000
: Math.min(connectionSettings.getNumBufferedRows() * 2, 100000));
}
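// Illustrative arithmetic for the sizing rule above (values are examples, not defaults from
// ConnectionSettings): a user-supplied numBufferedRows of 30000 yields min(30000 * 2, 100000) =
// 60000 buffered records, 80000 hits the 100000 cap, and anything below 10000 (or a null/absent
// setting) falls back to the 20000 default.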
/**
* The close method shuts down the pageFetcher and producerWorker threads gracefully using an
* interrupt. The pageFetcher thread will not request any subsequent pages after being interrupted
* and shuts down as soon as any ongoing RPC call returns. The producerWorker will not populate the
* buffer with any further records; it clears the buffer, puts an EoF marker and shuts down.
*
* @return Boolean value true if the threads were interrupted
* @throws BigQuerySQLException
*/
@BetaApi
@Override
public synchronized boolean close() throws BigQuerySQLException {
flagEndOfStream(); // puts an End of Stream flag in the buffer so that the `ResultSet.next()` stops
// advancing the cursor
queryTaskExecutor.shutdownNow();
try {
if (queryTaskExecutor.awaitTermination(EXECUTOR_TIMEOUT_SEC, TimeUnit.SECONDS)) {
return true;
} // else queryTaskExecutor.isShutdown() will be returned outside this try block
} catch (InterruptedException e) {
logger.log(
Level.WARNING,
"\n" + Thread.currentThread().getName() + " Exception while awaitTermination",
e); // Logging InterruptedException instead of throwing the exception back, close method
// will return queryTaskExecutor.isShutdown()
}
return queryTaskExecutor.isShutdown(); // check if the executor has been shutdown
}
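// Note: close() is also invoked from the onFailure callback in getExecuteSelectFuture below, so a
// cancelled or failed executeSelectAsync(...) future funnels through this same shutdown path.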
/**
* This method runs a dry run query
*
* @param sql SQL SELECT statement
* @return BigQueryDryRunResult containing List<Parameter> and Schema
* @throws BigQuerySQLException
*/
@BetaApi
@Override
public BigQueryDryRunResult dryRun(String sql) throws BigQuerySQLException {
com.google.api.services.bigquery.model.Job dryRunJob = createDryRunJob(sql);
Schema schema = Schema.fromPb(dryRunJob.getStatistics().getQuery().getSchema());
List<QueryParameter> queryParametersPb =
dryRunJob.getStatistics().getQuery().getUndeclaredQueryParameters();
List<Parameter> queryParameters =
queryParametersPb == null
? Collections.emptyList()
: Lists.transform(queryParametersPb, QUERY_PARAMETER_FROM_PB_FUNCTION);
QueryStatistics queryStatistics = JobStatistics.fromPb(dryRunJob);
SessionInfo sessionInfo =
queryStatistics.getSessionInfo() == null ? null : queryStatistics.getSessionInfo();
BigQueryResultStats bigQueryResultStats =
new BigQueryResultStatsImpl(queryStatistics, sessionInfo);
return new BigQueryDryRunResultImpl(schema, queryParameters, bigQueryResultStats);
}
/**
* This method executes a SQL SELECT query
*
* @param sql SQL SELECT statement
* @return BigQueryResult containing the output of the query
* @throws BigQuerySQLException
*/
@BetaApi
@Override
public BigQueryResult executeSelect(String sql) throws BigQuerySQLException {
return getExecuteSelectResponse(sql, null, null);
}
/**
* This method executes a SQL SELECT query
*
* @param sql SQL SELECT query
* @param parameters named or positional parameters. The set of query parameters must either be
* all positional or all named parameters.
* @param labels the labels associated with this query. You can use these to organize and group
* your query jobs. Label keys and values can be no longer than 63 characters, can only
* contain lowercase letters, numeric characters, underscores and dashes. International
* characters are allowed. Label values are optional and labels are passed as varargs; you should
* pass all the labels in a single Map. Label keys must start with a letter and each label in the
* list must have a different key.
* @return BigQueryResult containing the output of the query
* @throws BigQuerySQLException
*/
@BetaApi
@Override
public BigQueryResult executeSelect(
String sql, List<Parameter> parameters, Map<String, String>... labels)
throws BigQuerySQLException {
return getExecuteSelectResponse(sql, parameters, labels);
}
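// A minimal usage sketch for the parameterized overload above (assumes a Connection obtained via
// bigquery.createConnection(...) and a table MY_TABLE; names and values are illustrative only):
//
//   Parameter stringParam =
//       Parameter.newBuilder()
//           .setName("stringParam")
//           .setValue(QueryParameterValue.string("stringValue"))
//           .build();
//   Map<String, String> labels = ImmutableMap.of("team", "data-eng");
//   BigQueryResult result =
//       connection.executeSelect(
//           "SELECT StringField FROM " + MY_TABLE + " WHERE StringField = @stringParam",
//           ImmutableList.of(stringParam),
//           labels);
//   ResultSet rs = result.getResultSet();
//   while (rs.next()) {
//     System.out.println(rs.getString(1));
//   }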
private BigQueryResult getExecuteSelectResponse(
String sql, List<Parameter> parameters, Map<String, String>... labels)
throws BigQuerySQLException {
Map<String, String> labelMap = null;
if (labels != null
&& labels.length == 1) { // We expect label as a key value pair in a single Map
labelMap = labels[0];
}
try {
// use jobs.query if possible
if (isFastQuerySupported()) {
logger.log(Level.INFO, "\n Using Fast Query Path");
final String projectId = bigQueryOptions.getProjectId();
final QueryRequest queryRequest =
createQueryRequest(connectionSettings, sql, parameters, labelMap);
return queryRpc(projectId, queryRequest, sql, parameters != null);
}
// use jobs.insert otherwise
logger.log(Level.INFO, "\n Not Using Fast Query Path, using jobs.insert");
com.google.api.services.bigquery.model.Job queryJob =
createQueryJob(sql, connectionSettings, parameters, labelMap);
JobId jobId = JobId.fromPb(queryJob.getJobReference());
GetQueryResultsResponse firstPage = getQueryResultsFirstPage(jobId);
return getResultSet(firstPage, jobId, sql, parameters != null);
} catch (BigQueryException e) {
throw new BigQuerySQLException(e.getMessage(), e, e.getErrors());
}
}
/**
* Execute a SQL statement that returns a single ResultSet and returns a ListenableFuture to
* process the response asynchronously.
*
* <p>Example of running a query.
*
* <pre>{@code
* ConnectionSettings connectionSettings =
*     ConnectionSettings.newBuilder()
*         .setUseReadAPI(true)
*         .build();
* Connection connection = bigquery.createConnection(connectionSettings);
* String selectQuery = "SELECT corpus FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus;";
* ListenableFuture<ExecuteSelectResponse> executeSelectFuture = connection.executeSelectAsync(selectQuery);
* ExecuteSelectResponse executeSelectRes = executeSelectFuture.get();
*
* if (!executeSelectRes.getIsSuccessful()) {
*   throw executeSelectRes.getBigQuerySQLException();
* }
*
* BigQueryResult bigQueryResult = executeSelectRes.getBigQueryResult();
* ResultSet rs = bigQueryResult.getResultSet();
* while (rs.next()) {
*   System.out.println(rs.getString(1));
* }
* }</pre>
*
* @param sql a static SQL SELECT statement
* @return a ListenableFuture that is used to get the data produced by the query
* @throws BigQuerySQLException upon failure
*/
@BetaApi
@Override
public ListenableFuture<ExecuteSelectResponse> executeSelectAsync(String sql)
throws BigQuerySQLException {
return getExecuteSelectFuture(sql, null);
}
/** This method calls the overloaded executeSelect(...) methods and returns a Future */
private ListenableFuture<ExecuteSelectResponse> getExecuteSelectFuture(
String sql, List<Parameter> parameters, Map<String, String>... labels)
throws BigQuerySQLException {
ExecutorService execService =
Executors.newFixedThreadPool(
2); // two fixed threads. One for the async operation and the other for processing the
// callback
ListeningExecutorService lExecService = MoreExecutors.listeningDecorator(execService);
ListenableFuture<ExecuteSelectResponse> executeSelectFuture =
lExecService.submit(
() -> {
try {
return ExecuteSelectResponse.newBuilder()
.setResultSet(
this.executeSelect(
sql,
parameters,
labels)) // calling the overloaded executeSelect method, it takes care
// of null parameters and labels
.setIsSuccessful(true)
.build();
} catch (BigQuerySQLException ex) {
return ExecuteSelectResponse
.newBuilder() // passing back the null result with isSuccessful set to false
.setIsSuccessful(false)
.setBigQuerySQLException(ex)
.build();
}
});
Futures.addCallback(
executeSelectFuture,
new FutureCallback<ExecuteSelectResponse>() {
public void onSuccess(ExecuteSelectResponse result) {
execService.shutdownNow(); // shutdown the executor service as we do not need it
}
public void onFailure(Throwable t) {
logger.log(
Level.WARNING,
"\n"
+ String.format(
"Async task failed or cancelled with error %s", t.getMessage()));
try {
close(); // attempt to stop the execution as the developer might have called
// Future.cancel()
} catch (BigQuerySQLException e) {
logger.log(
Level.WARNING,
"\n"
+ String.format("Exception while closing the connection %s", e.getMessage()));
}
execService.shutdownNow(); // shutdown the executor service as we do not need it
}
},
execService);
return executeSelectFuture;
}
/**
* Execute a SQL statement that returns a single ResultSet and returns a ListenableFuture to
* process the response asynchronously.
*
* <p>Example of running a query.
*
* <pre>{@code
* ConnectionSettings connectionSettings =
*     ConnectionSettings.newBuilder()
*         .setUseReadAPI(true)
*         .build();
* Connection connection = bigquery.createConnection(connectionSettings);
* String selectQuery =
*     "SELECT TimestampField, StringField, BooleanField FROM "
*         + MY_TABLE
*         + " WHERE StringField = @stringParam"
*         + " AND IntegerField IN UNNEST(@integerList)";
* QueryParameterValue stringParameter = QueryParameterValue.string("stringValue");
* QueryParameterValue intArrayParameter =
*     QueryParameterValue.array(new Integer[] {3, 4}, Integer.class);
* Parameter stringParam =
*     Parameter.newBuilder().setName("stringParam").setValue(stringParameter).build();
* Parameter intArrayParam =
*     Parameter.newBuilder().setName("integerList").setValue(intArrayParameter).build();
* List<Parameter> parameters = ImmutableList.of(stringParam, intArrayParam);
*
* ListenableFuture<ExecuteSelectResponse> executeSelectFuture =
*     connection.executeSelectAsync(selectQuery, parameters);
* ExecuteSelectResponse executeSelectRes = executeSelectFuture.get();
*
* if (!executeSelectRes.getIsSuccessful()) {
*   throw executeSelectRes.getBigQuerySQLException();
* }
*
* BigQueryResult bigQueryResult = executeSelectRes.getBigQueryResult();
* ResultSet rs = bigQueryResult.getResultSet();
* while (rs.next()) {
*   System.out.println(rs.getString(1));
* }
* }</pre>
*
* @param sql SQL SELECT query
* @param parameters named or positional parameters. The set of query parameters must either be
* all positional or all named parameters.
* @param labels (optional) the labels associated with this query. You can use these to organize
* and group your query jobs. Label keys and values can be no longer than 63 characters, can
* only contain lowercase letters, numeric characters, underscores and dashes. International
* characters are allowed. Label values are optional and labels are passed as varargs; you should
* pass all the labels in a single Map. Label keys must start with a letter and each label in the
* list must have a different key.
* @return a ListenableFuture that is used to get the data produced by the query
* @throws BigQuerySQLException upon failure
*/
@BetaApi
@Override
public ListenableFuture<ExecuteSelectResponse> executeSelectAsync(
String sql, List<Parameter> parameters, Map<String, String>... labels)
throws BigQuerySQLException {
return getExecuteSelectFuture(sql, parameters, labels);
}
@VisibleForTesting
BigQueryResult getResultSet(
GetQueryResultsResponse firstPage, JobId jobId, String sql, Boolean hasQueryParameters) {
if (firstPage.getTotalRows().compareTo(BigInteger.ZERO) > 0) {
return getSubsequentQueryResultsWithJob(
firstPage.getTotalRows().longValue(),
(long) firstPage.getRows().size(),
jobId,
firstPage,
hasQueryParameters);
}
return new BigQueryResultImpl(Schema.fromPb(firstPage.getSchema()), 0, null, null);
}
static class EndOfFieldValueList
extends AbstractList<
FieldValue> { // A reference of this class is used as a token to inform the thread
// consuming `buffer` BigQueryResultImpl that we have run out of records
@Override
public FieldValue get(int index) {
return null;
}
@Override
public int size() {
return 0;
}
}
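// Sketch of how a consumer can detect the end-of-stream token above (the real consumption happens
// inside BigQueryResultImpl; this is illustrative only):
//
//   AbstractList<FieldValue> next = bufferFvl.take();
//   if (next instanceof EndOfFieldValueList) {
//     // no more records; stop advancing the cursor
//   }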
private BigQueryResult queryRpc(
final String projectId,
final QueryRequest queryRequest,
String sql,
Boolean hasQueryParameters) {
com.google.api.services.bigquery.model.QueryResponse results;
try {
results =
BigQueryRetryHelper.runWithRetries(
() -> bigQueryRpc.queryRpc(projectId, queryRequest),
bigQueryOptions.getRetrySettings(),
BigQueryBaseService.BIGQUERY_EXCEPTION_HANDLER,
bigQueryOptions.getClock(),
retryConfig);
} catch (BigQueryRetryHelper.BigQueryRetryHelperException e) {
throw BigQueryException.translateAndThrow(e);
}
if (results.getErrors() != null) {
List<BigQueryError> bigQueryErrors =
results.getErrors().stream()
.map(BigQueryError.FROM_PB_FUNCTION)
.collect(Collectors.toList());
// Throwing BigQueryException since there may be no JobId, and we want to stay consistent
// with the case where there is an HTTP error
throw new BigQueryException(bigQueryErrors);
}
// Query finished running and we can paginate all the results
if (results.getJobComplete() && results.getSchema() != null) {
return processQueryResponseResults(results);
} else {
// Query is long-running (> 10s) and hasn't completed yet, or query completed but didn't
// return the schema, fallback to jobs.insert path. Some operations don't return the schema
// and can be optimized here, but this is left as future work.
Long totalRows = results.getTotalRows() == null ? null : results.getTotalRows().longValue();
Long pageRows = results.getRows() == null ? null : (long) (results.getRows().size());
logger.log(
Level.WARNING,
"\n"
+ String.format(
"results.getJobComplete(): %s, isSchemaNull: %s , totalRows: %s, pageRows: %s",
results.getJobComplete(), results.getSchema() == null, totalRows, pageRows));
JobId jobId = JobId.fromPb(results.getJobReference());
GetQueryResultsResponse firstPage = getQueryResultsFirstPage(jobId);
return getSubsequentQueryResultsWithJob(
totalRows, pageRows, jobId, firstPage, hasQueryParameters);
}
}
@VisibleForTesting
BigQueryResultStats getBigQueryResultSetStats(JobId jobId) {
// Create GetQueryResultsResponse query statistics
Job queryJob = getQueryJobRpc(jobId);
QueryStatistics queryStatistics = queryJob.getStatistics();
SessionInfo sessionInfo =
queryStatistics.getSessionInfo() == null ? null : queryStatistics.getSessionInfo();
return new BigQueryResultStatsImpl(queryStatistics, sessionInfo);
}
/* This method processes the first page of GetQueryResultsResponse and then uses tabledata.list */
@VisibleForTesting
BigQueryResult tableDataList(GetQueryResultsResponse firstPage, JobId jobId) {
Schema schema;
long numRows;
schema = Schema.fromPb(firstPage.getSchema());
numRows = firstPage.getTotalRows().longValue();
BigQueryResultStats bigQueryResultStats = getBigQueryResultSetStats(jobId);
// Keeps the deserialized records at the row level, which are consumed by BigQueryResult
bufferFvl = new LinkedBlockingDeque<>(getBufferSize());
// Keeps the parsed FieldValueLists
BlockingQueue<Tuple<Iterable<FieldValueList>, Boolean>> pageCache =
new LinkedBlockingDeque<>(
getPageCacheSize(connectionSettings.getNumBufferedRows(), schema));
// Keeps the raw RPC responses
BlockingQueue<Tuple<TableDataList, Boolean>> rpcResponseQueue =
new LinkedBlockingDeque<>(
getPageCacheSize(connectionSettings.getNumBufferedRows(), schema));
runNextPageTaskAsync(firstPage.getPageToken(), getDestinationTable(jobId), rpcResponseQueue);
parseRpcDataAsync(
firstPage.getRows(),
schema,
pageCache,
rpcResponseQueue); // parses data on a separate thread, thus maximising processing
// throughput
populateBufferAsync(
rpcResponseQueue, pageCache, bufferFvl); // spawns a thread to populate the buffer
// This will work for pagination as well, as buffer is getting updated asynchronously
return new BigQueryResultImpl<AbstractList<FieldValue>>(
schema, numRows, bufferFvl, bigQueryResultStats);
}
@VisibleForTesting
BigQueryResult processQueryResponseResults(
com.google.api.services.bigquery.model.QueryResponse results) {
Schema schema;
long numRows;
schema = Schema.fromPb(results.getSchema());
numRows =
results.getTotalRows() == null
? 0
: results.getTotalRows().longValue(); // in case of DML or DDL
// QueryResponse only provides cache hits, dmlStats, and sessionInfo as query processing
// statistics
DmlStats dmlStats =
results.getDmlStats() == null ? null : DmlStats.fromPb(results.getDmlStats());
Boolean cacheHit = results.getCacheHit();
QueryStatistics queryStatistics =
QueryStatistics.newBuilder().setDmlStats(dmlStats).setCacheHit(cacheHit).build();
// We cannot directly set sessionInfo in QueryStatistics
SessionInfo sessionInfo =
results.getSessionInfo() == null
? null
: JobStatistics.SessionInfo.fromPb(results.getSessionInfo());
BigQueryResultStats bigQueryResultStats =
new BigQueryResultStatsImpl(queryStatistics, sessionInfo);
bufferFvl = new LinkedBlockingDeque<>(getBufferSize());
BlockingQueue<Tuple<Iterable<FieldValueList>, Boolean>> pageCache =
new LinkedBlockingDeque<>(
getPageCacheSize(connectionSettings.getNumBufferedRows(), schema));
BlockingQueue<Tuple<TableDataList, Boolean>> rpcResponseQueue =
new LinkedBlockingDeque<>(
getPageCacheSize(connectionSettings.getNumBufferedRows(), schema));
JobId jobId = JobId.fromPb(results.getJobReference());
// Thread to make rpc calls to fetch data from the server
runNextPageTaskAsync(results.getPageToken(), getDestinationTable(jobId), rpcResponseQueue);
// Thread to parse data received from the server to client library objects
parseRpcDataAsync(results.getRows(), schema, pageCache, rpcResponseQueue);
// Thread to populate the buffer (a blocking queue) shared with the consumer
populateBufferAsync(rpcResponseQueue, pageCache, bufferFvl);
return new BigQueryResultImpl<AbstractList<FieldValue>>(
schema, numRows, bufferFvl, bigQueryResultStats);
}
@VisibleForTesting
void runNextPageTaskAsync(
String firstPageToken,
TableId destinationTable,
BlockingQueue<Tuple<TableDataList, Boolean>> rpcResponseQueue) {
// This thread makes the RPC calls and paginates
Runnable nextPageTask =
() -> {
String pageToken = firstPageToken; // results.getPageToken();
try {
while (pageToken != null) { // paginate for non null token
if (Thread.currentThread().isInterrupted()
|| queryTaskExecutor.isShutdown()) { // do not process further pages and shutdown
logger.log(
Level.WARNING,
"\n"
+ Thread.currentThread().getName()
+ " Interrupted @ runNextPageTaskAsync");
break;
}
TableDataList tabledataList = tableDataListRpc(destinationTable, pageToken);
pageToken = tabledataList.getPageToken();
rpcResponseQueue.put(
Tuple.of(
tabledataList,
true)); // this will be parsed asynchronously without blocking the current
// thread
}
rpcResponseQueue.put(
Tuple.of(
null, false)); // this will stop the parseDataTask as well when the pagination
// completes
} catch (Exception e) {
throw new BigQueryException(0, e.getMessage(), e);
} // We cannot do queryTaskExecutor.shutdownNow() here as populate buffer method may not
// have finished processing the records and even that will be interrupted
};
queryTaskExecutor.execute(nextPageTask);
}
/*
This method takes TableDataList from rpcResponseQueue and populates pageCache with FieldValueList
*/
@VisibleForTesting
void parseRpcDataAsync(
// com.google.api.services.bigquery.model.QueryResponse results,
List<TableRow> tableRows,
Schema schema,
BlockingQueue<Tuple<Iterable<FieldValueList>, Boolean>> pageCache,
BlockingQueue<Tuple<TableDataList, Boolean>> rpcResponseQueue) {
// parse and put the first page in the pageCache before the other pages are parsed from the RPC
// calls
Iterable<FieldValueList> firstFieldValueLists = getIterableFieldValueList(tableRows, schema);
try {
pageCache.put(
Tuple.of(firstFieldValueLists, true)); // this is the first page which we have received.
} catch (InterruptedException e) {
logger.log(
Level.WARNING,
"\n" + Thread.currentThread().getName() + " Interrupted @ parseRpcDataAsync");
}
// rpcResponseQueue will get null tuple if Cancel method is called, so no need to explicitly use
// thread interrupt here
Runnable parseDataTask =
() -> {
try {
boolean hasMorePages = true;
while (hasMorePages) {
if (Thread.currentThread().isInterrupted()
|| queryTaskExecutor.isShutdown()) { // do not process further data and shutdown
logger.log(
Level.WARNING,
"\n" + Thread.currentThread().getName() + " Interrupted @ parseRpcDataAsync");
break;
}
// no interrupt received till this point, continue processing
Tuple<TableDataList, Boolean> rpcResponse = rpcResponseQueue.take();
TableDataList tabledataList = rpcResponse.x();
hasMorePages = rpcResponse.y();
if (tabledataList != null) {
Iterable<FieldValueList> fieldValueLists =
getIterableFieldValueList(tabledataList.getRows(), schema); // Parse
pageCache.put(Tuple.of(fieldValueLists, true));
}
}
} catch (InterruptedException e) {
logger.log(
Level.WARNING,
"\n" + Thread.currentThread().getName() + " Interrupted @ parseRpcDataAsync",
e); // Thread might get interrupted while calling the Cancel method, which is
// expected, so logging this instead of throwing the exception back
}
try {
pageCache.put(Tuple.of(null, false)); // no further pages, graceful exit scenario
} catch (InterruptedException e) {
logger.log(
Level.WARNING,
"\n" + Thread.currentThread().getName() + " Interrupted @ parseRpcDataAsync",
e); // Thread might get interrupted while calling the Cancel method, which is
// expected, so logging this instead of throwing the exception back
} // We cannot do queryTaskExecutor.shutdownNow() here as populate buffer method may not
// have finished processing the records and even that will be interrupted
};
queryTaskExecutor.execute(parseDataTask);
}
@VisibleForTesting
void populateBufferAsync(
BlockingQueue<Tuple<TableDataList, Boolean>> rpcResponseQueue,
BlockingQueue<Tuple<Iterable<FieldValueList>, Boolean>> pageCache,
BlockingQueue<AbstractList<FieldValue>> buffer) {
Runnable populateBufferRunnable =
() -> { // producer thread populating the buffer
Iterable<FieldValueList> fieldValueLists = null;
boolean hasRows = true; // as we have to process the first page
while (hasRows) {
try {
Tuple<Iterable<FieldValueList>, Boolean> nextPageTuple = pageCache.take();
hasRows = nextPageTuple.y();
fieldValueLists = nextPageTuple.x();
} catch (InterruptedException e) {
logger.log(
Level.WARNING,
"\n" + Thread.currentThread().getName() + " Interrupted",
e); // Thread might get interrupted while calling the Cancel method, which is
// expected, so logging this instead of throwing the exception back
break;
}
if (Thread.currentThread().isInterrupted()
|| queryTaskExecutor.isShutdown()
|| fieldValueLists
== null) { // do not process further pages and shutdown (outer loop)
break;
}
for (FieldValueList fieldValueList : fieldValueLists) {
try {
if (Thread.currentThread().isInterrupted()
|| queryTaskExecutor
.isShutdown()) { // do not process further pages and shutdown (inner loop)
break;
}
buffer.put(fieldValueList);
} catch (InterruptedException e) {
throw new BigQueryException(0, e.getMessage(), e);
}
}
}
try {
buffer.put(
new EndOfFieldValueList()); // All the pages have been processed, put this marker
} catch (InterruptedException e) {
logger.log(
Level.WARNING,
"\n" + Thread.currentThread().getName() + " Interrupted @ populateBufferAsync",
e);
} finally {
queryTaskExecutor
.shutdownNow(); // Shutdown the thread pool. All the records are now processed
}
};
queryTaskExecutor.execute(populateBufferRunnable);
}
/**
* In an interrupt scenario, like when the background threads are still working and the user calls
* `connection.close()`, we need to add an End of Stream flag in the buffer so that
* `ResultSet.next()` stops advancing the cursor. We cannot rely on the `populateBufferAsync`
* method to do this as the `BlockingQueue.put()` call will error out after the interrupt is
* triggered.
*/
@InternalApi
void flagEndOfStream() { // package-private
try {
if (bufferFvl != null) { // i.e. the tabledata.list endpoint is used
bufferFvl.put(
new EndOfFieldValueList()); // All the pages have been processed, put this marker
} else if (bufferRow != null) {
bufferRow.put(
new BigQueryResultImpl.Row(
null, true)); // All the pages have been processed, put this marker
} else {
logger.log(
Level.WARNING,
"\n"
+ Thread.currentThread().getName()
+ " Could not flag End of Stream, both the buffer types are null. This might happen when the connection is close without executing a query");
}
} catch (InterruptedException e) {
logger.log(
Level.WARNING,
"\n" + Thread.currentThread().getName() + " Interrupted @ flagEndOfStream",
e);
}
}
/* Helper method that parses and populates a page with TableRows */
private static Iterable<FieldValueList> getIterableFieldValueList(
Iterable<TableRow> tableDataPb, final Schema schema) {
return ImmutableList.copyOf(
Iterables.transform(
tableDataPb != null ? tableDataPb : ImmutableList.of(),
new Function<TableRow, FieldValueList>() {
final FieldList fields = schema != null ? schema.getFields() : null;
@Override
public FieldValueList apply(TableRow rowPb) {
return FieldValueList.fromPb(rowPb.getF(), fields);
}
}));
}
/* Helper method that determines the optimal number of cached pages to improve read performance */
@VisibleForTesting
int getPageCacheSize(Integer numBufferedRows, Schema schema) {
final int MIN_CACHE_SIZE = 3; // Min number of pages to cache
final int MAX_CACHE_SIZE = 20; // Max number of pages to cache
int numColumns = schema.getFields().size();
int numCachedPages;
long numCachedRows = numBufferedRows == null ? 0 : numBufferedRows.longValue();
// TODO: Further enhance this logic depending on customer feedback on memory consumption
if (numCachedRows > 10000) {
numCachedPages =
2; // numBufferedRows is quite large and, as per our tests, a small number of cached pages
// should be enough
} else if (numColumns > 15
&& numCachedRows
> 5000) { // too many fields are being read, setting the page size on the lower end
numCachedPages = 3;
} else if (numCachedRows < 2000
&& numColumns < 15) { // low page size with a small number of columns, we can cache more pages
numCachedPages = 20;
} else { // default - under 10K numCachedRows with any number of columns
numCachedPages = 5;
}
return numCachedPages < MIN_CACHE_SIZE
? MIN_CACHE_SIZE
: (Math.min(
numCachedPages,
MAX_CACHE_SIZE)); // numCachedPages should be between the defined min and max
}
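// Worked examples of the sizing rule above (illustrative): 1500 buffered rows with 10 columns
// falls into the "< 2000 rows and < 15 columns" branch and caches 20 pages; 12000 buffered rows
// selects 2 pages but is clamped up to MIN_CACHE_SIZE = 3; 6000 rows with 20 columns caches 3
// pages; everything else defaults to 5.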
/* Returns query results using either tabledata.list or the high throughput Read API */
@VisibleForTesting
BigQueryResult getSubsequentQueryResultsWithJob(
Long totalRows,
Long pageRows,
JobId jobId,
GetQueryResultsResponse firstPage,
Boolean hasQueryParameters) {
TableId destinationTable = getDestinationTable(jobId);
return useReadAPI(totalRows, pageRows, Schema.fromPb(firstPage.getSchema()), hasQueryParameters)
? highThroughPutRead(
destinationTable,
firstPage.getTotalRows().longValue(),
Schema.fromPb(firstPage.getSchema()),
getBigQueryResultSetStats(
jobId)) // discard the first page and stream the entire BigQueryResult using
// the Read API
: tableDataList(firstPage, jobId);
}
/* Returns query results using either tabledata.list or the high throughput Read API */
@VisibleForTesting
BigQueryResult getSubsequentQueryResultsWithJob(
Long totalRows,
Long pageRows,
JobId jobId,
GetQueryResultsResponse firstPage,
Schema schema,
Boolean hasQueryParameters) {
TableId destinationTable = getDestinationTable(jobId);
return useReadAPI(totalRows, pageRows, schema, hasQueryParameters)
? highThroughPutRead(
destinationTable,
totalRows == null
? -1L
: totalRows, // totalRows is null when the job is still running. TODO: Check if
// any workaround is possible
schema,
getBigQueryResultSetStats(
jobId)) // discard the first page and stream the entire BigQueryResult using
// the Read API
: tableDataList(firstPage, jobId);
}
/* Returns Job from jobId by calling the jobs.get API */
private Job getQueryJobRpc(JobId jobId) {
final JobId completeJobId =
jobId
.setProjectId(bigQueryOptions.getProjectId())
.setLocation(
jobId.getLocation() == null && bigQueryOptions.getLocation() != null
? bigQueryOptions.getLocation()
: jobId.getLocation());
com.google.api.services.bigquery.model.Job jobPb;
try {
jobPb =
runWithRetries(
() ->
bigQueryRpc.getQueryJob(
completeJobId.getProject(),
completeJobId.getJob(),
completeJobId.getLocation()),
bigQueryOptions.getRetrySettings(),
BigQueryBaseService.BIGQUERY_EXCEPTION_HANDLER,
bigQueryOptions.getClock());
if (bigQueryOptions.getThrowNotFound() && jobPb == null) {
throw new BigQueryException(HTTP_NOT_FOUND, "Query job not found");
}
} catch (RetryHelper.RetryHelperException e) {
throw BigQueryException.translateAndThrow(e);
}
return Job.fromPb(bigQueryOptions.getService(), jobPb);
}
/* Returns the destinationTable from jobId by calling jobs.get API */
@VisibleForTesting
TableId getDestinationTable(JobId jobId) {
Job job = getQueryJobRpc(jobId);
return ((QueryJobConfiguration) job.getConfiguration()).getDestinationTable();
}
@VisibleForTesting
TableDataList tableDataListRpc(TableId destinationTable, String pageToken) {
try {
final TableId completeTableId =
destinationTable.setProjectId(
Strings.isNullOrEmpty(destinationTable.getProject())
? bigQueryOptions.getProjectId()
: destinationTable.getProject());
TableDataList results =
runWithRetries(
() ->
bigQueryOptions
.getBigQueryRpcV2()
.listTableDataWithRowLimit(
completeTableId.getProject(),
completeTableId.getDataset(),
completeTableId.getTable(),
connectionSettings.getMaxResultPerPage(),
pageToken),
bigQueryOptions.getRetrySettings(),
BigQueryBaseService.BIGQUERY_EXCEPTION_HANDLER,
bigQueryOptions.getClock());
return results;
} catch (RetryHelper.RetryHelperException e) {
throw BigQueryException.translateAndThrow(e);
}
}
@VisibleForTesting
BigQueryResult highThroughPutRead(
TableId destinationTable, long totalRows, Schema schema, BigQueryResultStats stats) {
try {
if (bqReadClient == null) { // if the read client isn't already initialized. Not thread safe.
BigQueryReadSettings settings =
BigQueryReadSettings.newBuilder()
.setCredentialsProvider(
FixedCredentialsProvider.create(bigQueryOptions.getCredentials()))
.build();
bqReadClient = BigQueryReadClient.create(settings);
}
String parent = String.format("projects/%s", destinationTable.getProject());
String srcTable =
String.format(
"projects/%s/datasets/%s/tables/%s",
destinationTable.getProject(),
destinationTable.getDataset(),
destinationTable.getTable());
// Read all the columns of the source table (temp table) and stream the data back in Arrow
// format
ReadSession.Builder sessionBuilder =
ReadSession.newBuilder().setTable(srcTable).setDataFormat(DataFormat.ARROW);
CreateReadSessionRequest.Builder builder =
CreateReadSessionRequest.newBuilder()
.setParent(parent)
.setReadSession(sessionBuilder)
.setMaxStreamCount(1) // Currently just one stream is allowed
// TODO: do a regex check for "order by" and use multiple streams when ordering is not required
;
ReadSession readSession = bqReadClient.createReadSession(builder.build());
bufferRow = new LinkedBlockingDeque<>(getBufferSize());
Map<String, Integer> arrowNameToIndex = new HashMap<>();
// deserialize and populate the buffer async, so that the client isn't blocked
processArrowStreamAsync(
readSession,
bufferRow,
new ArrowRowReader(readSession.getArrowSchema(), arrowNameToIndex),
schema);
logger.log(Level.INFO, "\n Using BigQuery Read API");
return new BigQueryResultImpl(schema, totalRows, bufferRow, stats);
} catch (IOException e) {
throw BigQueryException.translateAndThrow(e);
}
}
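// A short map of the Read API path above (descriptive only): processArrowStreamAsync streams Arrow
// record batches on a background task, ArrowRowReader deserializes each batch into per-row maps,
// and the shared bufferRow queue feeds BigQueryResultImpl; a Row(null, true) marker put in the
// finally block signals end of stream to the consumer.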
private void processArrowStreamAsync(
ReadSession readSession,
BlockingQueue<BigQueryResultImpl.Row> buffer,
ArrowRowReader reader,
Schema schema) {
Runnable arrowStreamProcessor =
() -> {
try {
// Use the first stream to perform reading.
String streamName = readSession.getStreams(0).getName();
ReadRowsRequest readRowsRequest =
ReadRowsRequest.newBuilder().setReadStream(streamName).build();
// Process each block of rows as they arrive and decode using our simple row reader.
com.google.api.gax.rpc.ServerStream<ReadRowsResponse> stream =
bqReadClient.readRowsCallable().call(readRowsRequest);
for (ReadRowsResponse response : stream) {
if (Thread.currentThread().isInterrupted()
|| queryTaskExecutor.isShutdown()) { // do not process and shutdown
break;
}
reader.processRows(response.getArrowRecordBatch(), buffer, schema);
}
} catch (Exception e) {
throw BigQueryException.translateAndThrow(e);
} finally { // logic needed for graceful shutdown
// marking end of stream
try {
buffer.put(
new BigQueryResultImpl.Row(
null, true)); // All the pages have been processed, put this marker
} catch (InterruptedException e) {
logger.log(
Level.WARNING,
"\n" + Thread.currentThread().getName() + " Interrupted @ markLast",
e);
}
queryTaskExecutor.shutdownNow(); // Shutdown the thread pool
}
};
queryTaskExecutor.execute(arrowStreamProcessor);
}
private class ArrowRowReader
implements AutoCloseable { // TODO: Update to a recent version of Arrow to avoid the memory leak
BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
// Decoder object will be reused to avoid re-allocation and too much garbage collection.
private final VectorSchemaRoot root;
private final VectorLoader loader;
private ArrowRowReader(ArrowSchema arrowSchema, Map<String, Integer> arrowNameToIndex)
throws IOException {
org.apache.arrow.vector.types.pojo.Schema schema =
MessageSerializer.deserializeSchema(
new org.apache.arrow.vector.ipc.ReadChannel(
new ByteArrayReadableSeekableByteChannel(
arrowSchema.getSerializedSchema().toByteArray())));
List<FieldVector> vectors = new ArrayList<>();
List<Field> fields = schema.getFields();
for (int i = 0; i < fields.size(); i++) {
vectors.add(fields.get(i).createVector(allocator));
arrowNameToIndex.put(
fields.get(i).getName(),
i); // mapping for looking up the column index by field name in the result set
}
root = new VectorSchemaRoot(vectors);
loader = new VectorLoader(root);
}
/** @param batch object returned from the ReadRowsResponse. */
private void processRows(
ArrowRecordBatch batch, BlockingQueue<BigQueryResultImpl.Row> buffer, Schema schema)
throws IOException { // deserialize the values and consume the hash of the values
try {
org.apache.arrow.vector.ipc.message.ArrowRecordBatch deserializedBatch =
MessageSerializer.deserializeRecordBatch(
new ReadChannel(
new ByteArrayReadableSeekableByteChannel(
batch.getSerializedRecordBatch().toByteArray())),
allocator);
loader.load(deserializedBatch);
// Release buffers from batch (they are still held in the vectors in root).
deserializedBatch.close();
// Parse the vectors using BQ Schema. Deserialize the data at the row level and add it to
// the buffer
FieldList fields = schema.getFields();
for (int rowNum = 0;
rowNum < root.getRowCount();
rowNum++) { // for the given number of rows in the batch
if (Thread.currentThread().isInterrupted()
|| queryTaskExecutor.isShutdown()) { // do not process and shutdown
break; // exit the loop, root will be cleared in the finally block
}
Map<String, Object> curRow = new HashMap<>();
for (int col = 0; col < fields.size(); col++) { // iterate all the vectors for a given row
com.google.cloud.bigquery.Field field = fields.get(col);
FieldVector curFieldVec =
root.getVector(
field.getName()); // can be accessed using the index or Vector/column name
curRow.put(field.getName(), curFieldVec.getObject(rowNum)); // Added the raw value
}
buffer.put(new BigQueryResultImpl.Row(curRow));
}
root.clear();
} catch (RuntimeException | InterruptedException e) {
throw BigQueryException.translateAndThrow(e);
} finally {
try {
root.clear();
} catch (RuntimeException e) {
logger.log(Level.WARNING, "\n Error while clearing VectorSchemaRoot ", e);
}
}
}
@Override
public void close() {
root.close();
allocator.close();
}
}
/* Returns just the first page of GetQueryResultsResponse using the jobId */
@VisibleForTesting
GetQueryResultsResponse getQueryResultsFirstPage(JobId jobId) {
JobId completeJobId =
jobId
.setProjectId(bigQueryOptions.getProjectId())
.setLocation(
jobId.getLocation() == null && bigQueryOptions.getLocation() != null
? bigQueryOptions.getLocation()
: jobId.getLocation());
// Implementing logic to poll the Job's status using getQueryResults as
// we do not get rows, rows count and schema unless the job is complete
// Ref: b/241134681
// This logic relies on the backend to poll and wait. BigQuery guarantees that jobs make forward
// progress (a job won't get stuck in pending forever).
boolean jobComplete = false;
GetQueryResultsResponse results = null;
long timeoutMs = 10000; // defaulting to 10 seconds.
while (!jobComplete) {
try {
results =
BigQueryRetryHelper.runWithRetries(
() ->
bigQueryRpc.getQueryResultsWithRowLimit(
completeJobId.getProject(),
completeJobId.getJob(),
completeJobId.getLocation(),
connectionSettings.getMaxResultPerPage(),
timeoutMs),
bigQueryOptions.getRetrySettings(),
BigQueryBaseService.BIGQUERY_EXCEPTION_HANDLER,
bigQueryOptions.getClock(),
retryConfig);
if (results.getErrors() != null) {
List<BigQueryError> bigQueryErrors =
results.getErrors().stream()
.map(BigQueryError.FROM_PB_FUNCTION)
.collect(Collectors.toList());
// Throwing BigQueryException since there may be no JobId, and we want to stay consistent
// with the case where there is an HTTP error
throw new BigQueryException(bigQueryErrors);
}
} catch (BigQueryRetryHelper.BigQueryRetryHelperException e) {
logger.log(Level.WARNING, "\n Error occurred while calling getQueryResultsWithRowLimit", e);
throw BigQueryException.translateAndThrow(e);
}
jobComplete = results.getJobComplete();
// This log msg at Level.FINE shows that the job is still running, and not stuck, for
// very long running jobs.
logger.log(
Level.FINE,
String.format(
"jobComplete: %s , Polling getQueryResults with timeoutMs: %s",
jobComplete, timeoutMs));
}
return results;
}
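// Polling sketch for the loop above: each getQueryResultsWithRowLimit call blocks server side for
// up to timeoutMs (10 seconds here); if the job has still not completed, the loop simply issues
// another call, so no explicit client-side sleep or backoff is needed while waiting for the first
// page.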
@VisibleForTesting
boolean isFastQuerySupported() {
// TODO: add regex logic to check for scripting
return connectionSettings.getClustering() == null
&& connectionSettings.getCreateDisposition() == null
&& connectionSettings.getDestinationEncryptionConfiguration() == null
&& connectionSettings.getDestinationTable() == null
&& connectionSettings.getJobTimeoutMs() == null
&& connectionSettings.getMaximumBillingTier() == null
&& connectionSettings.getPriority() == null
&& connectionSettings.getRangePartitioning() == null
&& connectionSettings.getSchemaUpdateOptions() == null
&& connectionSettings.getTableDefinitions() == null
&& connectionSettings.getTimePartitioning() == null
&& connectionSettings.getUserDefinedFunctions() == null
&& connectionSettings.getWriteDisposition() == null;
}
@VisibleForTesting
boolean useReadAPI(Long totalRows, Long pageRows, Schema schema, Boolean hasQueryParameters) {
// Read API does not yet support Interval Type or QueryParameters
if (containsIntervalType(schema) || hasQueryParameters) {
logger.log(Level.INFO, "\n Schema has IntervalType, or QueryParameters. Disabling ReadAPI");
return false;
}
if (totalRows == null || pageRows == null) {
return connectionSettings.getUseReadAPI();
}
if (Boolean.TRUE.equals(connectionSettings.getUseReadAPI())) {
long resultRatio = totalRows / pageRows;
return resultRatio >= connectionSettings.getTotalToPageRowCountRatio()
&& totalRows > connectionSettings.getMinResultSize();
} else {
return false;
}
}
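// Illustrative decision, assuming setUseReadAPI(true) and, purely for the example, a
// TotalToPageRowCountRatio of 3 and a MinResultSize of 100000 (both come from ConnectionSettings,
// not from this class): a 500000-row result whose first page carries 100000 rows gives
// resultRatio = 5 >= 3 and totalRows > 100000, so the Read API is used; a 150000-row result with a
// 100000-row first page gives resultRatio = 1 and falls back to tabledata.list.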
// Does a BFS iteration to find out if there's an interval type in the schema. Implementation to
// be used until ReadAPI supports IntervalType
private boolean containsIntervalType(Schema schema) {
Queue<com.google.cloud.bigquery.Field> fields =
new LinkedList<com.google.cloud.bigquery.Field>(schema.getFields());
while (!fields.isEmpty()) {
com.google.cloud.bigquery.Field curField = fields.poll();
if (curField.getType().getStandardType() == StandardSQLTypeName.INTERVAL) {
return true;
} else if (curField.getType().getStandardType() == StandardSQLTypeName.STRUCT
|| curField.getType().getStandardType() == StandardSQLTypeName.ARRAY) {
fields.addAll(curField.getSubFields());
}
}
return false;
}
// Used for the jobs.query API endpoint
@VisibleForTesting
QueryRequest createQueryRequest(
ConnectionSettings connectionSettings,
String sql,
List<Parameter> queryParameters,
Map<String, String> labels) {
QueryRequest content = new QueryRequest();
String requestId = UUID.randomUUID().toString();
if (connectionSettings.getConnectionProperties() != null) {
content.setConnectionProperties(
connectionSettings.getConnectionProperties().stream()
.map(ConnectionProperty.TO_PB_FUNCTION)
.collect(Collectors.toList()));
}
if (connectionSettings.getDefaultDataset() != null) {
content.setDefaultDataset(connectionSettings.getDefaultDataset().toPb());
}
if (connectionSettings.getMaximumBytesBilled() != null) {
content.setMaximumBytesBilled(connectionSettings.getMaximumBytesBilled());
}
if (connectionSettings.getMaxResults() != null) {
content.setMaxResults(connectionSettings.getMaxResults());
}
if (queryParameters != null) {
// content.setQueryParameters(queryParameters);
if (queryParameters.get(0).getName() == null) {
// If query parameter name is unset, then assume mode is positional
content.setParameterMode("POSITIONAL");
// pass query parameters
List<QueryParameter> queryParametersPb =
Lists.transform(queryParameters, POSITIONAL_PARAMETER_TO_PB_FUNCTION);
content.setQueryParameters(queryParametersPb);
} else {
content.setParameterMode("NAMED");
// pass query parameters
List<QueryParameter> queryParametersPb =
Lists.transform(queryParameters, NAMED_PARAMETER_TO_PB_FUNCTION);
content.setQueryParameters(queryParametersPb);
}
}
if (connectionSettings.getCreateSession() != null) {
content.setCreateSession(connectionSettings.getCreateSession());
}
if (labels != null) {
content.setLabels(labels);
}
content.setQuery(sql);
content.setRequestId(requestId);
// The new Connection interface only supports StandardSQL dialect
content.setUseLegacySql(false);
return content;
}
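// Note on the parameter-mode selection above: positional parameters correspond to "?" placeholders
// in the SQL text (e.g. "SELECT x FROM t WHERE y = ?"), while named parameters correspond to
// "@name" placeholders (e.g. "... WHERE y = @yValue"); a null name on the first Parameter is taken
// to mean the whole list is positional.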
// Used by jobs.getQueryResults API endpoint
@VisibleForTesting
com.google.api.services.bigquery.model.Job createQueryJob(
String sql,
ConnectionSettings connectionSettings,
List<Parameter> queryParameters,
Map<String, String> labels) {
com.google.api.services.bigquery.model.JobConfiguration configurationPb =
new com.google.api.services.bigquery.model.JobConfiguration();
JobConfigurationQuery queryConfigurationPb = new JobConfigurationQuery();
queryConfigurationPb.setQuery(sql);
if (queryParameters != null) {
if (queryParameters.get(0).getName() == null) {
// If query parameter name is unset, then assume mode is positional
queryConfigurationPb.setParameterMode("POSITIONAL");
// pass query parameters
List<QueryParameter> queryParametersPb =
Lists.transform(queryParameters, POSITIONAL_PARAMETER_TO_PB_FUNCTION);
queryConfigurationPb.setQueryParameters(queryParametersPb);
} else {
queryConfigurationPb.setParameterMode("NAMED");
// pass query parameters
List<QueryParameter> queryParametersPb =
Lists.transform(queryParameters, NAMED_PARAMETER_TO_PB_FUNCTION);
queryConfigurationPb.setQueryParameters(queryParametersPb);
}
}
if (connectionSettings.getDestinationTable() != null) {
queryConfigurationPb.setDestinationTable(connectionSettings.getDestinationTable().toPb());
}
if (connectionSettings.getTableDefinitions() != null) {
queryConfigurationPb.setTableDefinitions(
Maps.transformValues(
connectionSettings.getTableDefinitions(),
ExternalTableDefinition.TO_EXTERNAL_DATA_FUNCTION));
}
if (connectionSettings.getUserDefinedFunctions() != null) {
queryConfigurationPb.setUserDefinedFunctionResources(
connectionSettings.getUserDefinedFunctions().stream()
.map(UserDefinedFunction.TO_PB_FUNCTION)
.collect(Collectors.toList()));
}
if (connectionSettings.getCreateDisposition() != null) {
queryConfigurationPb.setCreateDisposition(
connectionSettings.getCreateDisposition().toString());
}
if (connectionSettings.getWriteDisposition() != null) {
queryConfigurationPb.setWriteDisposition(connectionSettings.getWriteDisposition().toString());
}
if (connectionSettings.getDefaultDataset() != null) {
queryConfigurationPb.setDefaultDataset(connectionSettings.getDefaultDataset().toPb());
}
if (connectionSettings.getPriority() != null) {
queryConfigurationPb.setPriority(connectionSettings.getPriority().toString());
}
if (connectionSettings.getAllowLargeResults() != null) {
queryConfigurationPb.setAllowLargeResults(connectionSettings.getAllowLargeResults());
}
if (connectionSettings.getUseQueryCache() != null) {
queryConfigurationPb.setUseQueryCache(connectionSettings.getUseQueryCache());
}
if (connectionSettings.getFlattenResults() != null) {
queryConfigurationPb.setFlattenResults(connectionSettings.getFlattenResults());
}
if (connectionSettings.getMaximumBillingTier() != null) {
queryConfigurationPb.setMaximumBillingTier(connectionSettings.getMaximumBillingTier());
}
if (connectionSettings.getMaximumBytesBilled() != null) {
queryConfigurationPb.setMaximumBytesBilled(connectionSettings.getMaximumBytesBilled());
}
if (connectionSettings.getSchemaUpdateOptions() != null) {
ImmutableList.Builder<String> schemaUpdateOptionsBuilder = new ImmutableList.Builder<>();
for (JobInfo.SchemaUpdateOption schemaUpdateOption :
connectionSettings.getSchemaUpdateOptions()) {
schemaUpdateOptionsBuilder.add(schemaUpdateOption.name());
}
queryConfigurationPb.setSchemaUpdateOptions(schemaUpdateOptionsBuilder.build());
}
if (connectionSettings.getDestinationEncryptionConfiguration() != null) {
queryConfigurationPb.setDestinationEncryptionConfiguration(
connectionSettings.getDestinationEncryptionConfiguration().toPb());
}
if (connectionSettings.getTimePartitioning() != null) {
queryConfigurationPb.setTimePartitioning(connectionSettings.getTimePartitioning().toPb());
}
if (connectionSettings.getClustering() != null) {
queryConfigurationPb.setClustering(connectionSettings.getClustering().toPb());
}
if (connectionSettings.getRangePartitioning() != null) {
queryConfigurationPb.setRangePartitioning(connectionSettings.getRangePartitioning().toPb());
}
if (connectionSettings.getConnectionProperties() != null) {
queryConfigurationPb.setConnectionProperties(
connectionSettings.getConnectionProperties().stream()
.map(ConnectionProperty.TO_PB_FUNCTION)
.collect(Collectors.toList()));
}
if (connectionSettings.getCreateSession() != null) {
queryConfigurationPb.setCreateSession(connectionSettings.getCreateSession());
}
if (connectionSettings.getJobTimeoutMs() != null) {
configurationPb.setJobTimeoutMs(connectionSettings.getJobTimeoutMs());
}
if (labels != null) {
configurationPb.setLabels(labels);
}
// The new Connection interface only supports StandardSQL dialect
queryConfigurationPb.setUseLegacySql(false);
configurationPb.setQuery(queryConfigurationPb);
com.google.api.services.bigquery.model.Job jobPb =
JobInfo.of(QueryJobConfiguration.fromPb(configurationPb)).toPb();
com.google.api.services.bigquery.model.Job queryJob;
try {
queryJob =
BigQueryRetryHelper.runWithRetries(
() -> bigQueryRpc.createJobForQuery(jobPb),
bigQueryOptions.getRetrySettings(),
BigQueryBaseService.BIGQUERY_EXCEPTION_HANDLER,
bigQueryOptions.getClock(),
retryConfig);
} catch (BigQueryRetryHelper.BigQueryRetryHelperException e) {
logger.log(Level.WARNING, "\n Error occurred while calling createJobForQuery", e);
throw BigQueryException.translateAndThrow(e);
}
logger.log(Level.INFO, "\n Query job created");
return queryJob;
}
// Used by dryRun
@VisibleForTesting
com.google.api.services.bigquery.model.Job createDryRunJob(String sql) {
com.google.api.services.bigquery.model.JobConfiguration configurationPb =
new com.google.api.services.bigquery.model.JobConfiguration();
configurationPb.setDryRun(true);
JobConfigurationQuery queryConfigurationPb = new JobConfigurationQuery();
String parameterMode = sql.contains("?") ? "POSITIONAL" : "NAMED";
queryConfigurationPb.setParameterMode(parameterMode);
queryConfigurationPb.setQuery(sql);
// UndeclaredQueryParameter is only supported in StandardSQL
queryConfigurationPb.setUseLegacySql(false);
if (connectionSettings.getDefaultDataset() != null) {
queryConfigurationPb.setDefaultDataset(connectionSettings.getDefaultDataset().toPb());
}
if (connectionSettings.getCreateSession() != null) {
queryConfigurationPb.setCreateSession(connectionSettings.getCreateSession());
}
configurationPb.setQuery(queryConfigurationPb);
com.google.api.services.bigquery.model.Job jobPb =
JobInfo.of(QueryJobConfiguration.fromPb(configurationPb)).toPb();
com.google.api.services.bigquery.model.Job dryRunJob;
try {
dryRunJob =
BigQueryRetryHelper.runWithRetries(
() -> bigQueryRpc.createJobForQuery(jobPb),
bigQueryOptions.getRetrySettings(),
BigQueryBaseService.BIGQUERY_EXCEPTION_HANDLER,
bigQueryOptions.getClock(),
retryConfig);
} catch (BigQueryRetryHelper.BigQueryRetryHelperException e) {
throw BigQueryException.translateAndThrow(e);
}
return dryRunJob;
}
// Convert from Parameter wrapper class to positional QueryParameter generated class
private static final Function<Parameter, QueryParameter> POSITIONAL_PARAMETER_TO_PB_FUNCTION =
value -> {
QueryParameter queryParameterPb = new QueryParameter();
queryParameterPb.setParameterValue(value.getValue().toValuePb());
queryParameterPb.setParameterType(value.getValue().toTypePb());
return queryParameterPb;
};
// Convert from Parameter wrapper class to named QueryParameter generated class
private static final Function<Parameter, QueryParameter> NAMED_PARAMETER_TO_PB_FUNCTION =
value -> {
QueryParameter queryParameterPb = new QueryParameter();
queryParameterPb.setName(value.getName());
queryParameterPb.setParameterValue(value.getValue().toValuePb());
queryParameterPb.setParameterType(value.getValue().toTypePb());
return queryParameterPb;
};
// Convert from QueryParameter class to the Parameter wrapper class
private static final Function<QueryParameter, Parameter> QUERY_PARAMETER_FROM_PB_FUNCTION =
pb ->
Parameter.newBuilder()
.setName(pb.getName() == null ? "" : pb.getName())
.setValue(QueryParameterValue.fromPb(pb.getParameterValue(), pb.getParameterType()))
.build();
}