/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.google.cloud.dataflow.sdk.util;

import com.google.api.client.googleapis.services.AbstractGoogleClientRequest;
import com.google.api.client.util.BackOff;
import com.google.api.client.util.BackOffUtils;
import com.google.api.client.util.Data;
import com.google.api.client.util.Preconditions;
import com.google.api.client.util.Sleeper;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.Bigquery.Jobs.Insert;
import com.google.api.services.bigquery.model.Dataset;
import com.google.api.services.bigquery.model.DatasetReference;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.Table;
import com.google.api.services.bigquery.model.TableDataList;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import org.joda.time.Duration;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Random;

/**
 * Iterates over all rows in a table, or over the rows resulting from a query.
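 *
 * <p>A minimal usage sketch (assuming an authorized {@code Bigquery} client named {@code client}
 * and a {@code TableReference} named {@code table} are already available):
 *
 * <pre>{@code
 * try (BigQueryTableRowIterator iterator = new BigQueryTableRowIterator(client, table)) {
 *   while (iterator.hasNext()) {
 *     TableRow row = iterator.next();
 *     // Process the row.
 *   }
 * }
 * }</pre>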
*/
public class BigQueryTableRowIterator implements Iterator<TableRow>, Closeable {
  private static final Logger LOG = LoggerFactory.getLogger(BigQueryTableRowIterator.class);

  private final Bigquery client;
  private TableReference ref;
  private final String projectId;
  private TableSchema schema;
  private String pageToken;
  private Iterator<TableRow> rowIterator;
  // Set true when the final page is seen from the service.
  private boolean lastPage = false;

  // The maximum number of times a BigQuery request will be retried.
  private static final int MAX_RETRIES = 3;
  // Initial wait time for the backoff implementation.
  private static final Duration INITIAL_BACKOFF_TIME = Duration.standardSeconds(1);
  // After sending a query to the BigQuery service, poll at the following interval to check the
  // status of the query execution job.
  private static final Duration QUERY_COMPLETION_POLL_TIME = Duration.standardSeconds(1);

  // The query to execute, or null when iterating over a table directly.
  private final String query;
  // Temporary dataset used to store query results.
  private String temporaryDatasetId = null;
  // Temporary table used to store query results.
  private String temporaryTableId = null;
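
  /**
   * Constructs an iterator that reads all rows of the given table.
   */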
  public BigQueryTableRowIterator(Bigquery client, TableReference ref) {
    this.client = client;
    this.ref = ref;
    this.query = null;
    this.projectId = ref.getProjectId();
  }
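
  /**
   * Constructs an iterator that executes the given query in {@code projectId} and iterates over
   * the resulting rows. The query results are stored in a temporary dataset.
   */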
  public BigQueryTableRowIterator(Bigquery client, String query, String projectId) {
    this.client = client;
    this.ref = null;
    this.query = query;
    this.projectId = projectId;
  }
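
  // Lazily fetches the next page of results from the service once the current page is exhausted.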
  @Override
  public boolean hasNext() {
    try {
      if (rowIterator == null || (!rowIterator.hasNext() && !lastPage)) {
        readNext();
      }
    } catch (IOException | InterruptedException e) {
      throw new RuntimeException(e);
    }
    return rowIterator.hasNext();
  }
  /**
   * Adjusts a field returned from the API to match the type that will be seen when run on the
   * backend service. The end result is:
   *
   * <ul>
   *   <li>{@code TIMESTAMP} columns are {@link String}s that are of the format
   *       {@code yyyy-MM-dd HH:mm:ss.SSS UTC}.
   *   <li>Every other atomic type is a {@link String}.
   * </ul>
   *
   * <p>Note that currently integers are encoded as strings to match the behavior of the backend
   * service.
   */
  private Object getTypedCellValue(TableFieldSchema fieldSchema, Object v) {
    // In the input from the BQ API, atomic types all come in as strings, while on the Dataflow
    // service they have more precise types.
    if (Data.isNull(v)) {
      return null;
    }

    if (Objects.equals(fieldSchema.getMode(), "REPEATED")) {
      TableFieldSchema elementSchema = fieldSchema.clone().setMode("REQUIRED");
      @SuppressWarnings("unchecked")
      List