// Source: com.google.cloud.bigquery.ExternalTableDefinition, from the google-cloud-bigquery
// artifact (Maven / Gradle / Ivy).
/*
* Copyright 2016 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.bigquery;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;
import com.google.api.services.bigquery.model.ExternalDataConfiguration;
import com.google.api.services.bigquery.model.Table;
import com.google.auto.value.AutoValue;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import java.util.List;
import javax.annotation.Nullable;
/**
 * Google BigQuery external table definition. BigQuery's external tables are tables whose data
 * reside outside of BigQuery but can be queried as normal BigQuery tables. External tables are
 * experimental and might be subject to change or removed.
 *
 * @see <a href="https://cloud.google.com/bigquery/federated-data-sources">Federated Data
 *     Sources</a>
 */
@AutoValue
public abstract class ExternalTableDefinition extends TableDefinition {
/**
 * Converts an API {@code ExternalDataConfiguration} model into an {@code
 * ExternalTableDefinition} by delegating to {@link
 * #fromExternalDataConfiguration(ExternalDataConfiguration)}.
 */
static final Function<ExternalDataConfiguration, ExternalTableDefinition>
    FROM_EXTERNAL_DATA_FUNCTION =
        new Function<ExternalDataConfiguration, ExternalTableDefinition>() {
          @Override
          public ExternalTableDefinition apply(ExternalDataConfiguration pb) {
            return ExternalTableDefinition.fromExternalDataConfiguration(pb);
          }
        };
/**
 * Converts an {@code ExternalTableDefinition} into its API {@code ExternalDataConfiguration}
 * model by delegating to {@link #toExternalDataConfigurationPb()}.
 */
static final Function<ExternalTableDefinition, ExternalDataConfiguration>
    TO_EXTERNAL_DATA_FUNCTION =
        new Function<ExternalTableDefinition, ExternalDataConfiguration>() {
          @Override
          public ExternalDataConfiguration apply(ExternalTableDefinition tableInfo) {
            return tableInfo.toExternalDataConfigurationPb();
          }
        };
private static final long serialVersionUID = -5951580238459622025L;
/**
 * Builder for {@link ExternalTableDefinition}. The abstract setters are implemented by the
 * AutoValue-generated subclass; the concrete public setters delegate to package-private
 * {@code ...Inner}/{@code ...Immut} setters.
 */
@AutoValue.Builder
public abstract static class Builder
    extends TableDefinition.Builder<ExternalTableDefinition, Builder> {

  /**
   * Sets the fully-qualified URIs that point to your data in Google Cloud Storage (e.g.
   * gs://bucket/path). Each URI can contain one '*' wildcard character that must come after the
   * bucket's name. Size limits related to load jobs apply to external data sources, plus an
   * additional limit of 10 GB maximum size across all URIs.
   *
   * <p>For Google Cloud Bigtable URIs: Exactly one URI can be specified and it has to be a fully
   * specified and valid HTTPS URL for a Google Cloud Bigtable table.
   *
   * <p>For Google Cloud Datastore backup URIs: Exactly one URI can be specified. Also, the '*'
   * wildcard character is not allowed.
   *
   * <p>The list is copied with {@code ImmutableList.copyOf}, so later changes to {@code
   * sourceUris} are not reflected in the builder.
   *
   * @see <a href="https://cloud.google.com/bigquery/loading-data-into-bigquery#quota">Quota</a>
   */
  public Builder setSourceUris(List<String> sourceUris) {
    return setSourceUrisImmut(ImmutableList.copyOf(sourceUris));
  }

  abstract Builder setFileSetSpecTypeInner(String spec);

  abstract Builder setSourceUrisImmut(ImmutableList<String> sourceUris);

  /**
   * Defines how to interpret files denoted by URIs. By default the files are assumed to be data
   * files (this can be specified explicitly via FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH). A second
   * option is "FILE_SET_SPEC_TYPE_NEW_LINE_DELIMITED_MANIFEST" which interprets each file as a
   * manifest file, where each line is a reference to a file.
   */
  public Builder setFileSetSpecType(String fileSetSpecType) {
    return setFileSetSpecTypeInner(fileSetSpecType);
  }

  /**
   * Sets the source format, and possibly some parsing options, of the external data. Supported
   * formats are {@code CSV} and {@code NEWLINE_DELIMITED_JSON}.
   *
   * @see <a
   *     href="https://cloud.google.com/bigquery/docs/reference/v2/tables#externalDataConfiguration.sourceFormat">
   *     Source Format</a>
   */
  public Builder setFormatOptions(FormatOptions formatOptions) {
    return setFormatOptionsInner(formatOptions);
  }

  /**
   * Defines the list of possible SQL data types to which the source decimal values are converted.
   * This list and the precision and the scale parameters of the decimal field determine the
   * target type. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is picked if it is in
   * the specified list and if it supports the precision and the scale. STRING supports all
   * precision and scale values.
   *
   * @param decimalTargetTypes decimalTargetType or {@code null} for none
   */
  public abstract Builder setDecimalTargetTypes(List<String> decimalTargetTypes);

  abstract Builder setFormatOptionsInner(FormatOptions formatOptions);

  /**
   * Sets the maximum number of bad records that BigQuery can ignore when reading data. If the
   * number of bad records exceeds this value, an invalid error is returned in the job result. The
   * default value is 0, which requires that all records are valid.
   */
  public abstract Builder setMaxBadRecords(Integer maxBadRecords);

  /**
   * Sets whether BigQuery should allow extra values that are not represented in the table schema.
   * If true, the extra values are ignored. If false, records with extra columns are treated as
   * bad records, and if there are too many bad records, an invalid error is returned in the job
   * result. The default value is false. The value set with {@link
   * #setFormatOptions(FormatOptions)} property determines what BigQuery treats as an extra value.
   *
   * @see <a
   *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration">
   *     Ignore Unknown Values</a>
   */
  public abstract Builder setIgnoreUnknownValues(Boolean ignoreUnknownValues);

  /**
   * Sets compression type of the data source. By default no compression is assumed.
   *
   * @see <a
   *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration">
   *     Compression</a>
   */
  public abstract Builder setCompression(String compression);

  /**
   * [Optional, Trusted Tester] connectionId for external data source. The value may be {@code
   * null}.
   */
  public abstract Builder setConnectionId(String connectionId);

  /**
   * [Experimental] Sets detection of schema and format options automatically. Any option
   * specified explicitly will be honored.
   */
  public abstract Builder setAutodetect(Boolean autodetect);

  /** Sets the table definition type. */
  public abstract Builder setType(Type type);

  /** Sets the table schema. */
  public abstract Builder setSchema(Schema schema);

  /** Sets the table Hive partitioning options. */
  public Builder setHivePartitioningOptions(HivePartitioningOptions hivePartitioningOptions) {
    return setHivePartitioningOptionsInner(hivePartitioningOptions);
  }

  /**
   * When creating an external table, the user can provide a reference file with the table schema.
   * This is enabled for the following formats: AVRO, PARQUET, ORC.
   *
   * @param referenceFileSchemaUri or {@code null} for none
   */
  public abstract Builder setReferenceFileSchemaUri(String referenceFileSchemaUri);

  abstract Builder setHivePartitioningOptionsInner(
      HivePartitioningOptions hivePartitioningOptions);

  /**
   * Sets the object metadata value; it is written to the {@code objectMetadata} field of the
   * external data configuration when non-null.
   */
  public Builder setObjectMetadata(String objectMetadata) {
    return setObjectMetadataInner(objectMetadata);
  }

  abstract Builder setObjectMetadataInner(String objectMetadata);

  /**
   * [Optional] Metadata Cache Mode for the table. Set this to enable caching of metadata from
   * external data source.
   *
   * @see <a
   *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration">
   *     MetadataCacheMode</a>
   */
  public Builder setMetadataCacheMode(String metadataCacheMode) {
    return setMetadataCacheModeInner(metadataCacheMode);
  }

  abstract Builder setMetadataCacheModeInner(String metadataCacheMode);

  /** Creates an {@code ExternalTableDefinition} object. */
  @Override
  public abstract ExternalTableDefinition build();
}
/**
 * Returns the compression type of the data source, or {@code null} if none was set.
 *
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration">
 *     Compression</a>
 */
@Nullable
public abstract String getCompression();
/**
 * Returns the connection ID used to connect to external data source, or {@code null} if none
 * was set.
 *
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration">
 *     ConnectionId</a>
 */
@Nullable
public abstract String getConnectionId();
/**
 * Returns whether BigQuery should allow extra values that are not represented in the table
 * schema. If true, the extra values are ignored. If false, records with extra columns are treated
 * as bad records, and if there are too many bad records, an invalid error is returned in the job
 * result. The default value is false. The value of {@link #getFormatOptions()} determines what
 * BigQuery treats as an extra value.
 *
 * <p>This is an alias of {@link #getIgnoreUnknownValues()} and returns {@code null} when the
 * property was not set.
 *
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration">
 *     Ignore Unknown Values</a>
 */
@Nullable
public Boolean ignoreUnknownValues() {
  return getIgnoreUnknownValues();
}
/**
 * Returns the "ignore unknown values" setting, or {@code null} if it was not set. {@link
 * #ignoreUnknownValues()} returns this same value.
 */
@Nullable
public abstract Boolean getIgnoreUnknownValues();
/**
 * Returns the maximum number of bad records that BigQuery can ignore when reading data. If the
 * number of bad records exceeds this value, an invalid error is returned in the job result.
 *
 * @return the configured limit, or {@code null} if none was set
 */
@Nullable
public abstract Integer getMaxBadRecords();
/**
 * Returns the fully-qualified URIs that point to your data in Google Cloud Storage. Each URI can
 * contain one '*' wildcard character that must come after the bucket's name. Size limits related
 * to load jobs apply to external data sources, plus an additional limit of 10 GB maximum size
 * across all URIs.
 *
 * @return the immutable list held by this definition, or {@code null} if no URIs were set
 * @see <a href="https://cloud.google.com/bigquery/loading-data-into-bigquery#quota">Quota</a>
 */
@Nullable
public List<String> getSourceUris() {
  return getSourceUrisImmut();
}
/**
 * Returns the file set specification type that tells how files denoted by the source URIs are
 * interpreted, or {@code null} if it was not set.
 */
@Nullable
public String getFileSetSpecType() {
  final String fileSetSpecType = getFileSetSpecTypeInner();
  return fileSetSpecType;
}

/** Backing property for {@link #getFileSetSpecType()}. */
@Nullable
abstract String getFileSetSpecTypeInner();
/** Backing immutable list for {@link #getSourceUris()}; {@code null} if no URIs were set. */
@Nullable
public abstract ImmutableList<String> getSourceUrisImmut();
/**
 * Returns the object metadata, or {@code null} if it was not set.
 *
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration">
 *     ObjectMetadata</a>
 */
@Nullable
public String getObjectMetadata() {
  final String objectMetadata = getObjectMetadataInner();
  return objectMetadata;
}

/** Backing property for {@link #getObjectMetadata()}. */
@Nullable
abstract String getObjectMetadataInner();
/**
 * Returns the metadata cache mode, or {@code null} if it was not set.
 *
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration">
 *     MetadataCacheMode</a>
 */
@Nullable
public String getMetadataCacheMode() {
  final String metadataCacheMode = getMetadataCacheModeInner();
  return metadataCacheMode;
}

/** Backing property for {@link #getMetadataCacheMode()}. */
@Nullable
abstract String getMetadataCacheModeInner();
/**
 * Returns the source format, and possibly some parsing options, of the external data. Supported
 * formats are {@code CSV} and {@code NEWLINE_DELIMITED_JSON}.
 *
 * <p>The result is cast, unchecked, to the type the caller asks for; requesting a subtype that
 * does not match the stored options fails with a {@code ClassCastException} at the call site.
 *
 * @param <F> the expected {@code FormatOptions} subtype
 * @return the stored format options, or {@code null} if none were set
 */
@SuppressWarnings("unchecked")
@Nullable
public <F extends FormatOptions> F getFormatOptions() {
  // Unchecked by design: the caller chooses F, nothing here can verify it.
  return (F) getFormatOptionsInner();
}

/** Backing property for {@link #getFormatOptions()}. */
@Nullable
abstract FormatOptions getFormatOptionsInner();
/**
 * Returns the list of possible SQL data types to which the source decimal values are converted,
 * or {@code null} if none were set.
 */
@Nullable
public abstract ImmutableList<String> getDecimalTargetTypes();
/**
 * [Experimental] Returns whether automatic detection of schema and format options should be
 * performed.
 *
 * @return the autodetect flag, or {@code null} if it was not set
 */
@Nullable
public abstract Boolean getAutodetect();
/**
 * Returns the URI of the reference file that provides the table schema, or {@code null} if none
 * was set.
 */
@Nullable
public abstract String getReferenceFileSchemaUri();
/**
 * [Experimental] Returns the HivePartitioningOptions when the data layout follows Hive
 * partitioning convention.
 *
 * @return the Hive partitioning options, or {@code null} if none were set
 */
@Nullable
public HivePartitioningOptions getHivePartitioningOptions() {
  return getHivePartitioningOptionsInner();
}

/** Backing property for {@link #getHivePartitioningOptions()}. */
@Nullable
abstract HivePartitioningOptions getHivePartitioningOptionsInner();
/**
 * Returns a builder for the {@code ExternalTableDefinition} object. The implementation is
 * generated by AutoValue.
 */
public abstract Builder toBuilder();
/**
 * Converts this definition to a {@code Table} model: starts from the superclass conversion and
 * attaches the external data configuration built by {@link #toExternalDataConfigurationPb()}.
 */
@Override
com.google.api.services.bigquery.model.Table toPb() {
  final ExternalDataConfiguration externalConfig = toExternalDataConfigurationPb();
  final Table result = super.toPb();
  result.setExternalDataConfiguration(externalConfig);
  return result;
}
/**
 * Converts this definition into the API {@code ExternalDataConfiguration} model. Only properties
 * that are non-null on this definition are copied; everything else is left unset on the model.
 *
 * <p>The source format is always taken from the format options' type. Format-specific options
 * (Parquet, Avro, CSV, Google Sheets, Bigtable) are attached only when the stored options are of
 * the matching subclass.
 *
 * @return a newly created model populated from this definition
 */
com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataConfigurationPb() {
  ExternalDataConfiguration externalConfigurationPb = new ExternalDataConfiguration();
  // Read once instead of re-fetching (and re-casting) for every format check below.
  FormatOptions formatOptions = getFormatOptionsInner();

  if (getCompression() != null) {
    externalConfigurationPb.setCompression(getCompression());
  }
  if (getConnectionId() != null) {
    externalConfigurationPb.setConnectionId(getConnectionId());
  }
  if (ignoreUnknownValues() != null) {
    externalConfigurationPb.setIgnoreUnknownValues(ignoreUnknownValues());
  }
  if (getMaxBadRecords() != null) {
    externalConfigurationPb.setMaxBadRecords(getMaxBadRecords());
  }
  if (getSchema() != null) {
    externalConfigurationPb.setSchema(getSchema().toPb());
  }
  if (formatOptions != null) {
    externalConfigurationPb.setSourceFormat(formatOptions.getType());
  }
  if (getSourceUris() != null) {
    externalConfigurationPb.setSourceUris(getSourceUris());
  }
  if (getDecimalTargetTypes() != null) {
    externalConfigurationPb.setDecimalTargetTypes(getDecimalTargetTypes());
  }
  if (formatOptions != null) {
    String formatType = formatOptions.getType();
    // The type string alone does not prove the runtime class, so each downcast is also guarded
    // by instanceof; a plain FormatOptions simply carries no format-specific options.
    if (FormatOptions.PARQUET.equals(formatType) && formatOptions instanceof ParquetOptions) {
      externalConfigurationPb.setParquetOptions(((ParquetOptions) formatOptions).toPb());
    } else if (FormatOptions.AVRO.equals(formatType) && formatOptions instanceof AvroOptions) {
      externalConfigurationPb.setAvroOptions(((AvroOptions) formatOptions).toPb());
    } else if (FormatOptions.CSV.equals(formatType) && formatOptions instanceof CsvOptions) {
      externalConfigurationPb.setCsvOptions(((CsvOptions) formatOptions).toPb());
    } else if (FormatOptions.GOOGLE_SHEETS.equals(formatType)
        && formatOptions instanceof GoogleSheetsOptions) {
      externalConfigurationPb.setGoogleSheetsOptions(
          ((GoogleSheetsOptions) formatOptions).toPb());
    } else if (FormatOptions.BIGTABLE.equals(formatType)
        && formatOptions instanceof BigtableOptions) {
      externalConfigurationPb.setBigtableOptions(((BigtableOptions) formatOptions).toPb());
    }
  }
  if (getAutodetect() != null) {
    externalConfigurationPb.setAutodetect(getAutodetect());
  }
  if (getReferenceFileSchemaUri() != null) {
    externalConfigurationPb.setReferenceFileSchemaUri(getReferenceFileSchemaUri());
  }
  if (getHivePartitioningOptions() != null) {
    externalConfigurationPb.setHivePartitioningOptions(getHivePartitioningOptions().toPb());
  }
  if (getFileSetSpecType() != null) {
    externalConfigurationPb.setFileSetSpecType(getFileSetSpecType());
  }
  if (getObjectMetadata() != null) {
    externalConfigurationPb.setObjectMetadata(getObjectMetadata());
  }
  if (getMetadataCacheMode() != null) {
    externalConfigurationPb.setMetadataCacheMode(getMetadataCacheMode());
  }
  return externalConfigurationPb;
}
/** Returns an empty builder whose type is preset to {@code Type.EXTERNAL}. */
static Builder newBuilder() {
  final Builder generated = new AutoValue_ExternalTableDefinition.Builder();
  return generated.setType(Type.EXTERNAL);
}
/**
 * Creates a builder for an ExternalTableDefinition object.
 *
 * @param sourceUris the fully-qualified URIs that point to your data in Google Cloud Storage.
 *     Each URI can contain one '*' wildcard character that must come after the bucket's name.
 *     Size limits related to load jobs apply to external data sources, plus an additional limit
 *     of 10 GB maximum size across all URIs.
 * @param schema the schema for the external data
 * @param format the source format of the external data
 * @return a builder for an ExternalTableDefinition object given source URIs, schema and format
 * @see <a href="https://cloud.google.com/bigquery/loading-data-into-bigquery#quota">Quota</a>
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/v2/tables#externalDataConfiguration.sourceFormat">
 *     Source Format</a>
 */
public static Builder newBuilder(List<String> sourceUris, Schema schema, FormatOptions format) {
  return newBuilder().setSourceUris(sourceUris).setSchema(schema).setFormatOptions(format);
}
/**
 * Creates a builder for an ExternalTableDefinition object backed by a single source URI.
 *
 * @param sourceUri a fully-qualified URI that points to your data in Google Cloud Storage. The
 *     URI can contain one '*' wildcard character that must come after the bucket's name. Size
 *     limits related to load jobs apply to external data sources.
 * @param schema the schema for the external data
 * @param format the source format of the external data
 * @return a builder for an ExternalTableDefinition object given source URI, schema and format
 * @throws IllegalArgumentException if {@code sourceUri} is null or empty
 * @see <a href="https://cloud.google.com/bigquery/loading-data-into-bigquery#quota">Quota</a>
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/v2/tables#externalDataConfiguration.sourceFormat">
 *     Source Format</a>
 */
public static Builder newBuilder(String sourceUri, Schema schema, FormatOptions format) {
  final boolean uriPresent = !isNullOrEmpty(sourceUri);
  checkArgument(uriPresent, "Provided sourceUri is null or empty");
  final ImmutableList<String> singleUri = ImmutableList.of(sourceUri);
  return newBuilder(singleUri, schema, format);
}
/**
 * Creates a builder for an ExternalTableDefinition object from one source URI and a format; no
 * schema is set.
 *
 * @param sourceUri the fully-qualified URI that points to your data in Google Cloud. For Google
 *     Cloud Bigtable URIs: Exactly one URI can be specified and it has to be a fully specified
 *     and valid HTTPS URL for a Google Cloud Bigtable table. Size limits related to load jobs
 *     apply to external data sources, plus an additional limit of 10 GB maximum size across all
 *     URIs.
 * @param format the source format of the external data
 * @return a builder for an ExternalTableDefinition object given source URI and format
 * @throws IllegalArgumentException if {@code sourceUri} is null or empty
 * @see <a href="https://cloud.google.com/bigquery/loading-data-into-bigquery#quota">Quota</a>
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/v2/tables#externalDataConfiguration.sourceFormat">
 *     Source Format</a>
 */
public static Builder newBuilder(String sourceUri, FormatOptions format) {
  // The single-URI overload performs the same null/empty check and sets the source URI.
  final Builder withUri = newBuilder(sourceUri);
  return withUri.setFormatOptions(format);
}
/**
 * Creates a builder for an ExternalTableDefinition object from one source URI; neither schema
 * nor format is set.
 *
 * @param sourceUri the fully-qualified URI that points to your data in Google Cloud. For Google
 *     Cloud Bigtable URIs: Exactly one URI can be specified and it has to be a fully specified
 *     and valid HTTPS URL for a Google Cloud Bigtable table. Size limits related to load jobs
 *     apply to external data sources, plus an additional limit of 10 GB maximum size across all
 *     URIs.
 * @return a builder for an ExternalTableDefinition object given the source URI
 * @throws IllegalArgumentException if {@code sourceUri} is null or empty
 * @see <a href="https://cloud.google.com/bigquery/loading-data-into-bigquery#quota">Quota</a>
 */
public static Builder newBuilder(String sourceUri) {
  final boolean uriPresent = !isNullOrEmpty(sourceUri);
  checkArgument(uriPresent, "Provided sourceUri is null or empty");
  final Builder empty = newBuilder();
  return empty.setSourceUris(ImmutableList.of(sourceUri));
}
/**
 * Creates an ExternalTableDefinition object.
 *
 * @param sourceUris the fully-qualified URIs that point to your data in Google Cloud Storage.
 *     Each URI can contain one '*' wildcard character that must come after the bucket's name.
 *     Size limits related to load jobs apply to external data sources, plus an additional limit
 *     of 10 GB maximum size across all URIs.
 * @param schema the schema for the external data
 * @param format the source format of the external data
 * @return an ExternalTableDefinition object given source URIs, schema and format
 * @see <a href="https://cloud.google.com/bigquery/loading-data-into-bigquery#quota">Quota</a>
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/v2/tables#externalDataConfiguration.sourceFormat">
 *     Source Format</a>
 */
public static ExternalTableDefinition of(
    List<String> sourceUris, Schema schema, FormatOptions format) {
  return newBuilder(sourceUris, schema, format).build();
}
/**
 * Creates an ExternalTableDefinition object backed by a single source URI.
 *
 * @param sourceUri a fully-qualified URI that points to your data in Google Cloud Storage. The
 *     URI can contain one '*' wildcard character that must come after the bucket's name. Size
 *     limits related to load jobs apply to external data sources.
 * @param schema the schema for the external data
 * @param format the source format of the external data
 * @return an ExternalTableDefinition object given source URI, schema and format
 * @see <a href="https://cloud.google.com/bigquery/loading-data-into-bigquery#quota">Quota</a>
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/v2/tables#externalDataConfiguration.sourceFormat">
 *     Source Format</a>
 */
public static ExternalTableDefinition of(String sourceUri, Schema schema, FormatOptions format) {
  final Builder prepared = newBuilder(sourceUri, schema, format);
  return prepared.build();
}
/**
 * Creates an ExternalTableDefinition object from one source URI and a format; no schema is set.
 *
 * @param sourceUri the fully-qualified URI that points to your data in Google Cloud. For Google
 *     Cloud Bigtable URIs: Exactly one URI can be specified and it has to be a fully specified
 *     and valid HTTPS URL for a Google Cloud Bigtable table. Size limits related to load jobs
 *     apply to external data sources, plus an additional limit of 10 GB maximum size across all
 *     URIs.
 * @param format the source format of the external data
 * @return an ExternalTableDefinition object given source URI and format
 * @see <a href="https://cloud.google.com/bigquery/loading-data-into-bigquery#quota">Quota</a>
 * @see <a
 *     href="https://cloud.google.com/bigquery/docs/reference/v2/tables#externalDataConfiguration.sourceFormat">
 *     Source Format</a>
 */
public static ExternalTableDefinition of(String sourceUri, FormatOptions format) {
  final Builder prepared = newBuilder(sourceUri, format);
  return prepared.build();
}
/**
 * Builds an {@code ExternalTableDefinition} from a {@code Table} model. The builder is first
 * seeded through {@code table(tablePb)}; if the table carries an external data configuration,
 * its fields are then copied onto the builder.
 *
 * <p>When several format-specific option objects are present, the last one in the order source
 * format, Avro, CSV, Google Sheets, Bigtable, Parquet wins.
 */
@SuppressWarnings("unchecked")
static ExternalTableDefinition fromPb(Table tablePb) {
  Builder definition = newBuilder().table(tablePb);
  ExternalDataConfiguration config = tablePb.getExternalDataConfiguration();
  if (config == null) {
    // Nothing external to copy; return what table(tablePb) produced.
    return definition.build();
  }

  if (config.getSourceUris() != null) {
    definition.setSourceUris(ImmutableList.copyOf(config.getSourceUris()));
  }
  if (config.getDecimalTargetTypes() != null) {
    definition.setDecimalTargetTypes(ImmutableList.copyOf(config.getDecimalTargetTypes()));
  }

  // Resolve the format options in the original precedence order; later candidates override.
  FormatOptions resolvedFormat = null;
  if (config.getSourceFormat() != null) {
    resolvedFormat = FormatOptions.of(config.getSourceFormat());
  }
  if (config.getAvroOptions() != null) {
    resolvedFormat = AvroOptions.fromPb(config.getAvroOptions());
  }
  if (config.getCsvOptions() != null) {
    resolvedFormat = CsvOptions.fromPb(config.getCsvOptions());
  }
  if (config.getGoogleSheetsOptions() != null) {
    resolvedFormat = GoogleSheetsOptions.fromPb(config.getGoogleSheetsOptions());
  }
  if (config.getBigtableOptions() != null) {
    resolvedFormat = BigtableOptions.fromPb(config.getBigtableOptions());
  }
  if (config.getParquetOptions() != null) {
    resolvedFormat = ParquetOptions.fromPb(config.getParquetOptions());
  }
  if (resolvedFormat != null) {
    definition.setFormatOptions(resolvedFormat);
  }

  // These four are copied unconditionally, so a null on the model is passed through as null.
  definition.setCompression(config.getCompression());
  definition.setIgnoreUnknownValues(config.getIgnoreUnknownValues());
  definition.setMaxBadRecords(config.getMaxBadRecords());
  definition.setAutodetect(config.getAutodetect());

  if (config.getConnectionId() != null) {
    definition.setConnectionId(config.getConnectionId());
  }
  if (config.getHivePartitioningOptions() != null) {
    definition.setHivePartitioningOptions(
        HivePartitioningOptions.fromPb(config.getHivePartitioningOptions()));
  }
  if (config.getReferenceFileSchemaUri() != null) {
    definition.setReferenceFileSchemaUri(config.getReferenceFileSchemaUri());
  }
  if (config.getFileSetSpecType() != null) {
    definition.setFileSetSpecType(config.getFileSetSpecType());
  }
  if (config.getObjectMetadata() != null) {
    definition.setObjectMetadata(config.getObjectMetadata());
  }
  if (config.getMetadataCacheMode() != null) {
    definition.setMetadataCacheMode(config.getMetadataCacheMode());
  }
  return definition.build();
}
/**
 * Builds an {@code ExternalTableDefinition} from a standalone {@code ExternalDataConfiguration}
 * model. Every non-null field of the model is copied onto a fresh builder, including the schema.
 *
 * <p>When several format-specific option objects are present, the last one in the order source
 * format, Avro, CSV, Google Sheets, Bigtable, Parquet wins.
 */
static ExternalTableDefinition fromExternalDataConfiguration(
    ExternalDataConfiguration externalDataConfiguration) {
  final ExternalDataConfiguration pb = externalDataConfiguration;
  final Builder result = newBuilder();

  if (pb.getSourceUris() != null) {
    result.setSourceUris(pb.getSourceUris());
  }
  if (pb.getDecimalTargetTypes() != null) {
    result.setDecimalTargetTypes(pb.getDecimalTargetTypes());
  }
  if (pb.getSchema() != null) {
    result.setSchema(Schema.fromPb(pb.getSchema()));
  }

  // Resolve the format options in the original precedence order; later candidates override.
  FormatOptions resolvedFormat = null;
  if (pb.getSourceFormat() != null) {
    resolvedFormat = FormatOptions.of(pb.getSourceFormat());
  }
  if (pb.getAvroOptions() != null) {
    resolvedFormat = AvroOptions.fromPb(pb.getAvroOptions());
  }
  if (pb.getCsvOptions() != null) {
    resolvedFormat = CsvOptions.fromPb(pb.getCsvOptions());
  }
  if (pb.getGoogleSheetsOptions() != null) {
    resolvedFormat = GoogleSheetsOptions.fromPb(pb.getGoogleSheetsOptions());
  }
  if (pb.getBigtableOptions() != null) {
    resolvedFormat = BigtableOptions.fromPb(pb.getBigtableOptions());
  }
  if (pb.getParquetOptions() != null) {
    resolvedFormat = ParquetOptions.fromPb(pb.getParquetOptions());
  }
  if (resolvedFormat != null) {
    result.setFormatOptions(resolvedFormat);
  }

  if (pb.getCompression() != null) {
    result.setCompression(pb.getCompression());
  }
  if (pb.getConnectionId() != null) {
    result.setConnectionId(pb.getConnectionId());
  }
  if (pb.getIgnoreUnknownValues() != null) {
    result.setIgnoreUnknownValues(pb.getIgnoreUnknownValues());
  }
  if (pb.getMaxBadRecords() != null) {
    result.setMaxBadRecords(pb.getMaxBadRecords());
  }
  if (pb.getAutodetect() != null) {
    result.setAutodetect(pb.getAutodetect());
  }
  if (pb.getReferenceFileSchemaUri() != null) {
    result.setReferenceFileSchemaUri(pb.getReferenceFileSchemaUri());
  }
  if (pb.getHivePartitioningOptions() != null) {
    result.setHivePartitioningOptions(
        HivePartitioningOptions.fromPb(pb.getHivePartitioningOptions()));
  }
  if (pb.getFileSetSpecType() != null) {
    result.setFileSetSpecType(pb.getFileSetSpecType());
  }
  if (pb.getObjectMetadata() != null) {
    result.setObjectMetadata(pb.getObjectMetadata());
  }
  if (pb.getMetadataCacheMode() != null) {
    result.setMetadataCacheMode(pb.getMetadataCacheMode());
  }
  return result.build();
}
}