/*
* Copyright 2015 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.bigquery;

import com.google.cloud.bigquery.JobInfo.CreateDisposition;
import com.google.cloud.bigquery.JobInfo.SchemaUpdateOption;
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
import java.util.List;

/**
* Common interface for a load configuration. A load configuration ({@link
* WriteChannelConfiguration}) can be used to load data into a table with a {@link
* com.google.cloud.WriteChannel} ({@link BigQuery#writer(WriteChannelConfiguration)}). A load
* configuration ({@link LoadJobConfiguration}) can also be used to create a load job ({@link
* JobInfo#of(JobConfiguration)}).
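*
* <p>For example, data could be streamed into a table through a write channel, or loaded from
* Google Cloud Storage with a load job. A minimal sketch, where {@code my_dataset}, {@code
* my_table} and {@code gs://my-bucket/data.csv} are placeholder names:
*
* <pre>{@code
* BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
* TableId tableId = TableId.of("my_dataset", "my_table");
*
* // Stream bytes into the table through a WriteChannel.
* WriteChannelConfiguration writeConfiguration =
*     WriteChannelConfiguration.newBuilder(tableId).setFormatOptions(FormatOptions.csv()).build();
* try (TableDataWriteChannel writer = bigquery.writer(writeConfiguration)) {
*   writer.write(ByteBuffer.wrap("a,1\nb,2\n".getBytes(StandardCharsets.UTF_8)));
* }
*
* // Or load files from Google Cloud Storage with a load job.
* LoadJobConfiguration loadConfiguration =
*     LoadJobConfiguration.newBuilder(tableId, "gs://my-bucket/data.csv")
*         .setFormatOptions(FormatOptions.csv())
*         .build();
* Job job = bigquery.create(JobInfo.of(loadConfiguration));
* }</pre>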
*/
public interface LoadConfiguration {
interface Builder {
/** Sets the destination table to load the data into. */
Builder setDestinationTable(TableId destinationTable);
/**
 * Sets the custom encryption configuration (for example, a Cloud KMS key) used to protect the
 * destination table.
 */
Builder setDestinationEncryptionConfiguration(EncryptionConfiguration encryptionConfiguration);
/**
* Sets whether the job is allowed to create new tables.
*
* @see <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.createDisposition">
*     Create Disposition</a>
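*
* <p>For example, a minimal sketch ({@code builder} being any {@code LoadConfiguration.Builder}):
*
* <pre>{@code
* builder.setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED);
* }</pre>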
*/
Builder setCreateDisposition(CreateDisposition createDisposition);
/**
* Sets the action that should occur if the destination table already exists.
*
* @see <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.writeDisposition">
*     Write Disposition</a>
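*
* <p>For example, a minimal sketch with the same assumed {@code builder}:
*
* <pre>{@code
* builder.setWriteDisposition(WriteDisposition.WRITE_TRUNCATE);
* }</pre>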
*/
Builder setWriteDisposition(WriteDisposition writeDisposition);
/**
* Sets the source format, and possibly some parsing options, of the external data. Supported
* formats are {@code CSV}, {@code NEWLINE_DELIMITED_JSON} and {@code DATASTORE_BACKUP}. If not
* specified, {@code CSV} format is assumed.
*
* <p><a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.sourceFormat">
*     Source Format</a>
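*
* <p>For example, a minimal sketch of parsing CSV with a custom delimiter ({@code builder}
* being any {@code LoadConfiguration.Builder}):
*
* <pre>{@code
* builder.setFormatOptions(
*     CsvOptions.newBuilder().setFieldDelimiter(";").setSkipLeadingRows(1).build());
* }</pre>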
*/
Builder setFormatOptions(FormatOptions formatOptions);
/**
* Sets the string that represents a null value in a CSV file. For example, if you specify "\N",
* BigQuery interprets "\N" as a null value when loading a CSV file. The default value is the
* empty string. If you set this property to a custom value, BigQuery reports an error if an
* empty string is present for any data type other than {@code STRING} and {@code BYTE}. For
* {@code STRING} and {@code BYTE} columns, BigQuery interprets the empty string as an empty
* value.
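*
* <p>For example, a minimal sketch with an assumed {@code builder} (note the Java escaping
* needed to pass the literal "\N"):
*
* <pre>{@code
* builder.setNullMarker("\\N");
* }</pre>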
*/
Builder setNullMarker(String nullMarker);
/**
* Sets the maximum number of bad records that BigQuery can ignore when running the job. If the
* number of bad records exceeds this value, an invalid error is returned in the job result. By
* default, no bad records are ignored.
*/
Builder setMaxBadRecords(Integer maxBadRecords);
/**
* Sets the schema for the destination table. The schema can be omitted if the destination table
* already exists, or if you're loading data from a Google Cloud Datastore backup (i.e. {@code
* DATASTORE_BACKUP} format option).
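*
* <p>For example, a minimal sketch with placeholder field names and an assumed {@code builder}:
*
* <pre>{@code
* builder.setSchema(
*     Schema.of(
*         Field.of("name", StandardSQLTypeName.STRING),
*         Field.of("count", StandardSQLTypeName.INT64)));
* }</pre>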
*/
Builder setSchema(Schema schema);
/**
* Sets whether BigQuery should allow extra values that are not represented in the table schema.
* If {@code true}, the extra values are ignored. If {@code false}, records with extra columns
* are treated as bad records, and if there are too many bad records, an invalid error is
* returned in the job result. By default unknown values are not allowed.
*/
Builder setIgnoreUnknownValues(Boolean ignoreUnknownValues);
/**
* [Experimental] Sets options allowing the schema of the destination table to be updated as a
* side effect of the load job. Schema update options are supported in two cases: when
* writeDisposition is WRITE_APPEND; when writeDisposition is WRITE_TRUNCATE and the destination
* table is a partition of a table, specified by partition decorators. For normal tables,
* WRITE_TRUNCATE will always overwrite the schema.
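*
* <p>For example, a minimal sketch allowing new fields to be added on append, with an assumed
* {@code builder}:
*
* <pre>{@code
* builder.setSchemaUpdateOptions(
*     Arrays.asList(SchemaUpdateOption.ALLOW_FIELD_ADDITION));
* }</pre>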
*/
Builder setSchemaUpdateOptions(List<SchemaUpdateOption> schemaUpdateOptions);
/**
* [Experimental] Sets automatic inference of the options and schema for CSV and JSON sources.
*/
Builder setAutodetect(Boolean autodetect);
/** Sets the time partitioning specification for the destination table. */
Builder setTimePartitioning(TimePartitioning timePartitioning);
/** Sets the clustering specification for the destination table. */
Builder setClustering(Clustering clustering);
/**
* If FormatOptions is set to AVRO, sets whether BigQuery interprets Avro logical types into
* their corresponding types (such as TIMESTAMP) instead of only using their raw types (such as
* INTEGER). The value may be {@code null}.
*/
Builder setUseAvroLogicalTypes(Boolean useAvroLogicalTypes);
/**
* Defines the list of possible SQL data types to which the source decimal values are converted.
* This list and the precision and the scale parameters of the decimal field determine the
* target type. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is picked if it is in
* the specified list and if it supports the precision and the scale. STRING supports all
* precision and scale values.
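*
* <p>For example, a minimal sketch with an assumed {@code builder}:
*
* <pre>{@code
* builder.setDecimalTargetTypes(Arrays.asList("NUMERIC", "BIGNUMERIC", "STRING"));
* }</pre>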
*
* @param decimalTargetTypes the list of target types, or {@code null} for none
*/
Builder setDecimalTargetTypes(List<String> decimalTargetTypes);
LoadConfiguration build();
}
/** Returns the destination table to load the data into. */
TableId getDestinationTable();
/** Returns the custom encryption configuration used to protect the destination table, if set. */
EncryptionConfiguration getDestinationEncryptionConfiguration();
/**
* Returns whether the job is allowed to create new tables.
*
* @see <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.createDisposition">
*     Create Disposition</a>
*/
CreateDisposition getCreateDisposition();
/**
* Returns the action that should occur if the destination table already exists.
*
* @see <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.writeDisposition">
*     Write Disposition</a>
*/
WriteDisposition getWriteDisposition();
/**
* Returns the string that represents a null value in a CSV file.
*
* @see <a href="https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.nullMarker">
*     Null Marker</a>
*/
String getNullMarker();
/**
* Returns additional properties used to parse CSV data (used when {@link #getFormat()} is set to
* CSV). Returns {@code null} if not set.
*/
CsvOptions getCsvOptions();
/**
* Returns the maximum number of bad records that BigQuery can ignore when running the job. If the
* number of bad records exceeds this value, an invalid error is returned in the job result. By
* default, no bad records are ignored.
*/
Integer getMaxBadRecords();
/** Returns the schema for the destination table, if set. Returns {@code null} otherwise. */
Schema getSchema();
/** Returns the format of the data files. */
String getFormat();
/**
* Returns whether BigQuery should allow extra values that are not represented in the table
* schema. If {@code true}, the extra values are ignored. If {@code false}, records with extra
* columns are treated as bad records, and if there are too many bad records, an invalid error is
* returned in the job result. By default unknown values are not allowed.
*/
Boolean ignoreUnknownValues();
/** Returns additional options used to load from a Google Cloud Datastore backup. */
DatastoreBackupOptions getDatastoreBackupOptions();
/**
* [Experimental] Returns options allowing the schema of the destination table to be updated as a
* side effect of the load job. Schema update options are supported in two cases: when
* writeDisposition is WRITE_APPEND; when writeDisposition is WRITE_TRUNCATE and the destination
* table is a partition of a table, specified by partition decorators. For normal tables,
* WRITE_TRUNCATE will always overwrite the schema.
*/
List<SchemaUpdateOption> getSchemaUpdateOptions();
/**
* [Experimental] Returns whether automatic inference of the options and schema for CSV and JSON
* sources is set.
*/
Boolean getAutodetect();
/** Returns the time partitioning specification defined for the destination table. */
TimePartitioning getTimePartitioning();
/** Returns the clustering specification for the destination table. */
Clustering getClustering();
/**
 * Returns whether Avro logical types are interpreted into their corresponding types (such as
 * TIMESTAMP) instead of only using their raw types (such as INTEGER).
 */
Boolean getUseAvroLogicalTypes();
/**
* Returns the list of possible SQL data types to which the source decimal values are converted.
* This list and the precision and the scale parameters of the decimal field determine the target
* type. In the order of NUMERIC, BIGNUMERIC, and STRING, a type is picked if it is in the
* specified list and if it supports the precision and the scale. STRING supports all precision
* and scale values.
*/
List<String> getDecimalTargetTypes();
/** Returns a builder for the load configuration object. */
Builder toBuilder();
}