All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.bigquery.QueryJobConfiguration Maven / Gradle / Ivy

There is a newer version: 0.2.8
Show newest version
/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.cloud.bigquery;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.cloud.bigquery.JobInfo.CreateDisposition;
import com.google.cloud.bigquery.JobInfo.WriteDisposition;
import com.google.common.base.MoreObjects.ToStringHelper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * Google BigQuery Query Job configuration. A Query Job runs a query against BigQuery data. Query
 * job configurations have {@link JobConfiguration.Type#QUERY} type.
 */
public final class QueryJobConfiguration extends JobConfiguration {

  private static final long serialVersionUID = -1108948249081804890L;

  private final String query;
  private final TableId destinationTable;
  private final Map tableDefinitions;
  private final List userDefinedFunctions;
  private final CreateDisposition createDisposition;
  private final WriteDisposition writeDisposition;
  private final DatasetId defaultDataset;
  private final Priority priority;
  private final Boolean allowLargeResults;
  private final Boolean useQueryCache;
  private final Boolean flattenResults;
  private final Boolean dryRun;

  /**
   * Priority levels for a query. If not specified the priority is assumed to be
   * {@link Priority#INTERACTIVE}.
   */
  public enum Priority {
    /**
     * Query is executed as soon as possible and count towards the
     * concurrent rate limit and the daily
     * rate limit.
     */
    INTERACTIVE,

    /**
     * Query is queued and started as soon as idle resources are available, usually within a few
     * minutes. If the query hasn't started within 3 hours, its priority is changed to
     * {@link Priority#INTERACTIVE}.
     */
    BATCH
  }

  public static final class Builder
      extends JobConfiguration.Builder {

    private String query;
    private TableId destinationTable;
    private Map tableDefinitions;
    private List userDefinedFunctions;
    private CreateDisposition createDisposition;
    private WriteDisposition writeDisposition;
    private DatasetId defaultDataset;
    private Priority priority;
    private Boolean allowLargeResults;
    private Boolean useQueryCache;
    private Boolean flattenResults;
    private Boolean dryRun;

    private Builder() {
      super(Type.QUERY);
    }

    private Builder(QueryJobConfiguration jobConfiguration) {
      this();
      this.query = jobConfiguration.query;
      this.destinationTable = jobConfiguration.destinationTable;
      this.tableDefinitions = jobConfiguration.tableDefinitions;
      this.userDefinedFunctions = jobConfiguration.userDefinedFunctions;
      this.createDisposition = jobConfiguration.createDisposition;
      this.writeDisposition = jobConfiguration.writeDisposition;
      this.defaultDataset = jobConfiguration.defaultDataset;
      this.priority = jobConfiguration.priority;
      this.allowLargeResults = jobConfiguration.allowLargeResults;
      this.useQueryCache = jobConfiguration.useQueryCache;
      this.flattenResults = jobConfiguration.flattenResults;
      this.dryRun = jobConfiguration.dryRun;
    }

    private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) {
      this();
      JobConfigurationQuery queryConfigurationPb = configurationPb.getQuery();
      this.query = queryConfigurationPb.getQuery();
      allowLargeResults = queryConfigurationPb.getAllowLargeResults();
      useQueryCache = queryConfigurationPb.getUseQueryCache();
      flattenResults = queryConfigurationPb.getFlattenResults();
      dryRun = configurationPb.getDryRun();
      if (queryConfigurationPb.getDestinationTable() != null) {
        destinationTable = TableId.fromPb(queryConfigurationPb.getDestinationTable());
      }
      if (queryConfigurationPb.getDefaultDataset() != null) {
        defaultDataset = DatasetId.fromPb(queryConfigurationPb.getDefaultDataset());
      }
      if (queryConfigurationPb.getPriority() != null) {
        priority = Priority.valueOf(queryConfigurationPb.getPriority());
      }
      if (queryConfigurationPb.getTableDefinitions() != null) {
        tableDefinitions = Maps.transformValues(queryConfigurationPb.getTableDefinitions(),
            ExternalTableDefinition.FROM_EXTERNAL_DATA_FUNCTION);
      }
      if (queryConfigurationPb.getUserDefinedFunctionResources() != null) {
        userDefinedFunctions = Lists.transform(
            queryConfigurationPb.getUserDefinedFunctionResources(),
            UserDefinedFunction.FROM_PB_FUNCTION);
      }
      if (queryConfigurationPb.getCreateDisposition() != null) {
        createDisposition =
            CreateDisposition.valueOf(queryConfigurationPb.getCreateDisposition());
      }
      if (queryConfigurationPb.getWriteDisposition() != null) {
        writeDisposition =
            WriteDisposition.valueOf(queryConfigurationPb.getWriteDisposition());
      }
    }

    /**
     * Sets the BigQuery SQL query to execute.
     */
    public Builder query(String query) {
      this.query = query;
      return this;
    }

    /**
     * Sets the table where to put query results. If not provided a new table is created. This value
     * is required if {@link Builder#allowLargeResults(Boolean)} is set to {@code true}.
     */
    public Builder destinationTable(TableId destinationTable) {
      this.destinationTable = destinationTable;
      return this;
    }

    /**
     * Sets the external tables definitions. If querying external data sources outside of BigQuery,
     * this value describes the data format, location and other properties of the data
     * sources. By defining these properties, the data sources can be queried as if they were
     * standard BigQuery tables.
     */
    public Builder tableDefinitions(Map tableDefinitions) {
      this.tableDefinitions = tableDefinitions != null ? Maps.newHashMap(tableDefinitions) : null;
      return this;
    }

    /**
     * Adds a new external table definition. If a definition already exists for {@code tableName}
     * it is updated.
     *
     * @param tableName name of the table
     * @param tableDefinition external data configuration for the table used by this query
     */
    public Builder addTableDefinition(String tableName, ExternalTableDefinition tableDefinition) {
      if (this.tableDefinitions == null) {
        this.tableDefinitions = Maps.newHashMap();
      }
      this.tableDefinitions.put(checkNotNull(tableName), checkNotNull(tableDefinition));
      return this;
    }

    /**
     * Sets user defined function resources that can be used by this query. Function resources
     * can either be defined inline ({@link UserDefinedFunction#inline(String)}) or loaded from
     * a Google Cloud Storage URI ({@link UserDefinedFunction#fromUri(String)}.
     */
    public Builder userDefinedFunctions(List userDefinedFunctions) {
      this.userDefinedFunctions =
          userDefinedFunctions != null ? ImmutableList.copyOf(userDefinedFunctions) : null;
      return this;
    }

    /**
     * Sets whether the job is allowed to create tables.
     *
     * @see 
     *     Create Disposition
     */
    public Builder createDisposition(CreateDisposition createDisposition) {
      this.createDisposition = createDisposition;
      return this;
    }

    /**
     * Sets the action that should occur if the destination table already exists.
     *
     * @see 
     *     Write Disposition
     */
    public Builder writeDisposition(WriteDisposition writeDisposition) {
      this.writeDisposition = writeDisposition;
      return this;
    }

    /**
     * Sets the default dataset. This dataset is used for all unqualified table names used in the
     * query.
     */
    public Builder defaultDataset(DatasetId defaultDataset) {
      this.defaultDataset = defaultDataset;
      return this;
    }

    /**
     * Sets the default dataset. This dataset is used for all unqualified table names used in the
     * query.
     */
    public Builder defaultDataset(String defaultDataset) {
      return defaultDataset(DatasetId.of(defaultDataset));
    }

    /**
     * Sets a priority for the query. If not specified the priority is assumed to be
     * {@link Priority#INTERACTIVE}.
     */
    public Builder priority(Priority priority) {
      this.priority = priority;
      return this;
    }

    /**
     * Sets whether the job is enabled to create arbitrarily large results. If {@code true}
     * the query is allowed to create large results at a slight cost in performance. If {@code true}
     * {@link Builder#destinationTable(TableId)} must be provided.
     *
     * @see 
     *     Returning Large Query Results
     */
    public Builder allowLargeResults(Boolean allowLargeResults) {
      this.allowLargeResults = allowLargeResults;
      return this;
    }

    /**
     * Sets whether to look for the result in the query cache. The query cache is a best-effort
     * cache that will be flushed whenever tables in the query are modified. Moreover, the query
     * cache is only available when {@link Builder#destinationTable(TableId)} is not set.
     *
     * @see Query Caching
     */
    public Builder useQueryCache(Boolean useQueryCache) {
      this.useQueryCache = useQueryCache;
      return this;
    }

    /**
     * Sets whether nested and repeated fields should be flattened. If set to {@code false}
     * {@link Builder#allowLargeResults(Boolean)} must be {@code true}. By default results are
     * flattened.
     *
     * @see Flatten
     */
    public Builder flattenResults(Boolean flattenResults) {
      this.flattenResults = flattenResults;
      return this;
    }

    /**
     * Sets whether the job has to be dry run or not. If set, the job is not executed. A valid query
     * will return a mostly empty response with some processing statistics, while an invalid query
     * will return the same error it would if it wasn't a dry run.
     */
    public Builder dryRun(Boolean dryRun) {
      this.dryRun = dryRun;
      return this;
    }

    public QueryJobConfiguration build() {
      return new QueryJobConfiguration(this);
    }
  }

  private QueryJobConfiguration(Builder builder) {
    super(builder);
    this.query = checkNotNull(builder.query);
    this.allowLargeResults = builder.allowLargeResults;
    this.createDisposition = builder.createDisposition;
    this.defaultDataset = builder.defaultDataset;
    this.destinationTable = builder.destinationTable;
    this.flattenResults = builder.flattenResults;
    this.priority = builder.priority;
    this.useQueryCache = builder.useQueryCache;
    this.userDefinedFunctions = builder.userDefinedFunctions;
    this.writeDisposition = builder.writeDisposition;
    this.tableDefinitions =
        builder.tableDefinitions != null ? ImmutableMap.copyOf(builder.tableDefinitions) : null;
    this.dryRun = builder.dryRun;
  }

  /**
   * Returns whether the job is enabled to create arbitrarily large results. If {@code true}
   * the query is allowed to create large results at a slight cost in performance.
   * the query is allowed to create large results at a slight cost in performance.
   *
   * @see 
   *     Returning Large Query Results
   */
  public Boolean allowLargeResults() {
    return allowLargeResults;
  }

  /**
   * Returns whether the job is allowed to create new tables.
   *
   * @see 
   *     Create Disposition
   */
  public CreateDisposition createDisposition() {
    return createDisposition;
  }

  /**
   * Returns the default dataset. This dataset is used for all unqualified table names used in the
   * query.
   */
  public DatasetId defaultDataset() {
    return defaultDataset;
  }

  /**
   * Returns the table where to put query results. If not provided a new table is created. This
   * value is required if {@link #allowLargeResults()} is {@code true}.
   */
  public TableId destinationTable() {
    return destinationTable;
  }

  /**
   * Returns whether nested and repeated fields should be flattened. If set to {@code false}
   * {@link Builder#allowLargeResults(Boolean)} must be {@code true}.
   *
   * @see Flatten
   */
  public Boolean flattenResults() {
    return flattenResults;
  }

  /**
   * Returns the query priority.
   */
  public Priority priority() {
    return priority;
  }

  /**
   * Returns the Google BigQuery SQL query.
   */
  public String query() {
    return query;
  }

  /**
   * Returns the external tables definitions. If querying external data sources outside of BigQuery,
   * this value describes the data format, location and other properties of the data
   * sources. By defining these properties, the data sources can be queried as if they were
   * standard BigQuery tables.
   */
  public Map tableDefinitions() {
    return tableDefinitions;
  }

  /**
   * Returns whether to look for the result in the query cache. The query cache is a best-effort
   * cache that will be flushed whenever tables in the query are modified. Moreover, the query
   * cache is only available when {@link Builder#destinationTable(TableId)} is not set.
   *
   * @see Query Caching
   */
  public Boolean useQueryCache() {
    return useQueryCache;
  }

  /**
   * Returns user defined function resources that can be used by this query. Function resources
   * can either be defined inline ({@link UserDefinedFunction.Type#INLINE}) or loaded from
   * a Google Cloud Storage URI ({@link UserDefinedFunction.Type#FROM_URI}.
   */
  public List userDefinedFunctions() {
    return userDefinedFunctions;
  }

  /**
   * Returns the action that should occur if the destination table already exists.
   *
   * @see 
   *     Write Disposition
   */
  public WriteDisposition writeDisposition() {
    return writeDisposition;
  }

  /**
   * Returns whether the job has to be dry run or not. If set, the job is not executed. A valid
   * query will return a mostly empty response with some processing statistics, while an invalid
   * query will return the same error it would if it wasn't a dry run.
   */
  public Boolean dryRun() {
    return dryRun;
  }

  @Override
  public Builder toBuilder() {
    return new Builder(this);
  }

  @Override
  ToStringHelper toStringHelper() {
    return super.toStringHelper()
        .add("query", query)
        .add("destinationTable", destinationTable)
        .add("defaultDataset", defaultDataset)
        .add("allowLargeResults", allowLargeResults)
        .add("flattenResults", flattenResults)
        .add("priority", priority)
        .add("tableDefinitions", tableDefinitions)
        .add("userQueryCache", useQueryCache)
        .add("userDefinedFunctions", userDefinedFunctions)
        .add("createDisposition", createDisposition)
        .add("writeDisposition", writeDisposition)
        .add("dryRun", dryRun);
  }

  @Override
  public boolean equals(Object obj) {
    return obj == this
        || obj instanceof QueryJobConfiguration
        && baseEquals((QueryJobConfiguration) obj);
  }

  @Override
  public int hashCode() {
    return Objects.hash(baseHashCode(), allowLargeResults, createDisposition, destinationTable,
        defaultDataset, flattenResults, priority, query, tableDefinitions, useQueryCache,
        userDefinedFunctions, writeDisposition, dryRun);
  }

  @Override
  QueryJobConfiguration setProjectId(String projectId) {
    Builder builder = toBuilder();
    if (destinationTable() != null) {
      builder.destinationTable(destinationTable().setProjectId(projectId));
    }
    if (defaultDataset() != null) {
      builder.defaultDataset(defaultDataset().setProjectId(projectId));
    }
    return builder.build();
  }

  @Override
  com.google.api.services.bigquery.model.JobConfiguration toPb() {
    com.google.api.services.bigquery.model.JobConfiguration configurationPb =
        new com.google.api.services.bigquery.model.JobConfiguration();
    JobConfigurationQuery queryConfigurationPb = new JobConfigurationQuery();
    queryConfigurationPb.setQuery(query);
    configurationPb.setDryRun(dryRun());
    if (allowLargeResults != null) {
      queryConfigurationPb.setAllowLargeResults(allowLargeResults);
    }
    if (createDisposition != null) {
      queryConfigurationPb.setCreateDisposition(createDisposition.toString());
    }
    if (destinationTable != null) {
      queryConfigurationPb.setDestinationTable(destinationTable.toPb());
    }
    if (defaultDataset != null) {
      queryConfigurationPb.setDefaultDataset(defaultDataset.toPb());
    }
    if (flattenResults != null) {
      queryConfigurationPb.setFlattenResults(flattenResults);
    }
    if (priority != null) {
      queryConfigurationPb.setPriority(priority.toString());
    }
    if (tableDefinitions != null) {
      queryConfigurationPb.setTableDefinitions(Maps.transformValues(tableDefinitions,
          ExternalTableDefinition.TO_EXTERNAL_DATA_FUNCTION));
    }
    if (useQueryCache != null) {
      queryConfigurationPb.setUseQueryCache(useQueryCache);
    }
    if (userDefinedFunctions != null) {
      queryConfigurationPb.setUserDefinedFunctionResources(
          Lists.transform(userDefinedFunctions, UserDefinedFunction.TO_PB_FUNCTION));
    }
    if (writeDisposition != null) {
      queryConfigurationPb.setWriteDisposition(writeDisposition.toString());
    }
    return configurationPb.setQuery(queryConfigurationPb);
  }

  /**
   * Creates a builder for a BigQuery Query Job given the query to be run.
   */
  public static Builder builder(String query) {
    return new Builder().query(query);
  }

  /**
   * Returns a BigQuery Copy Job for the given the query to be run. Job's id is chosen by the
   * service.
   */
  public static QueryJobConfiguration of(String query) {
    return builder(query).build();
  }

  @SuppressWarnings("unchecked")
  static QueryJobConfiguration fromPb(
      com.google.api.services.bigquery.model.JobConfiguration jobPb) {
    return new Builder(jobPb).build();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy