org.apache.drill.exec.physical.base.GroupScan Maven / Gradle / Ivy

Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.base;

import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.Collection;
import java.util.List;

import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.drill.common.exceptions.ExecutionSetupException;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.expr.fn.FunctionImplementationRegistry;
import org.apache.drill.exec.metastore.analyze.AnalyzeInfoProvider;
import org.apache.drill.exec.ops.UdfUtilities;
import org.apache.drill.exec.physical.PhysicalOperatorSetupException;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;
import org.apache.drill.exec.server.options.OptionManager;
import org.apache.drill.metastore.metadata.TableMetadata;
import org.apache.drill.metastore.metadata.TableMetadataProvider;
import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
import org.apache.hadoop.fs.Path;

/**
 * A GroupScan operator represents all data which will be scanned by a given physical
 * plan. It is the superset of all SubScans for the plan.
 */
public interface GroupScan extends Scan, HasAffinity {

  /**
   * columns list in GroupScan : 1) empty_column is for skipAll query.
   *                             2) NULL is interpreted as ALL_COLUMNS.
   *  How to handle skipAll query is up to each storage plugin, with different policy in corresponding RecordReader.
   */
  List ALL_COLUMNS = ImmutableList.of(SchemaPath.STAR_COLUMN);

  void applyAssignments(List endpoints) throws PhysicalOperatorSetupException;

  SubScan getSpecificScan(int minorFragmentId) throws ExecutionSetupException;

  @JsonIgnore
  int getMaxParallelizationWidth();

  @JsonIgnore
  boolean isDistributed();

  /**
   * At minimum, the GroupScan requires these many fragments to run.
   * Currently, this is used in {@link org.apache.drill.exec.planner.fragment.SimpleParallelizer}
   * @return the minimum number of fragments that should run
   */
  @JsonIgnore
  int getMinParallelizationWidth();

  /**
   * Check if GroupScan enforces width to be maximum parallelization width.
   * Currently, this is used in {@link org.apache.drill.exec.planner.physical.visitor.ExcessiveExchangeIdentifier}
   * @return if maximum width should be enforced
   *
   * @deprecated Use {@link #getMinParallelizationWidth()} to determine whether this GroupScan spans more than one
   * fragment.
   */
  @JsonIgnore
  @Deprecated
  boolean enforceWidth();

  /**
   * Returns a signature of the {@link GroupScan} which should usually be composed of
   * all its attributes which could describe it uniquely.
   */
  @JsonIgnore
  String getDigest();

  @JsonIgnore
  ScanStats getScanStats(PlannerSettings settings);

  @JsonIgnore
  ScanStats getScanStats(RelMetadataQuery mq);

  /**
   * Returns a clone of GroupScan instance, except that the new GroupScan will use the provided list of columns .
   */
  GroupScan clone(List columns);

  /**
   * GroupScan should check the list of columns, and see if it could support all the columns in the list.
   */
  boolean canPushdownProjects(List columns);

  /**
   * Return the number of non-null value in the specified column. Raise exception, if groupscan does not
   * have exact column row count.
   */
  long getColumnValueCount(SchemaPath column);

  /**
   * Whether or not this GroupScan supports pushdown of partition filters (directories for filesystems)
   */
  boolean supportsPartitionFilterPushdown();

  /**
   * Returns a list of columns scanned by this group scan
   *
   */
  List getColumns();

  /**
   * Returns a list of columns that can be used for partition pruning
   *
   */
  @JsonIgnore
  List getPartitionColumns();

  /**
   * Whether or not this GroupScan supports limit pushdown
   */
  boolean supportsLimitPushdown();

  /**
   * Apply rowcount based prune for "LIMIT n" query.
   * @param maxRecords : the number of rows requested from group scan.
   * @return  a new instance of group scan if the prune is successful.
   *          null when either if row-based prune is not supported, or if prune is not successful.
   */
  GroupScan applyLimit(int maxRecords);

  /**
   * Return true if this GroupScan can return its selection as a list of file names (retrieved by getFiles()).
   */
  @JsonIgnore
  boolean hasFiles();

  /**
   * Returns path to the selection root. If this GroupScan cannot provide selection root, it returns null.
   *
   * @return path to the selection root
   */
  Path getSelectionRoot();

  /**
   * Returns a collection of file names associated with this GroupScan. This should be called after checking
   * hasFiles(). If this GroupScan cannot provide file names, it returns null.
   *
   * @return collection of files paths
   */
  Collection getFiles();

  @JsonIgnore
  LogicalExpression getFilter();

  GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtilities,
                        FunctionImplementationRegistry functionImplementationRegistry, OptionManager optionManager);

  /**
   * Returns {@link TableMetadataProvider} instance which is used for providing metadata for current {@link GroupScan}.
   *
   * @return {@link TableMetadataProvider} instance the source of metadata
   */
  @JsonIgnore
  TableMetadataProvider getMetadataProvider();

  @JsonIgnore
  TableMetadata getTableMetadata();

  /**
   * Returns {@code true} if current group scan uses metadata obtained from the Metastore.
   *
   * @return {@code true} if current group scan uses metadata obtained from the Metastore, {@code false} otherwise.
   */
  @JsonIgnore
  boolean usedMetastore();

  /**
   * Returns {@link AnalyzeInfoProvider} instance which will be used when running ANALYZE statement.
   *
   * @return {@link AnalyzeInfoProvider} instance
   */
  @JsonIgnore
  AnalyzeInfoProvider getAnalyzeInfoProvider();

  /**
   * Checks whether this group scan supports filter push down.
   *
   * @return {@code true} if this group scan supports filter push down,
   * {@code false} otherwise
   */
  @JsonIgnore
  boolean supportsFilterPushDown();
}