All downloads are FREE. The search and download features use the official Maven repository.

com.netease.arctic.shade.org.apache.iceberg.BaseTableScan Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.netease.arctic.shade.org.apache.iceberg;

import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import com.netease.arctic.shade.org.apache.iceberg.events.Listeners;
import com.netease.arctic.shade.org.apache.iceberg.events.ScanEvent;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Binder;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Expression;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Expressions;
import com.netease.arctic.shade.org.apache.iceberg.io.CloseableIterable;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Sets;
import com.netease.arctic.shade.org.apache.iceberg.types.TypeUtil;
import com.netease.arctic.shade.org.apache.iceberg.util.DateTimeUtil;
import com.netease.arctic.shade.org.apache.iceberg.util.PropertyUtil;
import com.netease.arctic.shade.org.apache.iceberg.util.SnapshotUtil;
import com.netease.arctic.shade.org.apache.iceberg.util.TableScanUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for {@link TableScan} implementations.
 * <p>
 * A scan is described by the table, its {@link TableOperations}, the table schema and a
 * {@link TableScanContext}. Each refinement method (for example {@link #filter(Expression)},
 * {@link #select(Collection)} or {@link #useSnapshot(long)}) asks the context for an updated context and
 * returns the scan that {@link #newRefinedScan(TableOperations, Table, Schema, TableScanContext)} builds
 * from it.
 */
abstract class BaseTableScan implements TableScan {
  private static final Logger LOG = LoggerFactory.getLogger(BaseTableScan.class);

  private final TableOperations ops;
  private final Table table;
  // schema handed to the constructor; projections in lazyColumnProjection() are derived from it
  private final Schema schema;
  private final TableScanContext context;

  /**
   * Creates a scan that uses a freshly constructed {@link TableScanContext}.
   *
   * @param ops    operations handle used to read the current table metadata
   * @param table  table being scanned
   * @param schema schema the scan starts from
   */
  protected BaseTableScan(TableOperations ops, Table table, Schema schema) {
    this(ops, table, schema, new TableScanContext());
  }

  /**
   * Creates a scan with an explicit context.
   *
   * @param ops     operations handle used to read the current table metadata
   * @param table   table being scanned
   * @param schema  schema the scan starts from
   * @param context scan settings (snapshot id, filter, selected columns, options, ...)
   */
  protected BaseTableScan(TableOperations ops, Table table, Schema schema, TableScanContext context) {
    this.ops = ops;
    this.table = table;
    this.schema = schema;
    this.context = context;
  }

  /**
   * @return the schema this scan was constructed with, before any column selection or projection
   */
  protected Schema tableSchema() {
    return schema;
  }

  /**
   * @return the table operations this scan was constructed with
   */
  protected TableOperations tableOps() {
    return ops;
  }

  /**
   * @return the snapshot id held by the context, or {@code null} when no snapshot has been chosen
   */
  protected Long snapshotId() {
    return context.snapshotId();
  }

  /**
   * @return the context's {@code returnColumnStats} setting
   */
  protected boolean colStats() {
    return context.returnColumnStats();
  }

  /**
   * @return the context's {@code ignoreResiduals} setting
   */
  protected boolean shouldIgnoreResiduals() {
    return context.ignoreResiduals();
  }

  /**
   * @return the column names held by the context, or {@code null} when no columns were selected
   */
  protected Collection<String> selectedColumns() {
    return context.selectedColumns();
  }

  /**
   * @return the options held by the context (see {@link #option(String, String)})
   */
  protected Map<String, String> options() {
    return context.options();
  }

  /**
   * @return the context describing this scan
   */
  protected TableScanContext context() {
    return context;
  }

  /**
   * Builds the scan that a refinement method returns.
   *
   * @param ops     table operations to use for the new scan
   * @param table   table to scan
   * @param schema  schema the new scan starts from
   * @param context context carrying the refined settings
   * @return a scan configured with the given arguments
   */
  @SuppressWarnings("checkstyle:HiddenField")
  protected abstract TableScan newRefinedScan(
      TableOperations ops, Table table, Schema schema, TableScanContext context);

  /**
   * Plans the files of one snapshot. {@link #planFiles()} calls this only with a non-null snapshot and
   * forwards the filter and flags stored in the context.
   *
   * @param ops             table operations to plan with
   * @param snapshot        snapshot to scan
   * @param rowFilter       row filter of the scan
   * @param ignoreResiduals the context's {@code ignoreResiduals} setting
   * @param caseSensitive   the context's {@code caseSensitive} setting
   * @param colStats        the context's {@code returnColumnStats} setting
   * @return the planned files; the iterable is declared as a raw type here
   */
  @SuppressWarnings("checkstyle:HiddenField")
  protected abstract CloseableIterable planFiles(
      TableOperations ops, Snapshot snapshot, Expression rowFilter,
      boolean ignoreResiduals, boolean caseSensitive, boolean colStats);

  @Override
  public Table table() {
    return table;
  }

  /**
   * Not supported by this base class.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public TableScan appendsBetween(long fromSnapshotId, long toSnapshotId) {
    throw new UnsupportedOperationException("Incremental scan is not supported");
  }

  /**
   * Not supported by this base class.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public TableScan appendsAfter(long fromSnapshotId) {
    throw new UnsupportedOperationException("Incremental scan is not supported");
  }

  /**
   * Returns a scan pinned to the given snapshot.
   *
   * @param scanSnapshotId id of the snapshot to scan
   * @return a refined scan whose context carries {@code scanSnapshotId}
   * @throws IllegalArgumentException if a snapshot id is already set on this scan, or if the current table
   *                                  metadata has no snapshot with that id
   */
  @Override
  public TableScan useSnapshot(long scanSnapshotId) {
    Preconditions.checkArgument(context.snapshotId() == null,
        "Cannot override snapshot, already set to id=%s", context.snapshotId());
    Preconditions.checkArgument(ops.current().snapshot(scanSnapshotId) != null,
        "Cannot find snapshot with ID %s", scanSnapshotId);
    return newRefinedScan(
        ops, table, schema, context.useSnapshotId(scanSnapshotId));
  }

  /**
   * Returns a scan pinned to the snapshot id that {@link SnapshotUtil#snapshotIdAsOfTime} resolves for the
   * given timestamp.
   *
   * @param timestampMillis timestamp in milliseconds passed to the snapshot lookup
   * @return the scan produced by {@link #useSnapshot(long)} for the resolved id
   * @throws IllegalArgumentException if a snapshot id is already set on this scan
   */
  @Override
  public TableScan asOfTime(long timestampMillis) {
    // checked up front so an already pinned scan is rejected before any snapshot lookup happens
    Preconditions.checkArgument(context.snapshotId() == null,
        "Cannot override snapshot, already set to id=%s", context.snapshotId());

    return useSnapshot(SnapshotUtil.snapshotIdAsOfTime(table(), timestampMillis));
  }

  /**
   * Returns a scan whose context additionally carries the given option.
   *
   * @param property option name
   * @param value    option value
   */
  @Override
  public TableScan option(String property, String value) {
    return newRefinedScan(
        ops, table, schema, context.withOption(property, value));
  }

  /**
   * Returns a scan whose context carries the given projected schema. {@link #schema()} returns it only when
   * no columns were selected with {@link #select(Collection)}.
   *
   * @param projectedSchema schema to project
   */
  @Override
  public TableScan project(Schema projectedSchema) {
    return newRefinedScan(
        ops, table, schema, context.project(projectedSchema));
  }

  /**
   * Returns a scan with the given case sensitivity setting.
   *
   * @param scanCaseSensitive value stored in the context and later used for binding and column selection
   */
  @Override
  public TableScan caseSensitive(boolean scanCaseSensitive) {
    return newRefinedScan(
        ops, table, schema, context.setCaseSensitive(scanCaseSensitive));
  }

  /**
   * Returns a scan whose context has {@code returnColumnStats} switched on.
   */
  @Override
  public TableScan includeColumnStats() {
    return newRefinedScan(
        ops, table, schema, context.shouldReturnColumnStats(true));
  }

  /**
   * Returns a scan whose context carries the given column selection.
   *
   * @param columns names of the columns to select
   */
  @Override
  public TableScan select(Collection<String> columns) {
    return newRefinedScan(
        ops, table, schema, context.selectColumns(columns));
  }

  /**
   * Returns a scan whose row filter is the conjunction of the current filter and {@code expr}.
   *
   * @param expr expression to AND with the filter already held by the context
   */
  @Override
  public TableScan filter(Expression expr) {
    return newRefinedScan(ops, table, schema,
        context.filterRows(Expressions.and(context.rowFilter(), expr)));
  }

  /**
   * @return the row filter held by the context
   */
  @Override
  public Expression filter() {
    return context.rowFilter();
  }

  /**
   * Returns a scan whose context has {@code ignoreResiduals} switched on.
   */
  @Override
  public TableScan ignoreResiduals() {
    return newRefinedScan(
        ops, table, schema, context.ignoreResiduals(true));
  }

  /**
   * Plans the files of the snapshot returned by {@link #snapshot()}.
   * <p>
   * When a snapshot exists, the scan is logged, a {@link ScanEvent} is sent to the registered listeners and
   * planning is delegated to
   * {@link #planFiles(TableOperations, Snapshot, Expression, boolean, boolean, boolean)}. When there is no
   * snapshot, an empty iterable is returned.
   *
   * @return the planned files, or an empty iterable when there is no snapshot to scan
   */
  @Override
  public CloseableIterable planFiles() {
    Snapshot snapshot = snapshot();
    if (snapshot != null) {
      LOG.info("Scanning table {} snapshot {} created at {} with filter {}", table,
          snapshot.snapshotId(), DateTimeUtil.formatTimestampMillis(snapshot.timestampMillis()),
          context.rowFilter());

      // the event reports the projected schema (schema()), not the schema given to the constructor
      Listeners.notifyAll(
          new ScanEvent(table.name(), snapshot.snapshotId(), context.rowFilter(), schema()));

      return planFiles(ops, snapshot,
          context.rowFilter(), context.ignoreResiduals(), context.caseSensitive(), context.returnColumnStats());

    } else {
      LOG.info("Scanning empty table {}", table);
      return CloseableIterable.empty();
    }
  }

  /**
   * Plans the files with {@link #planFiles()}, splits them with {@link TableScanUtil#splitFiles} and
   * combines the splits with {@link TableScanUtil#planTasks}.
   * <p>
   * NOTE(review): {@code targetSplitSize()} is not defined in this class; presumably it is declared by
   * {@link TableScan} and implemented by subclasses - confirm.
   *
   * @return the planned tasks; the iterable is declared as a raw type here
   */
  @Override
  public CloseableIterable planTasks() {
    CloseableIterable fileScanTasks = planFiles();
    CloseableIterable splitFiles = TableScanUtil.splitFiles(fileScanTasks, targetSplitSize());
    return TableScanUtil.planTasks(splitFiles, targetSplitSize(), splitLookback(), splitOpenFileCost());
  }

  /**
   * Resolves {@link TableProperties#SPLIT_LOOKBACK}: the table-level value (with
   * {@link TableProperties#SPLIT_LOOKBACK_DEFAULT} as its default) is read first and then passed as the
   * default when the same key is looked up in the scan options.
   */
  @Override
  public int splitLookback() {
    int tableValue = tableOps().current().propertyAsInt(
        TableProperties.SPLIT_LOOKBACK,
        TableProperties.SPLIT_LOOKBACK_DEFAULT);
    return PropertyUtil.propertyAsInt(options(), TableProperties.SPLIT_LOOKBACK, tableValue);
  }

  /**
   * Resolves {@link TableProperties#SPLIT_OPEN_FILE_COST}: the table-level value (with
   * {@link TableProperties#SPLIT_OPEN_FILE_COST_DEFAULT} as its default) is read first and then passed as
   * the default when the same key is looked up in the scan options.
   */
  @Override
  public long splitOpenFileCost() {
    long tableValue = tableOps().current().propertyAsLong(
        TableProperties.SPLIT_OPEN_FILE_COST,
        TableProperties.SPLIT_OPEN_FILE_COST_DEFAULT);
    return PropertyUtil.propertyAsLong(options(), TableProperties.SPLIT_OPEN_FILE_COST, tableValue);
  }

  /**
   * @return the schema this scan projects, resolved on every call from the current context
   */
  @Override
  public Schema schema() {
    return lazyColumnProjection();
  }

  /**
   * Looks up the snapshot to scan in the current table metadata.
   *
   * @return the snapshot with the context's snapshot id when one is set, otherwise the current snapshot;
   *         {@link #planFiles()} treats a {@code null} result as an empty table
   */
  @Override
  public Snapshot snapshot() {
    return context.snapshotId() != null ?
        ops.current().snapshot(context.snapshotId()) :
        ops.current().currentSnapshot();
  }

  @Override
  public boolean isCaseSensitive() {
    return context.caseSensitive();
  }

  /**
   * Describes the scan by table, projected struct, row filter and the {@code ignoreResiduals} and
   * {@code caseSensitive} settings.
   */
  @Override
  public String toString() {
    return MoreObjects.toStringHelper(this)
        .add("table", table)
        .add("projection", schema().asStruct())
        .add("filter", context.rowFilter())
        .add("ignoreResiduals", context.ignoreResiduals())
        .add("caseSensitive", context.caseSensitive())
        .toString();
  }

  /**
   * To be able to make refinements {@link #select(Collection)} and {@link #caseSensitive(boolean)} in any order,
   * we resolve the schema to be projected lazily here.
   * <p>
   * Selected columns take precedence: when present, the result contains the selected fields plus every
   * field referenced by the row filter. Otherwise the projected schema from the context is used if set,
   * and the constructor schema is the final fallback.
   *
   * @return the Schema to project
   */
  private Schema lazyColumnProjection() {
    Collection<String> selectedColumns = context.selectedColumns();
    if (selectedColumns != null) {
      Set<Integer> requiredFieldIds = Sets.newHashSet();

      // all of the filter columns are required
      requiredFieldIds.addAll(
          Binder.boundReferences(schema.asStruct(),
              Collections.singletonList(context.rowFilter()), context.caseSensitive()));

      // all of the projection columns are required
      Set<Integer> selectedIds;
      if (context.caseSensitive()) {
        selectedIds = TypeUtil.getProjectedIds(schema.select(selectedColumns));
      } else {
        selectedIds = TypeUtil.getProjectedIds(schema.caseInsensitiveSelect(selectedColumns));
      }
      requiredFieldIds.addAll(selectedIds);

      return TypeUtil.project(schema, requiredFieldIds);

    } else if (context.projectedSchema() != null) {
      return context.projectedSchema();
    }

    return schema;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy