All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.netease.arctic.shade.org.apache.iceberg.TableScan Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.netease.arctic.shade.org.apache.iceberg;

import java.util.Collection;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Expression;
import com.netease.arctic.shade.org.apache.iceberg.io.CloseableIterable;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Lists;

/**
 * API for configuring a table scan.
 * <p>
 * TableScan objects are immutable and can be shared between threads. Refinement methods, like
 * {@link #select(Collection)} and {@link #filter(Expression)}, create new TableScan instances.
 */
public interface TableScan {
  /**
   * Returns the {@link Table} from which this scan loads data.
   *
   * @return this scan's table
   */
  Table table();

  /**
   * Create a new {@link TableScan} from this scan's configuration that will use the given snapshot
   * by ID.
   *
   * @param snapshotId a snapshot ID
   * @return a new scan based on this with the given snapshot ID
   * @throws IllegalArgumentException if the snapshot cannot be found
   */
  TableScan useSnapshot(long snapshotId);

  /**
   * Create a new {@link TableScan} from this scan's configuration that will use the most recent
   * snapshot as of the given time in milliseconds.
   *
   * @param timestampMillis a timestamp in milliseconds.
   * @return a new scan based on this with the current snapshot at the given time
   * @throws IllegalArgumentException if the snapshot cannot be found
   */
  TableScan asOfTime(long timestampMillis);

  /**
   * Create a new {@link TableScan} from this scan's configuration that will override the
   * {@link Table}'s behavior based on the incoming pair. Unknown properties will be ignored.
   *
   * @param property name of the table property to be overridden
   * @param value value to override with
   * @return a new scan based on this with overridden behavior
   */
  TableScan option(String property, String value);

  /**
   * Create a new {@link TableScan} from this with the schema as its projection.
   *
   * @param schema a projection schema
   * @return a new scan based on this with the given projection
   */
  TableScan project(Schema schema);

  /**
   * Create a new {@link TableScan} from this that, if data columns were selected
   * via {@link #select(java.util.Collection)}, controls whether the match to the schema will be
   * done with case sensitivity.
   *
   * @param caseSensitive whether column names are matched to the schema with case sensitivity
   * @return a new scan based on this with case sensitivity as stated
   */
  TableScan caseSensitive(boolean caseSensitive);

  /**
   * Create a new {@link TableScan} from this that loads the column stats with each data file.
   * <p>
   * Column stats include: value count, null value count, lower bounds, and upper bounds.
   *
   * @return a new scan based on this that loads column stats.
   */
  TableScan includeColumnStats();

  /**
   * Create a new {@link TableScan} from this that will read the given data columns. This produces
   * an expected schema that includes all fields that are either selected or used by this scan's
   * filter expression.
   *
   * @param columns column names from the table's schema
   * @return a new scan based on this with the given projection columns
   */
  default TableScan select(String... columns) {
    // Delegates to the Collection-based overload so implementations only provide one variant.
    return select(Lists.newArrayList(columns));
  }

  /**
   * Create a new {@link TableScan} from this that will read the given data columns. This produces
   * an expected schema that includes all fields that are either selected or used by this scan's
   * filter expression.
   *
   * @param columns column names from the table's schema
   * @return a new scan based on this with the given projection columns
   */
  TableScan select(Collection<String> columns);

  /**
   * Create a new {@link TableScan} from the results of this filtered by the {@link Expression}.
   *
   * @param expr a filter expression
   * @return a new scan based on this with results filtered by the expression
   */
  TableScan filter(Expression expr);

  /**
   * Returns this scan's filter {@link Expression}.
   *
   * @return this scan's filter expression
   */
  Expression filter();

  /**
   * Create a new {@link TableScan} from this that applies data filtering to files but not to rows
   * in those files.
   *
   * @return a new scan based on this that does not filter rows in files.
   */
  TableScan ignoreResiduals();

  /**
   * Create a new {@link TableScan} to read appended data from {@code fromSnapshotId} exclusive to
   * {@code toSnapshotId} inclusive.
   *
   * @param fromSnapshotId the last snapshot id read by the user, exclusive
   * @param toSnapshotId read append data up to this snapshot id
   * @return a table scan which can read append data from {@code fromSnapshotId}
   *     exclusive and up to {@code toSnapshotId} inclusive
   */
  TableScan appendsBetween(long fromSnapshotId, long toSnapshotId);

  /**
   * Create a new {@link TableScan} to read appended data from {@code fromSnapshotId} exclusive to
   * the current snapshot inclusive.
   *
   * @param fromSnapshotId the last snapshot id read by the user, exclusive
   * @return a table scan which can read append data from {@code fromSnapshotId}
   *     exclusive and up to current snapshot inclusive
   */
  TableScan appendsAfter(long fromSnapshotId);

  /**
   * Plan the {@link FileScanTask files} that will be read by this scan.
   * <p>
   * Each file has a residual expression that should be applied to filter the file's rows.
   * <p>
   * This simple plan returns file scans for each file from position 0 to the file's length. For
   * planning that will combine small files, split large files, and attempt to balance work, use
   * {@link #planTasks()} instead.
   *
   * @return an Iterable of file tasks that are required by this scan
   */
  CloseableIterable<FileScanTask> planFiles();

  /**
   * Plan the {@link CombinedScanTask tasks} for this scan.
   * <p>
   * Tasks created by this method may read partial input files, multiple input files, or both.
   *
   * @return an Iterable of tasks for this scan
   */
  CloseableIterable<CombinedScanTask> planTasks();

  /**
   * Returns this scan's projection {@link Schema}.
   * <p>
   * If the projection schema was set directly using {@link #project(Schema)}, returns that schema.
   * <p>
   * If the projection schema was set by calling {@link #select(Collection)}, returns a projection
   * schema that includes the selected data fields and any fields used in the filter expression.
   *
   * @return this scan's projection schema
   */
  Schema schema();

  /**
   * Returns the {@link Snapshot} that will be used by this scan.
   * <p>
   * If the snapshot was not configured using {@link #asOfTime(long)} or
   * {@link #useSnapshot(long)}, the current table snapshot will be used.
   *
   * @return the Snapshot this scan will use
   */
  Snapshot snapshot();

  /**
   * Returns whether this scan should apply column name case sensitiveness as per
   * {@link #caseSensitive(boolean)}.
   *
   * @return true if case sensitive, false otherwise.
   */
  boolean isCaseSensitive();

  /**
   * Returns the target split size for this scan.
   *
   * @return the target split size for this scan
   */
  long targetSplitSize();

  /**
   * Returns the split lookback for this scan.
   *
   * @return the split lookback for this scan
   */
  int splitLookback();

  /**
   * Returns the split open file cost for this scan.
   *
   * @return the split open file cost for this scan
   */
  long splitOpenFileCost();
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy