All downloads are free. The search and download functionalities use the official Maven repository.

org.apache.drill.exec.physical.resultSet.ResultSetLoader Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.resultSet;

import org.apache.drill.common.exceptions.CustomErrorContext;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.vector.BaseValueVector;
import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;

/**
 * Builds a result set (series of zero or more row sets) based on a defined
 * schema which may evolve (expand) over time. Automatically rolls "overflow"
 * rows over when a batch fills.
 * <p>
 * Many of the methods in this interface verify that the loader is
 * in the proper state. For example, an exception is thrown if the caller
 * attempts to save a row before starting a batch. However, the per-column
 * write methods are checked only through assertions that should be enabled
 * during testing, but will be disabled during production.
 *
 * @see VectorContainerWriter VectorContainerWriter, the class which this
 *      class replaces
 */
public interface ResultSetLoader {

  // Interface fields are implicitly public, static and final (JLS 9.3),
  // so the redundant modifiers are omitted.
  int DEFAULT_ROW_COUNT = BaseValueVector.INITIAL_VALUE_ALLOCATION;

  /**
   * Context for error messages.
   */
  CustomErrorContext errorContext();

  /**
   * Current schema version. The version increments by one each time
   * a column is added.
   *
   * @return the current schema version
   */
  int schemaVersion();

  /**
   * Adjust the number of rows to produce in the next batch. Takes
   * effect after the next call to {@link #startBatch()}.
   *
   * @param count target batch row count
   */
  void setTargetRowCount(int count);

  /**
   * The number of rows produced by this loader (as configured in the loader
   * options.)
   *
   * @return the target row count for batches that this loader produces
   */
  int targetRowCount();

  /**
   * The maximum number of rows for the present batch. Will be the lesser
   * of the {@link #targetRowCount()} and the overall scan limit remaining.
   */
  int maxBatchSize();

  /**
   * The largest vector size produced by this loader (as specified by
   * the value vector limit.)
   *
   * @return the largest vector size. Attempting to extend a vector beyond
   * this limit causes automatic vector overflow and terminates the
   * in-flight batch, even if the batch has not yet reached the target
   * row count
   */
  int targetVectorSize();

  /**
   * Total number of batches created. Includes the current batch if
   * the row count in this batch is non-zero.
   *
   * @return the number of batches produced including the current one
   */
  int batchCount();

  /**
   * Total number of rows loaded for all previous batches and the
   * current batch.
   *
   * @return total row count
   */
  long totalRowCount();

  /**
   * Report whether the loader currently holds rows. If within a batch,
   * reports if at least one row has been read (which might be a look-ahead
   * row.) If between batches, reports if a look-ahead row is available.
   *
   * @return true if at least one row is available to harvest, false
   * otherwise
   */
  boolean hasRows();

  /**
   * Start a new row batch. Valid only when first started, or after the
   * previous batch has been harvested.
   *
   * @return {@code true} if another batch can be read, {@code false} if
   * the reader has reached the given scan limit.
   */
  boolean startBatch();

  /**
   * Writer for the top-level tuple (the entire row). Valid only when
   * the mutator is actively writing a batch (after {@code startBatch()}
   * but before {@code harvest()}.)
   *
   * @return writer for the top-level columns
   */
  RowSetLoader writer();

  /**
   * Reports whether the loader is in a writable state. The writable state
   * occurs only when a batch has been started, and before that batch
   * becomes full.
   *
   * @return true if the client can add a row to the loader, false if not
   */
  boolean writeable();

  /**
   * Load a row using column values passed as variable-length arguments.
   * Expects map values to be represented as an array.
   * A schema of {@code (a:int, b:map(c:varchar))} would be set as:
   * <pre><code>
   * loadRow(10, new Object[] {"foo"});
   * </code></pre>
   * Values of arrays can be expressed as a Java array.
   * A schema of {@code (a:int, b:int[])} can be set as:
   * <pre><code>
   * loadRow(10, new int[] {100, 200});
   * </code></pre>
   * Primarily for testing, too slow for production code.
   * <p>
   * If the row consists of a single map or list, then the one value will
   * be an Object array, creating an ambiguity. Use
   * {@code writer().set(0, value);} in this case.
   *
   * @param values column values in column index order
   * @return this loader
   */
  ResultSetLoader setRow(Object... values);

  /**
   * Requests to skip the given number of rows. Returns the number of rows
   * actually skipped (which is limited by batch count.)
   * <p>
   * Used in {@code SELECT COUNT(*)} style queries when the downstream
   * operators want just the record count, but no actual rows.
   * <p>
   * Also used to fill in a batch of only null values (such as filling
   * in a set of null vectors for unprojected columns.)
   *
   * @param requestedCount the number of rows to skip
   * @return the actual number of rows skipped, which may be less than the
   * requested amount. If less, the client should call this method for
   * multiple batches until the requested count is reached
   */
  int skipRows(int requestedCount);

  /**
   * Reports if this is an empty projection such as occurs in a
   * {@code SELECT COUNT(*)} query. If the projection is empty, then
   * the downstream needs only the row count set in each batch, but no
   * actual vectors will be created. In this case, the client can do
   * the work to populate rows (the data will be discarded), or can call
   * {@link #skipRows(int)} to skip over the number of rows that would
   * have been read if any data had been projected.
   * <p>
   * Note that the empty schema case can also occur if the project list
   * from the {@code SELECT} clause is disjoint from the table schema.
   * For example, {@code SELECT a, b} from a table with schema
   * {@code (c, d)}.
   *
   * @return true if no columns are actually projected, false if at
   * least one column is projected
   */
  boolean isProjectionEmpty();

  /**
   * Returns the active output schema; the schema used by the writers,
   * minus any unprojected columns. This is usually the same as the
   * output schema, but may differ if the writer adds columns during
   * an overflow row. Unlike the output schema, this schema is defined
   * as long as the loader is open.
   */
  TupleMetadata activeSchema();

  /**
   * Returns the output container which holds (or will hold) batches
   * from this loader. For use when the container is needed prior
   * to "harvesting" a batch. The data is not valid until
   * {@link #harvest()} is called, and is no longer valid once
   * {@link #startBatch()} is called.
   *
   * @return container used to publish results from this loader
   */
  VectorContainer outputContainer();

  /**
   * Harvest the current row batch, and reset the mutator
   * to the start of the next row batch (which may already contain
   * an overflow row.)
   * <p>
   * The schema of the returned container is defined as:
   * <ul>
   * <li>The schema as passed in via the loader options, plus</li>
   * <li>Columns added dynamically during write, minus</li>
   * <li>Any columns not included in the project list, minus</li>
   * <li>Any columns added in the overflow row.</li>
   * </ul>
   * That is, column order is as defined by the initial schema and column
   * additions. In particular, the schema order is <b>not</b> defined by
   * the projection list. (Another mechanism is required to reorder columns
   * for the actual projection.)
   *
   * @return the row batch to send downstream
   */
  VectorContainer harvest();

  /**
   * After a {@link #harvest()} call, call this method to determine if
   * the scan limit has been hit. If so, treat this as the final batch
   * for the reader, even if more data is available to read.
   *
   * @return {@code true} if the scan has reached a set scan row limit,
   * {@code false} if there is no limit, or more rows can be read.
   */
  boolean atLimit();

  /**
   * The schema of the harvested batch. Valid until the start of the
   * next batch.
   *
   * @return the extended schema of the harvested batch which includes
   * any allocation hints used when creating the batch
   */
  TupleMetadata outputSchema();

  /**
   * Peek at the internal vector cache for readers that need a bit of help
   * resolving types based on what was previously seen.
   *
   * @return real or dummy vector cache
   */
  ResultVectorCache vectorCache();

  /**
   * Called after all rows are returned, whether because no more data is
   * available, or the caller wishes to cancel the current row batch
   * and complete.
   */
  void close();
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy