All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.drill.exec.physical.rowSet.RowSet Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.rowSet;

import java.util.Set;

import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.metadata.TupleMetadata;
import org.apache.drill.exec.record.selection.SelectionVector2;
import org.apache.drill.exec.record.selection.SelectionVector4;
import org.apache.drill.shaded.guava.com.google.common.annotations.VisibleForTesting;

/**
 * A row set is a collection of rows stored as value vectors. Elsewhere in
 * Drill we call this a "record batch", but that term has been overloaded to
 * mean the runtime implementation of an operator.
 * <p>
 * A row set encapsulates a set of vectors and provides access to Drill's
 * various "views" of vectors: {@link VectorContainer},
 * {@link VectorAccessible}, etc. The row set wraps a {@code TupleModel}
 * which holds the vectors and column metadata. This form is optimized
 * for easy use in testing; use other implementations for production code.
 * <p>
 * A row set is defined by a {@link TupleMetadata}. For testing purposes, a row
 * set has a fixed schema; we don't allow changing the set of vectors
 * dynamically.
 * <p>
 * The row set also provides a simple way to write and read records using the
 * {@link RowSetWriter} and {@link RowSetReader} interfaces. As per Drill
 * conventions, a row set can be written (once), read many times, and finally
 * cleared.
 * <p>
 * Drill provides a large number of vector (data) types. Each requires a
 * type-specific way to set data. The row set writer uses a
 * {@link org.apache.drill.exec.vector.accessor.ColumnWriter}
 * to set each value in a way unique to the specific data type. Similarly, the
 * row set reader provides a {@link org.apache.drill.exec.vector.accessor.ScalarReader}
 * interface. In both cases, columns can be accessed by index number
 * (as defined in the schema) or by name.
 * <p>
 * A row set follows a schema. The schema starts as a
 * {@link BatchSchema}, but is parsed and restructured into a variety of
 * forms. In the original form, maps contain their value vectors. In the
 * flattened form, all vectors for all maps (and the top-level tuple) are
 * collected into a single structure. Since this structure is for testing,
 * this somewhat-static structure works just fine; we don't need the added
 * complexity that comes from building the schema and data dynamically.
 * <p>
 * Putting this all together, the typical life-cycle flow is:
 * <ul>
 * <li>Define the schema using {@link org.apache.drill.exec.record.metadata.SchemaBuilder}.</li>
 * <li>Create the row set from the schema.</li>
 * <li>Populate the row set using a writer from {@link ExtendableRowSet#writer(int)}.</li>
 * <li>Process the vector container using the code under test.</li>
 * <li>Retrieve the results using a reader from {@link #reader()}.</li>
 * <li>Dispose of vector memory with {@link #clear()}.</li>
 * </ul>
 */
public interface RowSet {

  /**
   * @return {@code true} if this row set is extendable. NOTE(review):
   * presumably this corresponds to {@link ExtendableRowSet}; confirm
   * against the implementations.
   */
  boolean isExtendable();

  /**
   * @return {@code true} if this row set is writable. NOTE(review): the
   * exact conditions are defined by the implementations, not by this
   * interface.
   */
  boolean isWritable();

  /**
   * @return the {@link VectorAccessible} view of this row set's vectors
   */
  VectorAccessible vectorAccessible();

  /**
   * @return the {@link VectorContainer} view of this row set's vectors
   */
  VectorContainer container();

  /**
   * @return the row count of this row set
   */
  int rowCount();

  /**
   * Provides a reader used to retrieve the rows held by this row set.
   *
   * @return a {@link RowSetReader} over this row set
   */
  RowSetReader reader();

  /**
   * Disposes of the vector memory held by this row set.
   */
  void clear();

  /**
   * @return the {@link TupleMetadata} that defines this row set
   */
  TupleMetadata schema();

  /**
   * @return the {@link BufferAllocator} associated with this row set.
   * NOTE(review): presumably the allocator backing the vectors; confirm
   * against the implementations.
   */
  BufferAllocator allocator();

  /**
   * @return the {@link SelectionVectorMode} describing the kind of
   * indirection used by this row set
   */
  SelectionVectorMode indirectionType();

  /**
   * Debug-only tool to visualize a row set for inspection.
   * Do not use this in production code.
   */
  @VisibleForTesting
  void print();

  /**
   * Return the size in memory of this record set, including indirection
   * vectors, null vectors, offset vectors and the entire (used and unused)
   * data vectors.
   *
   * @return memory size in bytes
   */
  long size();

  /**
   * @return the schema of this row set in {@link BatchSchema} form
   */
  BatchSchema batchSchema();

  /**
   * Row set that manages a single batch of rows.
   */
  interface SingleRowSet extends RowSet {

    /**
     * Produces an indirect form of this row set. NOTE(review): presumably
     * the result carries a {@link SelectionVector2}; confirm against the
     * implementations.
     *
     * @return the indirect single row set
     */
    SingleRowSet toIndirect();

    /**
     * Produces an indirect form of this row set, taking a set of indices
     * to skip. NOTE(review): presumably these are row indices left out of
     * the indirect view; confirm against the implementations.
     *
     * @param skipIndices the indices to skip
     * @return the indirect single row set
     */
    SingleRowSet toIndirect(Set<Integer> skipIndices);

    /**
     * @return the {@link SelectionVector2} of this row set
     */
    SelectionVector2 getSv2();
  }

  /**
   * Single row set which is empty and allows writing.
   * Once writing is complete, the row set becomes an
   * immutable direct row set.
   */
  interface ExtendableRowSet extends SingleRowSet {

    /**
     * Allocates space for this row set. NOTE(review): presumably sizes the
     * vectors for the given number of records; confirm against the
     * implementations.
     *
     * @param recordCount the record count to allocate for
     */
    void allocate(int recordCount);

    /**
     * @return a {@link RowSetWriter} used to populate this row set
     */
    RowSetWriter writer();

    /**
     * @param initialRowCount the initial row count handed to the writer
     * @return a {@link RowSetWriter} used to populate this row set
     */
    RowSetWriter writer(int initialRowCount);
  }

  /**
   * Row set comprised of multiple single row sets, along with
   * an indirection vector (SV4).
   */
  interface HyperRowSet extends RowSet {

    /**
     * @return the {@link SelectionVector4} indirection vector of this row set
     */
    SelectionVector4 getSv4();
  }

  /**
   * Builder that accumulates batches and produces a {@link HyperRowSet}.
   */
  interface HyperRowSetBuilder {

    /**
     * Adds a batch supplied as a single row set.
     *
     * @param rowSet the row set to add
     */
    void addBatch(SingleRowSet rowSet);

    /**
     * Adds a batch supplied as a vector container.
     *
     * @param container the container to add
     */
    void addBatch(VectorContainer container);

    /**
     * Builds the hyper row set from the batches added so far.
     *
     * @return the resulting {@link HyperRowSet}
     * @throws SchemaChangeException declared by this method; the conditions
     * under which it is raised are implementation-defined
     */
    HyperRowSet build() throws SchemaChangeException;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy