// com.hazelcast.org.apache.calcite.adapter.clone.ArrayTable (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.org.apache.calcite.adapter.clone;
import com.hazelcast.org.apache.calcite.DataContext;
import com.hazelcast.org.apache.calcite.adapter.java.AbstractQueryableTable;
import com.hazelcast.org.apache.calcite.linq4j.AbstractEnumerable;
import com.hazelcast.org.apache.calcite.linq4j.Enumerable;
import com.hazelcast.org.apache.calcite.linq4j.Enumerator;
import com.hazelcast.org.apache.calcite.linq4j.Ord;
import com.hazelcast.org.apache.calcite.linq4j.QueryProvider;
import com.hazelcast.org.apache.calcite.linq4j.Queryable;
import com.hazelcast.org.apache.calcite.linq4j.tree.Primitive;
import com.hazelcast.org.apache.calcite.rel.RelCollation;
import com.hazelcast.org.apache.calcite.rel.RelCollations;
import com.hazelcast.org.apache.calcite.rel.type.RelDataType;
import com.hazelcast.org.apache.calcite.rel.type.RelDataTypeFactory;
import com.hazelcast.org.apache.calcite.rel.type.RelProtoDataType;
import com.hazelcast.org.apache.calcite.schema.ScannableTable;
import com.hazelcast.org.apache.calcite.schema.SchemaPlus;
import com.hazelcast.org.apache.calcite.schema.Statistic;
import com.hazelcast.org.apache.calcite.schema.Statistics;
import com.hazelcast.org.apache.calcite.schema.impl.AbstractTableQueryable;
import com.hazelcast.org.apache.calcite.util.ImmutableBitSet;
import com.hazelcast.org.apache.calcite.util.Pair;
import com.hazelcast.com.google.common.base.Supplier;
import com.hazelcast.com.google.common.collect.ImmutableList;
import com.hazelcast.org.checkerframework.checker.nullness.qual.Nullable;
import java.lang.reflect.Array;
import java.lang.reflect.Type;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static java.util.Objects.requireNonNull;
/**
* Implementation of table that reads rows from column stores, one per column.
* Column store formats are chosen based on the type and distribution of the
* values in the column; see {@link Representation} and
* {@link RepresentationType}.
*/
class ArrayTable extends AbstractQueryableTable implements ScannableTable {
/** Produces the row type of this table when applied to a type factory. */
private final RelProtoDataType protoRowType;

/** Supplies the {@link Content} holding this table's column data. It is
 * consulted each time statistics or an enumerator are requested. */
private final Supplier<Content> supplier;

/**
 * Creates an ArrayTable.
 *
 * @param elementType element type, passed on to the superclass
 * @param protoRowType produces the row type, given a type factory
 * @param supplier provides the {@link Content} that backs this table
 */
ArrayTable(Type elementType, RelProtoDataType protoRowType,
    Supplier<Content> supplier) {
  super(elementType);
  this.protoRowType = protoRowType;
  this.supplier = supplier;
}
/**
 * Returns the row type of this table, obtained by applying the stored
 * prototype to the given type factory.
 */
@Override
public RelDataType getRowType(RelDataTypeFactory typeFactory) {
  return protoRowType.apply(typeFactory);
}
/**
 * Returns statistics computed from the current content: its size, the
 * single-column keys, and its collations.
 *
 * <p>A column is reported as a key when its cardinality equals the size of
 * the content.
 */
@Override public Statistic getStatistic() {
  final Content content = supplier.get();
  final List<ImmutableBitSet> keys = new ArrayList<>();
  for (Ord<Column> ord : Ord.zip(content.columns)) {
    // Cardinality equal to the content size: treat the column as a key.
    if (ord.e.cardinality == content.size) {
      keys.add(ImmutableBitSet.of(ord.i));
    }
  }
  return Statistics.of(content.size, keys, content.collations);
}
/**
 * Returns an enumerable over the rows of this table, each row an object
 * array.
 *
 * <p>The content is fetched from the supplier whenever an enumerator is
 * requested, not when this method is called.
 */
@Override
public Enumerable<@Nullable Object[]> scan(DataContext root) {
  final AbstractEnumerable<@Nullable Object[]> rows =
      new AbstractEnumerable<@Nullable Object[]>() {
        @Override
        public Enumerator<@Nullable Object[]> enumerator() {
          final Content current = supplier.get();
          return current.arrayEnumerator();
        }
      };
  return rows;
}
/**
 * Returns a queryable view of this table.
 *
 * <p>The returned queryable fetches the content from the supplier each
 * time an enumerator is requested.
 *
 * @param queryProvider query provider handed to the queryable
 * @param schema schema handed to the queryable
 * @param tableName table name handed to the queryable
 * @param <T> element type of the queryable
 */
@Override public <T> Queryable<T> asQueryable(final QueryProvider queryProvider,
    SchemaPlus schema, String tableName) {
  return new AbstractTableQueryable<T>(queryProvider, schema, this,
      tableName) {
    // The enumerator's element type is chosen by the caller and cannot be
    // checked here, hence the suppression.
    @SuppressWarnings("unchecked")
    @Override public Enumerator<T> enumerator() {
      final Content content = supplier.get();
      return content.enumerator();
    }
  };
}
/** How a column's values are represented. */
enum RepresentationType {
/** Constant. Contains only one value.
 *
 * <p>We can't store 0-bit values in
 * an array: we'd have no way of knowing how many there were.
 *
 * @see Constant
 */
CONSTANT,
/** Object array. Null values are represented by null. Values may or may
 * not be canonized; if canonized, = and != can be implemented using
 * pointer comparison.
 *
 * @see ObjectArray
 */
OBJECT_ARRAY,
/**
 * Array of primitives. Null values not possible. Only for primitive
 * types (and not optimal for boolean).
 *
 * @see PrimitiveArray
 */
PRIMITIVE_ARRAY,
/** Bit-sliced primitive array. Values are {@code bitCount} bits each,
 * and interpreted as signed. Stored as an array of long values.
 *
 * <p>If {@code bitCount} does not divide 64 evenly, some values will cross
 * the boundary between two longs. (The earlier wording of this condition,
 * "gcd(bitCount, 64) != 0", is always true and was presumably a slip --
 * TODO confirm the intended condition.) Of the bit counts that avoid
 * crossing boundaries, all except 4 are said to have a primitive
 * type (8 byte, 16 short, 32 int) which is more efficient.
 *
 * @see BitSlicedPrimitiveArray
 */
BIT_SLICED_PRIMITIVE_ARRAY,
/**
 * Dictionary of primitives. Use one of the previous methods to store
 * unsigned offsets into the dictionary. Dictionary is canonized and
 * sorted, so {@code v1 < v2} if and only if {@code code(v1) < code(v2)}.
 * The dictionary may or may not contain a null value.
 *
 * <p>The dictionary is not beneficial unless the codes are
 * significantly shorter than the values. A column of {@code long}
 * values with many duplicates is a win; a column of mostly distinct
 * {@code short} values is likely a loss. The other win is if there are
 * null values; otherwise the best option would be an
 * {@link #OBJECT_ARRAY}.
 *
 * @see PrimitiveDictionary
 */
PRIMITIVE_DICTIONARY,
/**
 * Dictionary of objects. Use one of the previous methods to store
 * unsigned offsets into the dictionary.
 *
 * @see ObjectDictionary
 */
OBJECT_DICTIONARY,
/**
 * Compressed string table. Block of char data. Strings represented
 * using an unsigned offset into the table (stored using one of the
 * previous methods).
 *
 * <p>First 2 bytes are unsigned length; subsequent bytes are string
 * contents. The null value, strings longer than 64k and strings that
 * occur very commonly are held in an 'exceptions' array and are
 * recognized by their high offsets. Other strings are created on demand
 * (this reduces the number of objects that need to be created during
 * deserialization from cache).
 *
 * @see StringDictionary
 */
STRING_DICTIONARY,
/**
 * Compressed byte array table. Similar to compressed string table.
 *
 * @see ByteStringDictionary
 */
BYTE_STRING_DICTIONARY,
}
/** A column: the representation of its values, the data set holding those
 * values, and its cardinality. */
public static class Column {
  final Representation representation;
  final Object dataSet;
  final int cardinality;

  Column(Representation representation, Object data, int cardinality) {
    this.representation = representation;
    this.dataSet = data;
    this.cardinality = cardinality;
  }

  /** Returns a column with the same representation and cardinality whose
   * data set has been re-ordered by the representation according to
   * {@code sources}. */
  public Column permute(int[] sources) {
    final Object reordered = representation.permute(dataSet, sources);
    return new Column(representation, reordered, cardinality);
  }

  @Override public String toString() {
    final StringBuilder buf = new StringBuilder();
    buf.append("Column(representation=").append(representation);
    buf.append(", value=").append(representation.toString(dataSet));
    buf.append(")");
    return buf.toString();
  }

  /** Returns a read-only list view onto a data set; elements are fetched
   * from the representation on each access. */
  public static List asList(final Representation representation,
      final Object dataSet) {
    // Determining the size may be costly, so it is computed once, up front.
    final int cachedSize = representation.size(dataSet);
    return new AbstractList() {
      @Override public int size() {
        return cachedSize;
      }

      @Override public @Nullable Object get(int index) {
        return representation.getObject(dataSet, index);
      }
    };
  }
}
/** Representation of the values of a column.
 *
 * <p>A representation does not hold the values itself; each method is
 * handed the data set to operate on. */
public interface Representation {
/** Returns the representation type. */
RepresentationType getType();
/** Converts a value set into a compact representation. If
 * {@code sources} is not null, permutes. */
Object freeze(ColumnLoader.ValueSet valueSet, int @Nullable [] sources);
/** Returns the value at position {@code ordinal} of a data set as an
 * object; the result may be null. */
@Nullable Object getObject(Object dataSet, int ordinal);
/** Returns the value at position {@code ordinal} of a data set as an
 * {@code int}. */
int getInt(Object dataSet, int ordinal);
/** Creates a data set that is the same as a given data set
 * but re-ordered. */
Object permute(Object dataSet, int[] sources);
/** Returns the number of elements in a data set. (Some representations
 * return the capacity, which may be slightly larger than the actual
 * size.) */
int size(Object dataSet);
/** Converts a data set to a string. */
String toString(Object dataSet);
}
/** Representation in which the data set of a column is an array of
 * {@link Comparable} values; a null value is stored as a null element. */
public static class ObjectArray implements Representation {
  final int ordinal;

  ObjectArray(int ordinal) {
    this.ordinal = ordinal;
  }

  @Override public String toString() {
    return new StringBuilder("ObjectArray(ordinal=")
        .append(ordinal)
        .append(")")
        .toString();
  }

  @Override public RepresentationType getType() {
    return RepresentationType.OBJECT_ARRAY;
  }

  /** Copies the (optionally permuted) values into a new
   * {@code Comparable} array. */
  @Override public Object freeze(ColumnLoader.ValueSet valueSet, int @Nullable [] sources) {
    // The values are assumed to have been canonized already.
    final List permuted = permuteList(valueSet.values, sources);
    return permuted.toArray(new Comparable[0]);
  }

  /** Builds a new array of the same length whose i-th element is the
   * element of the input at index {@code sources[i]}. */
  @Override public Object permute(Object dataSet, int[] sources) {
    final @Nullable Comparable[] input = (@Nullable Comparable[]) dataSet;
    final @Nullable Comparable[] output = new Comparable[input.length];
    for (int target = 0; target < output.length; target++) {
      output[target] = input[sources[target]];
    }
    return output;
  }

  @Override public @Nullable Object getObject(Object dataSet, int ordinal) {
    final @Nullable Comparable[] values = (@Nullable Comparable[]) dataSet;
    return values[ordinal];
  }

  /** Reads the element as a {@link Number} and returns its int value;
   * fails if the element is null. */
  @Override public int getInt(Object dataSet, int ordinal) {
    final Number number = (Number) getObject(dataSet, ordinal);
    return requireNonNull(number, "value").intValue();
  }

  @Override public int size(Object dataSet) {
    final Comparable[] values = (Comparable[]) dataSet;
    return values.length;
  }

  @Override public String toString(Object dataSet) {
    final Comparable[] values = (Comparable[]) dataSet;
    return Arrays.toString(values);
  }
}
/** Representation in which the data set of a column is an array of
 * primitive values. */
public static class PrimitiveArray implements Representation {
  final int ordinal;
  /** Primitive used to build and to permute the backing array. */
  private final Primitive primitive;
  /** Primitive used to read items from the backing array and to print it. */
  private final Primitive p;

  PrimitiveArray(int ordinal, Primitive primitive, Primitive p) {
    this.ordinal = ordinal;
    this.primitive = primitive;
    this.p = p;
  }

  @Override public String toString() {
    final StringBuilder buf = new StringBuilder("PrimitiveArray(ordinal=");
    buf.append(ordinal);
    buf.append(", primitive=").append(primitive);
    buf.append(", p=").append(p);
    buf.append(")");
    return buf.toString();
  }

  @Override public RepresentationType getType() {
    return RepresentationType.PRIMITIVE_ARRAY;
  }

  /** Converts the (optionally permuted) values into a primitive array. */
  @Override public Object freeze(ColumnLoader.ValueSet valueSet, int @Nullable [] sources) {
    final List permuted = permuteList((List) valueSet.values, sources);
    //noinspection unchecked
    return primitive.toArray2(permuted);
  }

  @Override public Object permute(Object dataSet, int[] sources) {
    return primitive.permute(dataSet, sources);
  }

  @Override public @Nullable Object getObject(Object dataSet, int ordinal) {
    return p.arrayItem(dataSet, ordinal);
  }

  /** Reads the element reflectively as an {@code int}. */
  @Override public int getInt(Object dataSet, int ordinal) {
    return Array.getInt(dataSet, ordinal);
  }

  /** Returns the length of the backing array. */
  @Override public int size(Object dataSet) {
    return Array.getLength(dataSet);
  }

  @Override public String toString(Object dataSet) {
    return p.arrayToString(dataSet);
  }
}
/** Representation that stores column values in a dictionary of
 * primitive values, then uses a short code for each row.
 *
 * <p>Not implemented yet: apart from {@link #getType()} and
 * {@link #toString()}, every method throws
 * {@link UnsupportedOperationException}. */
public static class PrimitiveDictionary implements Representation {
  PrimitiveDictionary() {
  }

  @Override public RepresentationType getType() {
    return RepresentationType.PRIMITIVE_DICTIONARY;
  }

  @Override public String toString() {
    return "PrimitiveDictionary()";
  }

  @Override public int size(Object dataSet) {
    throw new UnsupportedOperationException(); // TODO: not implemented
  }

  @Override public Object getObject(Object dataSet, int ordinal) {
    throw new UnsupportedOperationException(); // TODO: not implemented
  }

  @Override public int getInt(Object dataSet, int ordinal) {
    throw new UnsupportedOperationException(); // TODO: not implemented
  }

  @Override public Object freeze(ColumnLoader.ValueSet valueSet, int @Nullable [] sources) {
    throw new UnsupportedOperationException(); // TODO: not implemented
  }

  @Override public Object permute(Object dataSet, int[] sources) {
    throw new UnsupportedOperationException(); // TODO: not implemented
  }

  @Override public String toString(Object dataSet) {
    throw new UnsupportedOperationException(); // TODO: not implemented
  }
}
/** Representation that stores the values of a column as a
* dictionary of objects. */
public static class ObjectDictionary implements Representation {
final int ordinal;
final Representation representation;
/**
 * Creates an ObjectDictionary.
 *
 * @param ordinal ordinal recorded for this representation
 * @param representation representation stored alongside the ordinal
 */
ObjectDictionary(int ordinal, Representation representation) {
  this.representation = representation;
  this.ordinal = ordinal;
}
/** Describes this representation by its ordinal and its nested
 * representation. */
@Override
public String toString() {
  final StringBuilder buf = new StringBuilder("ObjectDictionary(ordinal=");
  buf.append(ordinal);
  buf.append(", representation=").append(representation);
  buf.append(")");
  return buf.toString();
}
/** Identifies this representation as an object dictionary. */
@Override
public RepresentationType getType() {
  return RepresentationType.OBJECT_DICTIONARY;
}
/**
 * Converts a value set into dictionary form.
 *
 * <p>The keys of {@code valueSet.map} are copied into an array and sorted
 * to form the dictionary. Each value is then replaced by its index in the
 * dictionary; a null value is given the code {@code n}, the number of
 * keys, which is the index of the spare trailing slot allocated when the
 * value set contains null. The resulting codes are frozen using the nested
 * representation.
 *
 * @param valueSet values to encode
 * @param sources if not null, permutation applied to the values before
 *     they are encoded
 * @return a pair of the frozen codes and the dictionary array
 */
@Override public Object freeze(ColumnLoader.ValueSet valueSet, int @Nullable [] sources) {
  final int n = valueSet.map.keySet().size();
  final int extra = valueSet.containsNull ? 1 : 0;
  @SuppressWarnings("all")
  @Nullable Comparable[] codeValues =
      valueSet.map.keySet().toArray(new Comparable[n + extra]);
  // codeValues[0..n) is non-null since valueSet.map.keySet is non-null.
  // There might be a null at the very end; it takes no part in sorting
  // or searching.
  @SuppressWarnings("assignment.type.incompatible")
  Comparable[] nonNullCodeValues = codeValues;
  Arrays.sort(nonNullCodeValues, 0, n);
  ColumnLoader.ValueSet codeValueSet =
      new ColumnLoader.ValueSet(int.class);
  final List<@Nullable Comparable> list =
      permuteList(valueSet.values, sources);
  for (@Nullable Comparable value : list) {
    int code;
    if (value == null) {
      code = n;
    } else {
      // Search only the sorted range [0, n): the optional trailing null
      // slot is outside it and must never be compared against.
      code = Arrays.binarySearch(nonNullCodeValues, 0, n, value);
      assert code >= 0 : code + ", " + value;
    }
    codeValueSet.add(code);
  }
  Object codes = representation.freeze(codeValueSet, null);
  return Pair.of(codes, codeValues);
}
private static Pair