com.facebook.presto.orc.reader.SelectiveStreamReaders Maven / Gradle / Ivy
The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.reader;
import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.predicate.TupleDomainFilter;
import com.facebook.presto.common.type.ArrayType;
import com.facebook.presto.common.type.BigintType;
import com.facebook.presto.common.type.BooleanType;
import com.facebook.presto.common.type.CharType;
import com.facebook.presto.common.type.DateType;
import com.facebook.presto.common.type.DecimalType;
import com.facebook.presto.common.type.DoubleType;
import com.facebook.presto.common.type.IntegerType;
import com.facebook.presto.common.type.MapType;
import com.facebook.presto.common.type.RealType;
import com.facebook.presto.common.type.RowType;
import com.facebook.presto.common.type.SmallintType;
import com.facebook.presto.common.type.TimestampType;
import com.facebook.presto.common.type.TinyintType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.VarbinaryType;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.orc.OrcAggregatedMemoryContext;
import com.facebook.presto.orc.OrcRecordReaderOptions;
import com.facebook.presto.orc.StreamDescriptor;
import com.facebook.presto.orc.metadata.OrcType.OrcTypeKind;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import org.joda.time.DateTimeZone;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Predicate;
import static com.facebook.presto.common.array.Arrays.ensureCapacity;
import static com.facebook.presto.common.type.Decimals.MAX_SHORT_PRECISION;
import static com.facebook.presto.common.type.TimestampType.TIMESTAMP_MICROSECONDS;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.String.format;
public final class SelectiveStreamReaders
{
private SelectiveStreamReaders() {}
public static SelectiveStreamReader createStreamReader(
StreamDescriptor streamDescriptor,
Map filters,
Optional outputType,
List requiredSubfields,
DateTimeZone hiveStorageTimeZone,
OrcRecordReaderOptions options,
OrcAggregatedMemoryContext systemMemoryContext,
boolean isLowMemory)
{
OrcTypeKind type = streamDescriptor.getOrcTypeKind();
switch (type) {
case BOOLEAN: {
checkArgument(requiredSubfields.isEmpty(), "Boolean stream reader doesn't support subfields");
verifyStreamType(streamDescriptor, outputType, BooleanType.class::isInstance);
return new BooleanSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType.isPresent(), systemMemoryContext.newOrcLocalMemoryContext(SelectiveStreamReaders.class.getSimpleName()));
}
case BYTE: {
checkArgument(requiredSubfields.isEmpty(), "Byte stream reader doesn't support subfields");
verifyStreamType(streamDescriptor, outputType, TinyintType.class::isInstance);
return new ByteSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType.isPresent(), systemMemoryContext.newOrcLocalMemoryContext(SelectiveStreamReaders.class.getSimpleName()));
}
case SHORT:
case INT:
case LONG:
case DATE: {
checkArgument(requiredSubfields.isEmpty(), "Primitive type stream reader doesn't support subfields");
verifyStreamType(streamDescriptor, outputType, t -> t instanceof BigintType || t instanceof IntegerType || t instanceof SmallintType || t instanceof DateType);
return new LongSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext, isLowMemory);
}
case FLOAT: {
checkArgument(requiredSubfields.isEmpty(), "Float type stream reader doesn't support subfields");
verifyStreamType(streamDescriptor, outputType, RealType.class::isInstance);
return new FloatSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType.isPresent(), systemMemoryContext.newOrcLocalMemoryContext(SelectiveStreamReaders.class.getSimpleName()));
}
case DOUBLE:
checkArgument(requiredSubfields.isEmpty(), "Double stream reader doesn't support subfields");
verifyStreamType(streamDescriptor, outputType, DoubleType.class::isInstance);
return new DoubleSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType.isPresent(), systemMemoryContext.newOrcLocalMemoryContext(SelectiveStreamReaders.class.getSimpleName()));
case BINARY:
case STRING:
case VARCHAR:
case CHAR:
checkArgument(requiredSubfields.isEmpty(), "Primitive stream reader doesn't support subfields");
verifyStreamType(streamDescriptor, outputType, t -> t instanceof VarcharType || t instanceof CharType || t instanceof VarbinaryType);
return new SliceSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext, isLowMemory);
case TIMESTAMP:
case TIMESTAMP_MICROSECONDS: {
boolean enableMicroPrecision = outputType.isPresent() && outputType.get() == TIMESTAMP_MICROSECONDS;
checkArgument(requiredSubfields.isEmpty(), "Timestamp stream reader doesn't support subfields");
verifyStreamType(streamDescriptor, outputType, TimestampType.class::isInstance);
return new TimestampSelectiveStreamReader(
streamDescriptor,
getOptionalOnlyFilter(type, filters),
hiveStorageTimeZone,
outputType.isPresent(),
systemMemoryContext.newOrcLocalMemoryContext(SelectiveStreamReaders.class.getSimpleName()),
enableMicroPrecision);
}
case LIST:
verifyStreamType(streamDescriptor, outputType, ArrayType.class::isInstance);
return new ListSelectiveStreamReader(streamDescriptor, filters, requiredSubfields, null, 0, outputType, hiveStorageTimeZone, options, systemMemoryContext, isLowMemory);
case STRUCT:
verifyStreamType(streamDescriptor, outputType, RowType.class::isInstance);
return new StructSelectiveStreamReader(streamDescriptor, filters, requiredSubfields, outputType, hiveStorageTimeZone, options, systemMemoryContext, isLowMemory);
case MAP:
verifyStreamType(streamDescriptor, outputType, MapType.class::isInstance);
return new MapSelectiveStreamReader(streamDescriptor, filters, requiredSubfields, outputType, hiveStorageTimeZone, options, systemMemoryContext, isLowMemory);
case DECIMAL: {
verifyStreamType(streamDescriptor, outputType, DecimalType.class::isInstance);
if (streamDescriptor.getOrcType().getPrecision().get() <= MAX_SHORT_PRECISION) {
return new ShortDecimalSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext.newOrcLocalMemoryContext(SelectiveStreamReaders.class.getSimpleName()));
}
else {
return new LongDecimalSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext.newOrcLocalMemoryContext(SelectiveStreamReaders.class.getSimpleName()));
}
}
case UNION:
default:
throw new IllegalArgumentException("Unsupported type: " + type);
}
}
private static void verifyStreamType(StreamDescriptor streamDescriptor, Optional outputType, Predicate predicate)
{
if (outputType.isPresent()) {
ReaderUtils.verifyStreamType(streamDescriptor, outputType.get(), predicate);
}
}
private static Optional getOptionalOnlyFilter(OrcTypeKind type, Map filters)
{
if (filters.isEmpty()) {
return Optional.empty();
}
checkArgument(filters.size() == 1, format("Stream reader for %s doesn't support multiple range filters", type));
return Optional.of(Iterables.getOnlyElement(filters.values()));
}
public static SelectiveStreamReader createNestedStreamReader(
StreamDescriptor streamDescriptor,
int level,
Optional parentFilter,
Optional outputType,
List requiredSubfields,
DateTimeZone hiveStorageTimeZone,
OrcRecordReaderOptions options,
OrcAggregatedMemoryContext systemMemoryContext,
boolean isLowMemory)
{
switch (streamDescriptor.getOrcTypeKind()) {
case BOOLEAN:
case BYTE:
case SHORT:
case INT:
case LONG:
case DATE:
case FLOAT:
case DOUBLE:
case BINARY:
case STRING:
case VARCHAR:
case CHAR:
case TIMESTAMP:
case TIMESTAMP_MICROSECONDS:
case DECIMAL:
Map elementFilters = ImmutableMap.of();
if (parentFilter.isPresent()) {
TupleDomainFilter.PositionalFilter positionalFilter = parentFilter.get().getPositionalFilter();
if (positionalFilter != null) {
elementFilters = ImmutableMap.of(new Subfield("c"), positionalFilter);
}
}
if (!outputType.isPresent() && elementFilters.isEmpty()) {
// No need to read the elements when output is not required and the filter is a simple IS [NOT] NULL
return null;
}
return createStreamReader(streamDescriptor, elementFilters, outputType, requiredSubfields, hiveStorageTimeZone, options, systemMemoryContext.newOrcAggregatedMemoryContext(), isLowMemory);
case LIST:
Optional childFilter = parentFilter.map(HierarchicalFilter::getChild).map(ListFilter.class::cast);
return new ListSelectiveStreamReader(streamDescriptor, ImmutableMap.of(), requiredSubfields, childFilter.orElse(null), level, outputType, hiveStorageTimeZone, options, systemMemoryContext.newOrcAggregatedMemoryContext(), isLowMemory);
case STRUCT:
checkArgument(!parentFilter.isPresent(), "Filters on nested structs are not supported yet");
return new StructSelectiveStreamReader(streamDescriptor, ImmutableMap.of(), requiredSubfields, outputType, hiveStorageTimeZone, options, systemMemoryContext.newOrcAggregatedMemoryContext(), isLowMemory);
case MAP:
checkArgument(!parentFilter.isPresent(), "Filters on nested maps are not supported yet");
return new MapSelectiveStreamReader(streamDescriptor, ImmutableMap.of(), requiredSubfields, outputType, hiveStorageTimeZone, options, systemMemoryContext.newOrcAggregatedMemoryContext(), isLowMemory);
case UNION:
default:
throw new IllegalArgumentException("Unsupported type: " + streamDescriptor.getOrcTypeKind());
}
}
public static int[] initializeOutputPositions(int[] outputPositions, int[] positions, int positionCount)
{
outputPositions = ensureCapacity(outputPositions, positionCount);
System.arraycopy(positions, 0, outputPositions, 0, positionCount);
return outputPositions;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy