// com.facebook.presto.orc.reader.TimestampSelectiveStreamReader Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.reader;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockLease;
import com.facebook.presto.common.block.ClosingBlockLease;
import com.facebook.presto.common.block.LongArrayBlock;
import com.facebook.presto.common.block.RunLengthEncodedBlock;
import com.facebook.presto.orc.DecodeTimestampOptions;
import com.facebook.presto.orc.OrcLocalMemoryContext;
import com.facebook.presto.orc.OrcRecordReaderOptions;
import com.facebook.presto.orc.StreamDescriptor;
import com.facebook.presto.orc.Stripe;
import com.facebook.presto.orc.TupleDomainFilter;
import com.facebook.presto.orc.stream.BooleanInputStream;
import com.facebook.presto.orc.stream.InputStreamSource;
import com.facebook.presto.orc.stream.InputStreamSources;
import com.facebook.presto.orc.stream.LongInputStream;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.openjdk.jol.info.ClassLayout;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Optional;
import static com.facebook.presto.common.array.Arrays.ensureCapacity;
import static com.facebook.presto.common.type.TimestampType.TIMESTAMP;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.PRESENT;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.SECONDARY;
import static com.facebook.presto.orc.reader.ApacheHiveTimestampDecoder.decodeTimestamp;
import static com.facebook.presto.orc.reader.SelectiveStreamReaders.initializeOutputPositions;
import static com.facebook.presto.orc.stream.MissingInputStreamSource.missingStreamSource;
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static io.airlift.slice.SizeOf.sizeOf;
import static java.util.Objects.requireNonNull;
public class TimestampSelectiveStreamReader
implements SelectiveStreamReader
{
// Shallow instance size reported by JOL; the base term of getRetainedSizeInBytes().
private static final int INSTANCE_SIZE = ClassLayout.parseClass(TimestampSelectiveStreamReader.class).instanceSize();
// One-position TIMESTAMP block holding a single null; wrapped in a RunLengthEncodedBlock when a batch is all nulls.
private static final Block NULL_BLOCK = TIMESTAMP.createBlockBuilder(null, 1).appendNull().build();

private final StreamDescriptor streamDescriptor;
// Null when the reader was created without a filter.
private final TupleDomainFilter filter;
// True when there is no filter, the filter is non-deterministic, or the filter accepts null.
private final boolean nullsAllowed;
// When false the reader only filters; values and nulls are never materialized.
private final boolean outputRequired;
private final OrcLocalMemoryContext systemMemoryContext;
// Epoch seconds of 2015-01-01T00:00 in the Hive storage time zone. Not read anywhere else in this class.
private final long baseTimestampInSeconds;
// True when a filter is present and reports itself as not deterministic.
private final boolean nonDeterministicFilter;
private final DecodeTimestampOptions decodeTimestampOptions;

// Stream sources carry their stream type so that openStream() yields the concrete stream class
// without a cast; as raw types the assignments in openRowGroup() would not type-check.
private InputStreamSource<BooleanInputStream> presentStreamSource = missingStreamSource(BooleanInputStream.class);
private InputStreamSource<LongInputStream> secondsStreamSource = missingStreamSource(LongInputStream.class);
private InputStreamSource<LongInputStream> nanosStreamSource = missingStreamSource(LongInputStream.class);

// Streams of the current row group; all three are null until openRowGroup() runs.
@Nullable
private BooleanInputStream presentStream;
private LongInputStream secondsStream;
private LongInputStream nanosStream;

// Whether openRowGroup() has run since the last startStripe()/startRowGroup().
private boolean rowGroupOpen;
// Row-group-relative position up to which read() has advanced.
private int readOffset;

// Decoded values for the positions selected by the last read(); released to the caller by getBlock().
@Nullable
private long[] values;
// Null flags parallel to values; only allocated when nulls have to be recorded.
@Nullable
private boolean[] nulls;
// Positions selected by the last read().
@Nullable
private int[] outputPositions;
private int outputPositionCount;
// Set by readAllNulls(): the last batch was served from the present stream alone.
private boolean allNulls;
// True while a BlockLease handed out by getBlockView() is still open.
private boolean valuesInUse;
/**
 * Creates a selective reader for one ORC timestamp column.
 *
 * @param streamDescriptor descriptor of the column whose streams are read
 * @param filter optional filter applied to every value; must be present when {@code outputRequired} is false
 * @param hiveStorageTimeZone time zone passed to the timestamp decoder options
 * @param outputRequired whether decoded values must be retained for {@link #getBlock} / {@link #getBlockView}
 * @param systemMemoryContext memory context updated with the retained size on every read
 * @param options reader options; only {@code enableTimestampMicroPrecision()} is consulted here
 * @throws NullPointerException if any reference argument is null
 * @throws IllegalArgumentException if there is neither a filter nor required output
 */
public TimestampSelectiveStreamReader(
        StreamDescriptor streamDescriptor,
        Optional<TupleDomainFilter> filter,
        DateTimeZone hiveStorageTimeZone,
        boolean outputRequired,
        OrcLocalMemoryContext systemMemoryContext,
        OrcRecordReaderOptions options)
{
    // Validate every reference argument before any of them is dereferenced.
    requireNonNull(filter, "filter is null");
    requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");
    requireNonNull(options, "options is null");
    checkArgument(filter.isPresent() || outputRequired, "filter must be present if outputRequired is false");

    this.streamDescriptor = requireNonNull(streamDescriptor, "streamDescriptor is null");
    this.systemMemoryContext = requireNonNull(systemMemoryContext, "systemMemoryContext is null");
    this.decodeTimestampOptions = new DecodeTimestampOptions(hiveStorageTimeZone, options.enableTimestampMicroPrecision());

    // With a parameterized Optional, orElse(null) is typed TupleDomainFilter and needs no cast.
    this.filter = filter.orElse(null);
    this.outputRequired = outputRequired;
    this.nonDeterministicFilter = this.filter != null && !this.filter.isDeterministic();
    this.nullsAllowed = this.filter == null || nonDeterministicFilter || this.filter.testNull();
    this.baseTimestampInSeconds = new DateTime(2015, 1, 1, 0, 0, hiveStorageTimeZone).getMillis() / 1000;
}
/**
 * Resets the reader for a new stripe: every stream source becomes a "missing" source,
 * the opened streams are dropped and the read offset returns to zero.
 * The {@code stripe} argument itself is not inspected.
 */
@Override
public void startStripe(Stripe stripe)
{
    // No row group has been announced yet, so nothing can be opened.
    presentStreamSource = missingStreamSource(BooleanInputStream.class);
    secondsStreamSource = missingStreamSource(LongInputStream.class);
    nanosStreamSource = missingStreamSource(LongInputStream.class);

    // Forget everything tied to the previous row group.
    rowGroupOpen = false;
    presentStream = null;
    secondsStream = null;
    nanosStream = null;
    readOffset = 0;
}
/**
 * Points the reader at the streams of a new row group. The PRESENT, DATA and SECONDARY
 * sources of this column are looked up, but the streams themselves are only opened by
 * the next {@link #read} call.
 */
@Override
public void startRowGroup(InputStreamSources dataStreamSources)
{
    presentStreamSource = dataStreamSources.getInputStreamSource(streamDescriptor, PRESENT, BooleanInputStream.class);
    secondsStreamSource = dataStreamSources.getInputStreamSource(streamDescriptor, DATA, LongInputStream.class);
    nanosStreamSource = dataStreamSources.getInputStreamSource(streamDescriptor, SECONDARY, LongInputStream.class);

    // Streams of the previous row group are stale now; reopen lazily.
    rowGroupOpen = false;
    presentStream = null;
    secondsStream = null;
    nanosStream = null;
    readOffset = 0;
}
/**
 * Returns the instance size plus the sizes of the {@code values}, {@code nulls}
 * and {@code outputPositions} buffers.
 */
@Override
public long getRetainedSizeInBytes()
{
    long bufferBytes = sizeOf(values) + sizeOf(nulls) + sizeOf(outputPositions);
    return INSTANCE_SIZE + bufferBytes;
}
/**
 * Opens the present, seconds and nanos streams from their current sources and marks
 * the row group as open. The flag is set last, so it stays false if opening any stream throws.
 *
 * @throws IOException if one of the sources fails to open its stream
 */
private void openRowGroup()
throws IOException
{
presentStream = presentStreamSource.openStream();
secondsStream = secondsStreamSource.openStream();
nanosStream = nanosStreamSource.openStream();
rowGroupOpen = true;
}
/**
 * Reads the rows at {@code positions} (relative to {@code offset}) and applies the filter, if any.
 *
 * @param offset row-group-relative position that {@code positions} are based on
 * @param positions positions to read
 * @param positionCount number of entries of {@code positions} to use; must be positive
 * @return the number of positions that were selected
 * @throws IOException if reading from the underlying streams fails
 * @throws IllegalArgumentException if {@code positionCount} is not positive
 * @throws IllegalStateException if a lease from {@link #getBlockView} is still open
 */
@Override
public int read(int offset, int[] positions, int positionCount)
        throws IOException
{
    checkArgument(positionCount > 0, "positionCount must be greater than zero");
    checkState(!valuesInUse, "BlockLease hasn't been closed yet");

    if (!rowGroupOpen) {
        openRowGroup();
    }

    allNulls = false;

    if (outputRequired) {
        boolean recordNulls = nullsAllowed && presentStream != null;
        ensureValuesCapacity(positionCount, recordNulls);
    }
    outputPositions = initializeOutputPositions(outputPositions, positions, positionCount);

    // Report the memory held by values, nulls and outputPositions.
    systemMemoryContext.setBytes(getRetainedSizeInBytes());

    // Catch up with the caller if rows between the previous batch and this one were never requested.
    if (readOffset < offset) {
        skip(offset - readOffset);
    }

    boolean onlyPresentStream = secondsStream == null && nanosStream == null && presentStream != null;
    final int consumed;
    if (onlyPresentStream) {
        consumed = readAllNulls(positions, positionCount);
    }
    else if (filter != null) {
        consumed = readWithFilter(positions, positionCount);
    }
    else {
        consumed = readNoFilter(positions, positionCount);
    }

    readOffset = offset + consumed;
    return outputPositionCount;
}
/**
 * Reads the requested positions and keeps those accepted by {@code filter}.
 * Only called from {@link #read} when {@code filter} is non-null.
 *
 * <p>After every tested position the filter is asked how many already accepted positions
 * must be retracted ({@code getPrecedingPositionsToFail}) and how many upcoming positions
 * fail without being tested ({@code getSucceedingPositionsToFail}).
 *
 * @param positions positions to read, relative to the current stream position
 * @param positionCount number of entries of {@code positions} to use
 * @return the number of rows consumed from the streams
 * @throws IOException if reading from the underlying streams fails
 */
private int readWithFilter(int[] positions, int positionCount)
        throws IOException
{
    int streamPosition = 0;
    outputPositionCount = 0;

    for (int i = 0; i < positionCount; i++) {
        int position = positions[i];
        if (position > streamPosition) {
            skip(position - streamPosition);
            streamPosition = position;
        }

        if (presentStream != null && !presentStream.nextBit()) {
            // nullsAllowed is unconditionally true for a non-deterministic filter, so it cannot
            // decide here. Such a filter is asked about every null and its answer is honoured,
            // which is also what readAllNulls() does; a deterministic filter was already asked
            // once in the constructor.
            boolean nullPasses = nonDeterministicFilter ? filter.testNull() : nullsAllowed;
            if (nullPasses) {
                if (outputRequired) {
                    nulls[outputPositionCount] = true;
                }
                outputPositions[outputPositionCount] = position;
                outputPositionCount++;
            }
        }
        else {
            long value = decodeTimestamp(secondsStream.next(), nanosStream.next(), decodeTimestampOptions);
            if (filter.testLong(value)) {
                if (outputRequired) {
                    values[outputPositionCount] = value;
                    if (nullsAllowed && presentStream != null) {
                        nulls[outputPositionCount] = false;
                    }
                }
                outputPositions[outputPositionCount] = position;
                outputPositionCount++;
            }
        }
        streamPosition++;

        // Retract positions accepted earlier that the filter has now failed.
        outputPositionCount -= filter.getPrecedingPositionsToFail();

        // Jump over upcoming positions the filter has already failed.
        // NOTE(review): assumes the filter never reports more succeeding positions than remain
        // in this batch; otherwise positions[i] below runs out of bounds - confirm against the filter contract.
        int succeedingPositionsToFail = filter.getSucceedingPositionsToFail();
        if (succeedingPositionsToFail > 0) {
            int positionsToSkip = 0;
            for (int j = 0; j < succeedingPositionsToFail; j++) {
                i++;
                int nextPosition = positions[i];
                positionsToSkip += 1 + nextPosition - streamPosition;
                streamPosition = nextPosition + 1;
            }
            skip(positionsToSkip);
        }
    }
    return streamPosition;
}
/**
 * Serves a batch when only the present stream exists (no seconds and no nanos stream),
 * so every selected position is reported as null.
 *
 * @param positions positions to read, relative to the current stream position
 * @param positionCount number of entries of {@code positions} to use
 * @return the number of rows consumed, i.e. the last requested position plus one
 * @throws IOException if skipping in the present stream fails
 */
private int readAllNulls(int[] positions, int positionCount)
        throws IOException
{
    int rowsConsumed = positions[positionCount - 1] + 1;
    // Advance past the last requested row as well, so that the stream cursor agrees with the
    // position returned below; read() derives readOffset, and later skips, from that value.
    presentStream.skip(rowsConsumed);

    if (nonDeterministicFilter) {
        outputPositionCount = 0;
        for (int i = 0; i < positionCount; i++) {
            if (filter.testNull()) {
                // Record which position passed; the write index never exceeds i, so no
                // entry is overwritten before it has been read.
                outputPositions[outputPositionCount] = positions[i];
                outputPositionCount++;
            }
            else {
                outputPositionCount -= filter.getPrecedingPositionsToFail();
                i += filter.getSucceedingPositionsToFail();
            }
        }
    }
    else if (nullsAllowed) {
        outputPositionCount = positionCount;
    }
    else {
        outputPositionCount = 0;
    }

    allNulls = true;
    return rowsConsumed;
}
/**
 * Reads every requested position without filtering; all of them are selected.
 * The constructor guarantees that output is required whenever there is no filter.
 *
 * @param positions positions to read, relative to the current stream position
 * @param positionCount number of entries of {@code positions} to use
 * @return the number of rows consumed from the streams
 * @throws IOException if reading from the underlying streams fails
 */
private int readNoFilter(int[] positions, int positionCount)
        throws IOException
{
    int cursor = 0;
    for (int index = 0; index < positionCount; index++) {
        int target = positions[index];
        if (target > cursor) {
            skip(target - cursor);
            cursor = target;
        }

        boolean hasPresentStream = presentStream != null;
        boolean isNull = hasPresentStream && !presentStream.nextBit();
        if (!isNull) {
            values[index] = decodeTimestamp(secondsStream.next(), nanosStream.next(), decodeTimestampOptions);
        }
        // The null flag is tracked only when a present stream exists.
        if (hasPresentStream) {
            nulls[index] = isNull;
        }
        cursor++;
    }

    outputPositionCount = positionCount;
    return cursor;
}
/**
 * Advances the streams by {@code items} rows. When a present stream exists alongside the
 * data streams, the data streams advance only by the number of set present bits.
 *
 * @throws IOException if skipping in an underlying stream fails
 */
private void skip(int items)
        throws IOException
{
    boolean noDataStreams = secondsStream == null && nanosStream == null;
    if (noDataStreams) {
        presentStream.skip(items);
        return;
    }

    int dataToSkip = (presentStream == null) ? items : presentStream.countBitsSet(items);
    secondsStream.skip(dataToSkip);
    nanosStream.skip(dataToSkip);
}
/**
 * Makes sure {@code values} can hold {@code capacity} entries and, when {@code recordNulls}
 * is set, that {@code nulls} can as well.
 */
private void ensureValuesCapacity(int capacity, boolean recordNulls)
{
    values = ensureCapacity(values, capacity);
    if (!recordNulls) {
        return;
    }
    nulls = ensureCapacity(nulls, capacity);
}
/**
 * Returns the reader's internal {@code outputPositions} array itself, not a copy.
 * Presumably only the first N entries are meaningful, N being the count returned by the
 * last {@code read} call - confirm against callers.
 */
@Override
public int[] getReadPositions()
{
return outputPositions;
}
/**
 * Builds a block with the values at {@code positions}, which are matched against the
 * positions selected by the last read.
 *
 * <p>When every selected position is requested, the internal {@code values} and {@code nulls}
 * arrays are handed to the block and the reader drops its references; otherwise the
 * requested entries are copied into fresh arrays.
 *
 * @throws IllegalArgumentException if the last read selected no positions
 * @throws IllegalStateException if output is not produced by this reader, more positions are
 *         requested than were selected, or a lease is still open
 */
@Override
public Block getBlock(int[] positions, int positionCount)
{
    checkArgument(outputPositionCount > 0, "outputPositionCount must be greater than zero");
    checkState(outputRequired, "This stream reader doesn't produce output");
    checkState(positionCount <= outputPositionCount, "Not enough values");
    checkState(!valuesInUse, "BlockLease hasn't been closed yet");

    if (allNulls) {
        return new RunLengthEncodedBlock(NULL_BLOCK, positionCount);
    }

    boolean includeNulls = nullsAllowed && presentStream != null;

    if (positionCount != outputPositionCount) {
        // Subset requested: copy the matching entries out, leaving the buffers untouched.
        long[] selectedValues = new long[positionCount];
        boolean[] selectedNulls = includeNulls ? new boolean[positionCount] : null;

        int written = 0;
        int wanted = positions[written];
        for (int source = 0; source < outputPositionCount; source++) {
            int candidate = outputPositions[source];
            if (candidate >= wanted) {
                assert candidate == wanted;
                selectedValues[written] = this.values[source];
                if (selectedNulls != null) {
                    selectedNulls[written] = this.nulls[source];
                }
                written++;
                if (written >= positionCount) {
                    break;
                }
                wanted = positions[written];
            }
        }
        return new LongArrayBlock(positionCount, Optional.ofNullable(selectedNulls), selectedValues);
    }

    // Everything requested: give the buffers away instead of copying them.
    boolean[] blockNulls = includeNulls ? nulls : null;
    Block block = new LongArrayBlock(positionCount, Optional.ofNullable(blockNulls), values);
    nulls = null;
    values = null;
    return block;
}
/**
 * Returns a leased block over the reader's own buffers for the values at {@code positions}.
 * When fewer positions are requested than were selected, the buffers are compacted in place first.
 * No further read or block request is accepted until the lease is closed.
 *
 * @throws IllegalArgumentException if the last read selected no positions
 * @throws IllegalStateException if output is not produced by this reader, more positions are
 *         requested than were selected, or a lease is still open
 */
@Override
public BlockLease getBlockView(int[] positions, int positionCount)
{
    checkArgument(outputPositionCount > 0, "outputPositionCount must be greater than zero");
    checkState(outputRequired, "This stream reader doesn't produce output");
    checkState(positionCount <= outputPositionCount, "Not enough values");
    checkState(!valuesInUse, "BlockLease hasn't been closed yet");

    Block view;
    if (allNulls) {
        view = new RunLengthEncodedBlock(NULL_BLOCK, positionCount);
    }
    else {
        boolean includeNulls = nullsAllowed && presentStream != null;
        if (positionCount != outputPositionCount) {
            compactValues(positions, positionCount, includeNulls);
        }
        boolean[] viewNulls = includeNulls ? nulls : null;
        view = new LongArrayBlock(positionCount, Optional.ofNullable(viewNulls), values);
    }
    return newLease(view);
}
/**
 * Wraps {@code block} in a lease and marks the buffers as in use; the callback passed
 * to the lease clears that mark again.
 */
private BlockLease newLease(Block block)
{
// Set before the lease exists so read()/getBlock()/getBlockView() reject calls in the meantime.
valuesInUse = true;
return ClosingBlockLease.newLease(block, () -> valuesInUse = false);
}
/**
 * Compacts {@code values}, {@code outputPositions} and optionally {@code nulls} in place so
 * that only the entries matching {@code positions} remain at the front, then sets
 * {@code outputPositionCount} to {@code positionCount}.
 */
private void compactValues(int[] positions, int positionCount, boolean compactNulls)
{
    int written = 0;
    int wanted = positions[written];
    for (int source = 0; source < outputPositionCount; source++) {
        int candidate = outputPositions[source];
        if (candidate >= wanted) {
            assert candidate == wanted;

            // The write index never runs ahead of the read index, so moving entries forward is safe.
            values[written] = values[source];
            if (compactNulls) {
                nulls[written] = nulls[source];
            }
            outputPositions[written] = wanted;

            written++;
            if (written >= positionCount) {
                break;
            }
            wanted = positions[written];
        }
    }

    outputPositionCount = positionCount;
}
/**
 * Returns a string built from this reader and its stream descriptor via {@code toStringHelper}.
 */
@Override
public String toString()
{
return toStringHelper(this)
.addValue(streamDescriptor)
.toString();
}
/**
 * Drops the buffers, the opened streams and their sources, then closes the memory context.
 */
@Override
public void close()
{
    // Buffers
    values = null;
    nulls = null;
    outputPositions = null;

    // Streams together with the sources they came from
    presentStream = null;
    presentStreamSource = null;
    secondsStream = null;
    secondsStreamSource = null;
    nanosStream = null;
    nanosStreamSource = null;

    systemMemoryContext.close();
}
/**
 * Does nothing: the body is empty, so this reader never throws from here.
 */
@Override
public void throwAnyError(int[] positions, int positionCount)
{
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy