/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;
import com.facebook.presto.common.Page;
import com.facebook.presto.common.RuntimeStats;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.LazyBlock;
import com.facebook.presto.common.block.LazyBlockLoader;
import com.facebook.presto.common.block.RunLengthEncodedBlock;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.HivePageSourceProvider.ColumnMapping;
import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.PrestoException;
import com.google.common.annotations.VisibleForTesting;
import io.airlift.slice.Slices;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.joda.time.DateTimeZone;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
import static com.facebook.presto.hive.HiveBucketing.getHiveBucket;
import static com.facebook.presto.hive.HiveCoercer.createCoercer;
import static com.facebook.presto.hive.HiveColumnHandle.isRowIdColumnHandle;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_CURSOR_ERROR;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES;
import static com.facebook.presto.hive.HivePageSourceProvider.ColumnMappingKind.PREFILLED;
import static com.facebook.presto.hive.HiveUtil.typedPartitionKey;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
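/**
 * A {@link ConnectorPageSource} that wraps a format-specific delegate page source and adapts every page
 * it produces to the column layout requested by the engine:
 * <ul>
 * <li>PREFILLED columns (partition keys and other constants) are expanded as run-length-encoded blocks;</li>
 * <li>REGULAR columns are taken from the delegate page, with optional Hive type coercion and synthetic
 * $row_id generation applied lazily;</li>
 * <li>when a {@code BucketAdaptation} is supplied, rows that hash to a different bucket are filtered out.</li>
 * </ul>
 * <p>
 * Illustrative construction only; in practice the arguments are assembled by {@code HivePageSourceProvider},
 * and the names and path below are placeholders:
 * <pre>{@code
 * ConnectorPageSource pageSource = new HivePageSource(
 *         columnMappings,                              // List<ColumnMapping> for the requested columns
 *         Optional.empty(),                            // no bucket adaptation needed
 *         DateTimeZone.UTC,                            // hiveStorageTimeZone
 *         typeManager,
 *         delegatePageSource,                          // the file-format reader for the split
 *         "/warehouse/orders/ds=2024-01-01/000000_0",  // path of the file being read
 *         Optional.empty());                           // no row ID partition component
 * }</pre>
 */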
public class HivePageSource
implements ConnectorPageSource
{
private final List<ColumnMapping> columnMappings;
private final Optional<BucketAdapter> bucketAdapter;
private final Object[] prefilledValues;
private final Type[] types;
private final Function<Block, Block>[] coercers;
private final ConnectorPageSource delegate;
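/**
 * Precomputes the per-column adaptation state. For each {@link ColumnMapping} this resolves the Presto
 * {@link Type}, installs a coercion function when the column must be coerced from a different Hive type
 * ({@code coercionFrom}) or a {@code RowIDCoercer} for the synthetic $row_id column (keyed by the file
 * name of {@code path}), and evaluates the constant value of PREFILLED columns in the given time zone.
 */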
public HivePageSource(
List<ColumnMapping> columnMappings,
Optional<BucketAdaptation> bucketAdaptation,
DateTimeZone hiveStorageTimeZone,
TypeManager typeManager,
ConnectorPageSource delegate,
String path,
Optional<byte[]> rowIdPartitionComponent)
{
requireNonNull(columnMappings, "columnMappings is null");
requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");
requireNonNull(typeManager, "typeManager is null");
this.delegate = requireNonNull(delegate, "delegate is null");
this.columnMappings = columnMappings;
this.bucketAdapter = bucketAdaptation.map(BucketAdapter::new);
int size = columnMappings.size();
prefilledValues = new Object[size];
types = new Type[size];
coercers = new Function[size];
for (int columnIndex = 0; columnIndex < size; columnIndex++) {
ColumnMapping columnMapping = columnMappings.get(columnIndex);
HiveColumnHandle column = columnMapping.getHiveColumnHandle();
String name = column.getName();
Type type = typeManager.getType(column.getTypeSignature());
types[columnIndex] = type;
if (columnMapping.getCoercionFrom().isPresent()) {
coercers[columnIndex] = createCoercer(typeManager, columnMapping.getCoercionFrom().get(), columnMapping.getHiveColumnHandle().getHiveType());
}
else if (isRowIdColumnHandle(columnMapping.getHiveColumnHandle())) {
// If there's no row ID partition component, then path + row numbers will be supplied for $row_id
byte[] component = rowIdPartitionComponent.orElse(new byte[0]);
String rowGroupId = Paths.get(path).getFileName().toString();
coercers[columnIndex] = new RowIDCoercer(component, rowGroupId);
}
if (columnMapping.getKind() == PREFILLED) {
prefilledValues[columnIndex] = typedPartitionKey(columnMapping.getPrefilledValue(), type, name, hiveStorageTimeZone);
}
}
}
@Override
public long getCompletedBytes()
{
return delegate.getCompletedBytes();
}
@Override
public long getCompletedPositions()
{
return delegate.getCompletedPositions();
}
@Override
public long getReadTimeNanos()
{
return delegate.getReadTimeNanos();
}
@Override
public boolean isFinished()
{
return delegate.isFinished();
}
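/**
 * Fetches the next page from the delegate and rewrites it column by column: bucket filtering runs first
 * and may discard the page entirely, PREFILLED/POSTFILLED columns become run-length-encoded blocks,
 * coerced REGULAR columns are wrapped in {@link LazyBlock}s so the coercion only runs if the block is
 * actually loaded, and INTERIM/AGGREGATED columns are omitted from the output. A {@link PrestoException}
 * is rethrown as-is; any other runtime failure is wrapped as {@code HIVE_CURSOR_ERROR}. In both cases the
 * underlying source is closed first.
 */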
@Override
public Page getNextPage()
{
try {
Page dataPage = delegate.getNextPage();
if (dataPage == null) {
return null;
}
if (bucketAdapter.isPresent()) {
dataPage = bucketAdapter.get().filterPageToEligibleRowsOrDiscard(dataPage);
if (dataPage == null) {
return null;
}
}
int batchSize = dataPage.getPositionCount();
List<Block> blocks = new ArrayList<>();
for (int fieldId = 0; fieldId < columnMappings.size(); fieldId++) {
ColumnMapping columnMapping = columnMappings.get(fieldId);
switch (columnMapping.getKind()) {
case PREFILLED:
blocks.add(RunLengthEncodedBlock.create(types[fieldId], prefilledValues[fieldId], batchSize));
break;
case POSTFILLED:
// Use empty slice for the value since this will be replaced later, after the data is read
blocks.add(RunLengthEncodedBlock.create(types[fieldId], Slices.EMPTY_SLICE, batchSize));
break;
case REGULAR:
Block block = dataPage.getBlock(columnMapping.getIndex());
if (coercers[fieldId] != null) {
block = new LazyBlock(batchSize, new CoercionLazyBlockLoader(block, coercers[fieldId]));
}
blocks.add(block);
break;
case INTERIM:
// interim columns don't show up in output
break;
case AGGREGATED:
// aggregated columns only produce one page with the required value from header/footer
// do not require data read and do not show up in output
break;
default:
throw new UnsupportedOperationException();
}
}
return new Page(batchSize, blocks.toArray(new Block[0]));
}
catch (PrestoException e) {
closeWithSuppression(e);
throw e;
}
catch (RuntimeException e) {
closeWithSuppression(e);
throw new PrestoException(HIVE_CURSOR_ERROR, e);
}
}
@Override
public void close()
{
try {
delegate.close();
}
catch (IOException e) {
throw new UncheckedIOException(e);
}
}
@Override
public RuntimeStats getRuntimeStats()
{
return delegate.getRuntimeStats();
}
@Override
public String toString()
{
return delegate.toString();
}
@Override
public long getSystemMemoryUsage()
{
return delegate.getSystemMemoryUsage();
}
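// Closes the delegate while keeping the original failure as the primary exception; anything thrown by
// close() is attached as a suppressed exception rather than masking it.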
private void closeWithSuppression(Throwable throwable)
{
requireNonNull(throwable, "throwable is null");
try {
close();
}
catch (RuntimeException e) {
// Self-suppression not permitted
if (throwable != e) {
throwable.addSuppressed(e);
}
}
}
@VisibleForTesting
ConnectorPageSource getPageSource()
{
return delegate;
}
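/**
 * Defers a column coercion until the block is first accessed: the coercer is applied to the fully loaded
 * source block, and the reference to that block is then dropped so its resources can be released.
 */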
private static final class CoercionLazyBlockLoader
implements LazyBlockLoader<LazyBlock>
{
private final Function<Block, Block> coercer;
private Block block;
public CoercionLazyBlockLoader(Block block, Function<Block, Block> coercer)
{
this.block = requireNonNull(block, "block is null");
this.coercer = requireNonNull(coercer, "coercer is null");
}
@Override
public void load(LazyBlock lazyBlock)
{
if (block == null) {
return;
}
lazyBlock.setBlock(coercer.apply(block.getLoadedBlock()));
// clear reference to loader to free resources, since load was successful
block = null;
}
}
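/**
 * Defers the positional filtering computed by {@link BucketAdapter}: when the block is first accessed,
 * only the positions listed in {@code rowsToKeep} are retained from the source block.
 */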
private static final class RowFilterLazyBlockLoader
implements LazyBlockLoader<LazyBlock>
{
private Block block;
private final IntArrayList rowsToKeep;
public RowFilterLazyBlockLoader(Block block, IntArrayList rowsToKeep)
{
this.block = requireNonNull(block, "block is null");
this.rowsToKeep = requireNonNull(rowsToKeep, "rowsToKeep is null");
}
@Override
public void load(LazyBlock lazyBlock)
{
if (block == null) {
return;
}
lazyBlock.setBlock(block.getPositions(rowsToKeep.elements(), 0, rowsToKeep.size()));
// clear reference to loader to free resources, since load was successful
block = null;
}
}
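/**
 * Recomputes each row's bucket under the table's bucket count and keeps only rows belonging to
 * {@code bucketToKeep}; {@code partitionBucketCount} is used solely to verify that every row could
 * legally appear in the file being read.
 */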
private static class BucketAdapter
{
public final int[] bucketColumns;
public final int bucketToKeep;
public final int tableBucketCount;
public final int partitionBucketCount; // for sanity check only
private final List<TypeInfo> typeInfoList;
private final boolean useLegacyTimestampBucketing;
public BucketAdapter(BucketAdaptation bucketAdaptation)
{
this.bucketColumns = bucketAdaptation.getBucketColumnIndices();
this.bucketToKeep = bucketAdaptation.getBucketToKeep();
this.typeInfoList = bucketAdaptation.getBucketColumnHiveTypes().stream()
.map(HiveType::getTypeInfo)
.collect(toImmutableList());
this.tableBucketCount = bucketAdaptation.getTableBucketCount();
this.partitionBucketCount = bucketAdaptation.getPartitionBucketCount();
this.useLegacyTimestampBucketing = bucketAdaptation.useLegacyTimestampBucketing();
}
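/**
 * Returns the page restricted to rows whose bucket equals {@code bucketToKeep}, the original page if all
 * rows qualify, or {@code null} if none do. A row whose bucket is not congruent to {@code bucketToKeep}
 * modulo {@code partitionBucketCount} indicates a corrupt bucket file and fails the query with
 * {@code HIVE_INVALID_BUCKET_FILES}.
 */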
@Nullable
public Page filterPageToEligibleRowsOrDiscard(Page page)
{
IntArrayList ids = new IntArrayList(page.getPositionCount());
Page bucketColumnsPage = page.extractChannels(bucketColumns);
for (int position = 0; position < page.getPositionCount(); position++) {
int bucket = getHiveBucket(tableBucketCount, typeInfoList, bucketColumnsPage, position, useLegacyTimestampBucketing);
if ((bucket - bucketToKeep) % partitionBucketCount != 0) {
throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format(
"A row that is supposed to be in bucket %s is encountered. Only rows in bucket %s (modulo %s) are expected",
bucket, bucketToKeep % partitionBucketCount, partitionBucketCount));
}
if (bucket == bucketToKeep) {
ids.add(position);
}
}
int retainedRowCount = ids.size();
if (retainedRowCount == 0) {
return null; // Empty page after filtering
}
if (retainedRowCount == page.getPositionCount()) {
return page; // Unchanged after filtering
}
Block[] adaptedBlocks = new Block[page.getChannelCount()];
for (int i = 0; i < adaptedBlocks.length; i++) {
Block block = page.getBlock(i);
if (block instanceof LazyBlock && !((LazyBlock) block).isLoaded()) {
adaptedBlocks[i] = new LazyBlock(retainedRowCount, new RowFilterLazyBlockLoader(block, ids));
}
else {
adaptedBlocks[i] = block.getPositions(ids.elements(), 0, retainedRowCount);
}
}
return new Page(retainedRowCount, adaptedBlocks);
}
}
}