All downloads are free. The search and download functionality uses the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it how often you want.
com.facebook.presto.hudi.HudiPageSourceProvider Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hudi;
import com.facebook.presto.common.RuntimeStats;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.FileFormatDataSourceStats;
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HdfsEnvironment;
import com.facebook.presto.hive.metastore.Column;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorSplit;
import com.facebook.presto.spi.ConnectorTableLayoutHandle;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.RecordCursor;
import com.facebook.presto.spi.RecordPageSource;
import com.facebook.presto.spi.SplitContext;
import com.facebook.presto.spi.connector.ConnectorPageSourceProvider;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import javax.inject.Inject;
import java.time.ZoneId;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import static com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema;
import static com.facebook.presto.hudi.HudiErrorCode.HUDI_CANNOT_OPEN_SPLIT;
import static com.facebook.presto.hudi.HudiParquetPageSources.createParquetPageSource;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
public class HudiPageSourceProvider
implements ConnectorPageSourceProvider
{
private final HdfsEnvironment hdfsEnvironment;
private final FileFormatDataSourceStats fileFormatDataSourceStats;
private final TypeManager typeManager;
@Inject
public HudiPageSourceProvider(
HdfsEnvironment hdfsEnvironment,
FileFormatDataSourceStats fileFormatDataSourceStats,
TypeManager typeManager)
{
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.fileFormatDataSourceStats = requireNonNull(fileFormatDataSourceStats, "fileFormatDataSourceStats is null");
this.typeManager = requireNonNull(typeManager, "typeManager is null");
}
@Override
public ConnectorPageSource createPageSource(
ConnectorTransactionHandle transactionHandle,
ConnectorSession session,
ConnectorSplit split,
ConnectorTableLayoutHandle layoutHandle,
List columns,
SplitContext splitContext,
RuntimeStats runtimeStats)
{
HudiTableLayoutHandle layout = (HudiTableLayoutHandle) layoutHandle;
HudiSplit hudiSplit = (HudiSplit) split;
HudiTableType tableType = layout.getTable().getTableType();
List hudiColumnHandles = columns.stream().map(HudiColumnHandle.class::cast).collect(toList());
List dataColumns = hudiColumnHandles.stream().filter(HudiColumnHandle::isRegularColumn).collect(toList());
final ConnectorPageSource dataColumnPageSource;
if (tableType == HudiTableType.COW) {
HudiFile baseFile = hudiSplit.getBaseFile().orElseThrow(() ->
new PrestoException(HUDI_CANNOT_OPEN_SPLIT, "Split without base file is invalid"));
Path path = new Path(baseFile.getPath());
Configuration configuration = hdfsEnvironment.getConfiguration(
new HdfsContext(session,
layout.getTable().getSchemaName(),
layout.getTable().getTableName(),
baseFile.getPath(),
false),
path);
dataColumnPageSource = createParquetPageSource(
typeManager,
hdfsEnvironment,
session,
configuration,
path,
baseFile.getStart(),
baseFile.getLength(),
dataColumns,
TupleDomain.all(), // TODO: predicates
fileFormatDataSourceStats);
}
else if (tableType == HudiTableType.MOR) {
Properties schema = getHiveSchema(
hudiSplit.getPartition().getStorage(),
toMetastoreColumns(hudiSplit.getPartition().getDataColumns()),
toMetastoreColumns(layout.getDataColumns()),
layout.getTableParameters(),
layout.getTable().getSchemaName(),
layout.getTable().getTableName(),
layout.getPartitionColumns().stream().map(HudiColumnHandle::getName).collect(toImmutableList()),
layout.getPartitionColumns().stream().map(HudiColumnHandle::getHiveType).collect(toImmutableList()));
RecordCursor recordCursor = HudiRecordCursors.createRealtimeRecordCursor(
hdfsEnvironment,
session,
schema,
hudiSplit,
dataColumns,
ZoneId.of("UTC"), // TODO configurable
typeManager);
List types = dataColumns.stream()
.map(column -> column.getHiveType().getType(typeManager))
.collect(toImmutableList());
dataColumnPageSource = new RecordPageSource(types, recordCursor);
}
else {
throw new PrestoException(NOT_SUPPORTED, "Could not create page source for table type " + tableType);
}
return new HudiPageSource(
hudiColumnHandles,
hudiSplit.getPartition().getKeyValues(),
dataColumnPageSource,
session.getSqlFunctionProperties().getTimeZoneKey(),
typeManager);
}
private static List toMetastoreColumns(List hudiColumnHandles)
{
return hudiColumnHandles.stream()
.map(column -> new Column(column.getName(), column.getHiveType(), Optional.empty(), Optional.empty()))
.collect(toImmutableList());
}
}