// com.facebook.presto.hive.HivePartitionManager Maven / Gradle / Ivy
// The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;
import com.facebook.airlift.concurrent.ThreadPoolExecutorMBean;
import com.facebook.airlift.log.Logger;
import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.NullableValue;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.type.CharType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.hive.HiveBucketing.HiveBucketFilter;
import com.facebook.presto.hive.metastore.Column;
import com.facebook.presto.hive.metastore.MetastoreContext;
import com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore;
import com.facebook.presto.hive.metastore.Table;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorTableHandle;
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException;
import com.google.common.base.Predicates;
import com.google.common.base.VerifyException;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import org.joda.time.DateTimeZone;
import org.weakref.jmx.Managed;
import org.weakref.jmx.Nested;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import static com.facebook.airlift.concurrent.Threads.daemonThreadsNamed;
import static com.facebook.presto.hive.HiveBucketing.getHiveBucketFilter;
import static com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle;
import static com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_EXCEEDED_PARTITION_LIMIT;
import static com.facebook.presto.hive.HiveSessionProperties.getMaxBucketsForGroupedExecution;
import static com.facebook.presto.hive.HiveSessionProperties.getMinBucketCountToNotIgnoreTableBucketing;
import static com.facebook.presto.hive.HiveSessionProperties.isLegacyTimestampBucketing;
import static com.facebook.presto.hive.HiveSessionProperties.isOfflineDataDebugModeEnabled;
import static com.facebook.presto.hive.HiveSessionProperties.isParallelParsingOfPartitionValuesEnabled;
import static com.facebook.presto.hive.HiveSessionProperties.shouldIgnoreTableBucketing;
import static com.facebook.presto.hive.HiveUtil.getPartitionKeyColumnHandles;
import static com.facebook.presto.hive.HiveUtil.parsePartitionValue;
import static com.facebook.presto.hive.metastore.MetastoreUtil.extractPartitionValues;
import static com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders;
import static com.facebook.presto.hive.metastore.MetastoreUtil.getProtectMode;
import static com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled;
import static com.facebook.presto.hive.metastore.MetastoreUtil.makePartName;
import static com.facebook.presto.hive.metastore.MetastoreUtil.verifyOnline;
import static com.facebook.presto.hive.metastore.PrestoTableType.TEMPORARY_TABLE;
import static com.facebook.presto.spi.Constraint.alwaysTrue;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Predicates.not;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
/**
 * Produces the list of {@code HivePartition}s of a Hive table that satisfy a query
 * constraint, parsing partition names into typed values and optionally doing that
 * parsing in parallel batches.
 */
public class HivePartitionManager
{
private static final Logger log = Logger.get(HivePartitionManager.class);
// Threshold above which partition names are parsed in parallel (when the session enables it),
// and also the number of partition names handed to each parsing task.
private static final int PARTITION_NAMES_BATCH_SIZE = 500;
// Never null; validated in the constructor.
private final DateTimeZone timeZone;
private final boolean assumeCanonicalPartitionKeys;
// Used to turn partition column type signatures into types; never null.
private final TypeManager typeManager;
private final int maxPartitionsPerScan;
// Validated in the constructor to be at least 1.
private final int domainCompactionThreshold;
// When true, partition names are fetched with a metastore-side filter instead of listing all names.
private final boolean partitionFilteringFromMetastoreEnabled;
// Runs the per-batch partition value parsing tasks.
private final ListeningExecutorService executorService;
// Management bean wrapping the thread pool behind executorService.
private final ThreadPoolExecutorMBean executorServiceMBean;
/**
 * Creates a partition manager whose settings are read from the Hive connector configuration.
 *
 * @param typeManager used to resolve partition column type signatures
 * @param hiveClientConfig supplies the time zone, canonical-key assumption, partition scan limit,
 *         domain compaction threshold, metastore filtering flag and parsing concurrency
 * @throws NullPointerException if {@code hiveClientConfig} is null
 */
@Inject
public HivePartitionManager(
        TypeManager typeManager,
        HiveClientConfig hiveClientConfig)
{
    this(
            typeManager,
            // Checked on first use so a missing config fails with a descriptive message,
            // matching the null checks performed by the delegate constructor
            requireNonNull(hiveClientConfig, "hiveClientConfig is null").getDateTimeZone(),
            hiveClientConfig.isAssumeCanonicalPartitionKeys(),
            hiveClientConfig.getMaxPartitionsPerScan(),
            hiveClientConfig.getDomainCompactionThreshold(),
            hiveClientConfig.isPartitionFilteringFromMetastoreEnabled(),
            hiveClientConfig.getMaxParallelParsingConcurrency());
}
/**
 * Creates a partition manager from explicit settings and starts the thread pool used for
 * parallel parsing of partition values.
 *
 * @param typeManager used to resolve partition column type signatures; must not be null
 * @param timeZone time zone stored for later use by this manager; must not be null
 * @param assumeCanonicalPartitionKeys stored as given
 * @param maxPartitionsPerScan stored as given
 * @param domainCompactionThreshold must be at least 1
 * @param partitionFilteringFromMetastoreEnabled whether partition names are fetched with a metastore-side filter
 * @param maxParallelParsingConcurrency maximum number of threads parsing partition values; must be at least 1
 * @throws NullPointerException if {@code timeZone} or {@code typeManager} is null
 * @throws IllegalArgumentException if {@code domainCompactionThreshold} or
 *         {@code maxParallelParsingConcurrency} is less than 1
 */
public HivePartitionManager(
        TypeManager typeManager,
        DateTimeZone timeZone,
        boolean assumeCanonicalPartitionKeys,
        int maxPartitionsPerScan,
        int domainCompactionThreshold,
        boolean partitionFilteringFromMetastoreEnabled,
        int maxParallelParsingConcurrency)
{
    this.timeZone = requireNonNull(timeZone, "timeZone is null");
    this.assumeCanonicalPartitionKeys = assumeCanonicalPartitionKeys;
    this.typeManager = requireNonNull(typeManager, "typeManager is null");
    this.maxPartitionsPerScan = maxPartitionsPerScan;
    checkArgument(domainCompactionThreshold >= 1, "domainCompactionThreshold must be at least 1");
    this.domainCompactionThreshold = domainCompactionThreshold;
    this.partitionFilteringFromMetastoreEnabled = partitionFilteringFromMetastoreEnabled;
    checkArgument(maxParallelParsingConcurrency >= 1, "maxParallelParsingConcurrency must be at least 1");

    // A ThreadPoolExecutor only grows beyond its core size when the work queue rejects a task.
    // With an unbounded queue that never happens, so a core size of 0 would leave a single
    // thread doing all the work. Use the configured concurrency as the core size and let idle
    // core threads time out so the pool still shrinks to zero when unused.
    ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(
            maxParallelParsingConcurrency,
            maxParallelParsingConcurrency,
            60L,
            TimeUnit.SECONDS,
            new LinkedBlockingQueue<>(),
            daemonThreadsNamed("partition-value-parser-%s"));
    threadPoolExecutor.allowCoreThreadTimeOut(true);

    this.executorService = listeningDecorator(threadPoolExecutor);
    this.executorServiceMBean = new ThreadPoolExecutorMBean(threadPoolExecutor);
}
/**
 * Lists the partitions of a Hive table that satisfy the given constraint.
 *
 * <p>For a table without partition columns a single partition representing the whole table is
 * returned. Otherwise partition names are fetched (filtered by the metastore when that is
 * enabled, all names otherwise), parsed into typed values and filtered against the constraint.
 *
 * <p>When the session enables parallel parsing and there are more than
 * {@code PARTITION_NAMES_BATCH_SIZE} names, the names are parsed in batches on the executor;
 * in that case the order of the returned partitions is not guaranteed. If the parallel run
 * fails or is interrupted, the failure is logged and all names are parsed sequentially on the
 * calling thread instead.
 *
 * @param metastore metastore used to look up the table and its partition names
 * @param tableHandle handle of the table; must be a {@code HiveTableHandle}
 * @param constraint constraint whose summary drives partition pruning
 * @param session session supplying the feature flags consulted here
 * @return the partitions that pass the constraint
 */
public List<HivePartition> getPartitionsList(
        SemiTransactionalHiveMetastore metastore,
        ConnectorTableHandle tableHandle,
        Constraint<ColumnHandle> constraint,
        ConnectorSession session)
{
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicateColumnHandles = constraint.getSummary();
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = getTable(session, metastore, hiveTableHandle, isOfflineDataDebugModeEnabled(session));
    List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(table);
    List<Type> partitionTypes = partitionColumns.stream()
            .map(column -> typeManager.getType(column.getTypeSignature()))
            .collect(toList());
    Map<Column, Domain> effectivePredicate = createPartitionPredicates(
            metastore,
            session,
            effectivePredicateColumnHandles,
            partitionColumns,
            assumeCanonicalPartitionKeys);

    if (partitionColumns.isEmpty()) {
        return ImmutableList.of(new HivePartition(tableName));
    }

    List<String> partitionNames = partitionFilteringFromMetastoreEnabled
            ? getFilteredPartitionNames(session, metastore, hiveTableHandle, effectivePredicate)
            : getAllPartitionNames(session, metastore, hiveTableHandle, constraint);

    if (isParallelParsingOfPartitionValuesEnabled(session) && partitionNames.size() > PARTITION_NAMES_BATCH_SIZE) {
        List<List<String>> partitionNameBatches = Lists.partition(partitionNames, PARTITION_NAMES_BATCH_SIZE);
        // Use ConcurrentLinkedQueue to prevent race condition when multiple threads try to add partitions to this list
        ConcurrentLinkedQueue<HivePartition> result = new ConcurrentLinkedQueue<>();
        List<ListenableFuture<?>> futures = new ArrayList<>();
        try {
            partitionNameBatches.forEach(batch -> futures.add(executorService.submit(() -> result.addAll(getPartitionListFromPartitionNames(batch, tableName, partitionColumns, partitionTypes, constraint)))));
            Futures.transform(Futures.allAsList(futures), input -> result, directExecutor()).get();
            return Arrays.asList(result.toArray(new HivePartition[0]));
        }
        catch (InterruptedException | ExecutionException e) {
            if (e instanceof InterruptedException) {
                // Keep the interrupt visible to callers further up the stack
                Thread.currentThread().interrupt();
            }
            // Whatever the remaining batches produce is discarded, so stop them rather than
            // letting them compete with the sequential fallback below
            futures.forEach(future -> future.cancel(true));
            log.error(e, "Parallel parsing of partition values failed");
        }
    }

    // Sequential path: also the fallback when the parallel attempt above did not complete
    return getPartitionListFromPartitionNames(partitionNames, tableName, partitionColumns, partitionTypes, constraint);
}
/**
 * Parses each partition name into a partition and keeps those accepted by the constraint.
 *
 * @param partitionNames partition names to parse, in the order they should appear in the result
 * @param tableName table the partitions belong to
 * @param partitionColumns partition key columns of the table
 * @param partitionTypes types of {@code partitionColumns}, in the same order
 * @param constraint constraint each parsed partition is checked against
 * @return an immutable list of the partitions that were not filtered out
 */
private List<HivePartition> getPartitionListFromPartitionNames(
        List<String> partitionNames,
        SchemaTableName tableName,
        List<HiveColumnHandle> partitionColumns,
        List<Type> partitionTypes,
        Constraint<ColumnHandle> constraint)
{
    return partitionNames.stream()
            // Apply extra filters which could not be done by getFilteredPartitionNames
            .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionTypes, constraint))
            // An empty Optional means the partition was rejected
            .filter(Optional::isPresent)
            .map(Optional::get)
            .collect(toImmutableList());
}
private Map createPartitionPredicates(
SemiTransactionalHiveMetastore metastore,
ConnectorSession session,
TupleDomain effectivePredicateColumnHandles,
List partitionColumns,
boolean assumeCanonicalPartitionKeys)
{
Optional