org.apache.iceberg.hive.CachedClientPool (iceberg-hive-metastore)
A table format for huge analytic datasets
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.hive;
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.Scheduler;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.ClientPool;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.ThreadPools;
import org.apache.thrift.TException;
import org.immutables.value.Value;
/**
 * A ClientPool that caches the underlying HiveClientPool instances.
 *
 * <p>The following key elements are supported and can be specified via {@link
 * CatalogProperties#CLIENT_POOL_CACHE_KEYS}:
 *
 * <ul>
 *   <li>ugi - the Hadoop UserGroupInformation instance that represents the current user using
 *       the cache.
 *   <li>user_name - similar to UGI but only includes the user's name determined by
 *       UserGroupInformation#getUserName.
 *   <li>conf - name of an arbitrary configuration. The value of the configuration will be
 *       extracted from catalog properties and added to the cache key. A conf element should
 *       start with the "conf:" prefix followed by the configuration name, e.g. specifying
 *       "conf:a.b.c" adds the value of "a.b.c" to the key, so that catalogs configured with
 *       different values of "a.b.c" do not share the same client pool. Multiple conf elements
 *       can be specified.
 * </ul>
 *
 * <p>A usage sketch for these properties follows the class listing below.
 */
public class CachedClientPool implements ClientPool<IMetaStoreClient, TException> {
private static final String CONF_ELEMENT_PREFIX = "conf:";
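  // A single JVM-wide cache shared by every CachedClientPool instance, so catalogs
  // whose key elements match reuse the same underlying HiveClientPool.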
  private static Cache<Key, HiveClientPool> clientPoolCache;
private final Configuration conf;
private final int clientPoolSize;
private final long evictionInterval;
private final Key key;
  CachedClientPool(Configuration conf, Map<String, String> properties) {
this.conf = conf;
this.clientPoolSize =
PropertyUtil.propertyAsInt(
properties,
CatalogProperties.CLIENT_POOL_SIZE,
CatalogProperties.CLIENT_POOL_SIZE_DEFAULT);
this.evictionInterval =
PropertyUtil.propertyAsLong(
properties,
CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS,
CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS_DEFAULT);
this.key = extractKey(properties.get(CatalogProperties.CLIENT_POOL_CACHE_KEYS), conf);
init();
}
@VisibleForTesting
HiveClientPool clientPool() {
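    // Look up the pool for this catalog's key, lazily creating it on first access.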
return clientPoolCache.get(key, k -> new HiveClientPool(clientPoolSize, conf));
}
private synchronized void init() {
if (clientPoolCache == null) {
      // Caffeine does not guarantee that the removal listener is invoked promptly after
      // expiration, so use a scheduler with one thread to clean up expired clients.
clientPoolCache =
Caffeine.newBuilder()
.expireAfterAccess(evictionInterval, TimeUnit.MILLISECONDS)
.removalListener((ignored, value, cause) -> ((HiveClientPool) value).close())
.scheduler(
Scheduler.forScheduledExecutorService(
ThreadPools.newScheduledPool("hive-metastore-cleaner", 1)))
.build();
}
}
@VisibleForTesting
  static Cache<Key, HiveClientPool> clientPoolCache() {
return clientPoolCache;
}
@Override
  public <R> R run(Action<R, IMetaStoreClient, TException> action)
throws TException, InterruptedException {
return clientPool().run(action);
}
@Override
  public <R> R run(Action<R, IMetaStoreClient, TException> action, boolean retry)
throws TException, InterruptedException {
return clientPool().run(action, retry);
}
@VisibleForTesting
static Key extractKey(String cacheKeys, Configuration conf) {
// generate key elements in a certain order, so that the Key instances are comparable
    List<Object> elements = Lists.newArrayList();
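    // NOTE: the original listing truncates at this point. The remainder below is a
    // hedged reconstruction of the upstream logic rather than a verbatim copy: key
    // elements are collected in a fixed order so that equivalent configurations
    // produce equal, comparable Key instances.
    elements.add(conf.get(HiveConf.ConfVars.METASTOREURIS.varname, ""));
    if (cacheKeys == null || cacheKeys.isEmpty()) {
      return Key.of(elements);
    }

    // Parse the comma-separated key elements; "conf:" entries capture configuration
    // values, everything else must be one of the known element types.
    Set<KeyElementType> types = Sets.newTreeSet(Comparator.comparingInt(Enum::ordinal));
    Map<String, String> confElements = Maps.newTreeMap();
    for (String element : cacheKeys.split(",", -1)) {
      String trimmed = element.trim();
      if (trimmed.toLowerCase(Locale.ROOT).startsWith(CONF_ELEMENT_PREFIX)) {
        String key = trimmed.substring(CONF_ELEMENT_PREFIX.length());
        ValidationException.check(
            !confElements.containsKey(key), "Conf key element %s already specified", key);
        confElements.put(key, conf.get(key));
      } else {
        KeyElementType type = KeyElementType.valueOf(trimmed.toUpperCase(Locale.ROOT));
        ValidationException.check(
            !types.contains(type), "%s key element already specified", type.name());
        types.add(type);
      }
    }

    for (KeyElementType type : types) {
      try {
        switch (type) {
          case UGI:
            elements.add(UserGroupInformation.getCurrentUser());
            break;
          case USER_NAME:
            elements.add(UserGroupInformation.getCurrentUser().getUserName());
            break;
          default:
            throw new ValidationException("Unknown key element %s", type.name());
        }
      } catch (IOException e) {
        throw new UncheckedIOException(e);
      }
    }

    confElements.forEach((name, value) -> elements.add(ConfElement.of(name, value)));
    return Key.of(elements);
  }

  // Reconstructed value types: upstream defines these with the Immutables library;
  // ImmutableKey and ImmutableConfElement are generated by @Value.Immutable.
  @Value.Immutable
  abstract static class Key {
    abstract List<Object> elements();

    static Key of(Iterable<?> elements) {
      return ImmutableKey.builder().elements(elements).build();
    }
  }

  @Value.Immutable
  abstract static class ConfElement {
    abstract String key();

    @Nullable
    abstract String value();

    static ConfElement of(String key, String value) {
      return ImmutableConfElement.builder().key(key).value(value).build();
    }
  }

  private enum KeyElementType {
    UGI,
    USER_NAME
  }
}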
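Usage sketch (not part of the original file): a minimal, hedged example of wiring the
cache key elements through catalog properties when initializing a HiveCatalog. The
catalog name, metastore URI, and the "a.b.c" property value are illustrative
assumptions, not values taken from the source.

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.hive.HiveCatalog;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;

public class ClientPoolCacheKeyExample {
  public static void main(String[] args) {
    HiveCatalog catalog = new HiveCatalog();
    catalog.setConf(new Configuration());
    // Key the client pool cache on the current user's name plus the "a.b.c" catalog
    // property, so distinct users and distinct "a.b.c" values get separate pools.
    catalog.initialize(
        "hive", // assumed catalog name
        ImmutableMap.of(
            CatalogProperties.URI, "thrift://metastore:9083", // assumed metastore URI
            CatalogProperties.CLIENT_POOL_CACHE_KEYS, "user_name,conf:a.b.c",
            "a.b.c", "catalog-specific-value")); // assumed property value
  }
}

With this configuration, extractKey builds a Key from the metastore URI, the user name
from UserGroupInformation, and the value of "a.b.c", and two catalogs that agree on all
three share one HiveClientPool.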