org.apache.gobblin.hive.HiveMetastoreClientPool Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gobblin-hive-registration Show documentation
Show all versions of gobblin-hive-registration Show documentation
A distributed data integration framework for streaming and batch data ecosystems.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.gobblin.hive;
import java.io.IOException;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.commons.pool2.impl.GenericObjectPool;
import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import com.google.common.base.Optional;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
import com.google.common.collect.Maps;
import com.google.common.io.Closer;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.util.AutoReturnableObject;
import lombok.Getter;
/**
* A pool of {@link IMetaStoreClient} for querying the Hive metastore.
*/
public class HiveMetastoreClientPool {
private final GenericObjectPool pool;
private final HiveMetaStoreClientFactory factory;
@Getter
private final HiveConf hiveConf;
@Getter
private final HiveRegProps hiveRegProps;
private static final long DEFAULT_POOL_CACHE_TTL_MINUTES = 30;
public static final String POOL_CACHE_TTL_MINUTES_KEY = "hive.metaStorePoolCache.ttl";
private static Cache, HiveMetastoreClientPool> poolCache = null;
private static final Cache, HiveMetastoreClientPool> createPoolCache(final Properties properties) {
long duration = properties.containsKey(POOL_CACHE_TTL_MINUTES_KEY)
? Long.parseLong(properties.getProperty(POOL_CACHE_TTL_MINUTES_KEY)) : DEFAULT_POOL_CACHE_TTL_MINUTES;
return CacheBuilder.newBuilder()
.expireAfterAccess(duration, TimeUnit.MINUTES)
.removalListener(new RemovalListener, HiveMetastoreClientPool>() {
@Override
public void onRemoval(RemovalNotification, HiveMetastoreClientPool> notification) {
if (notification.getValue() != null) {
notification.getValue().close();
}
}
}).build();
}
/**
* Get a {@link HiveMetastoreClientPool} for the requested metastore URI. Useful for using the same pools across
* different classes in the code base. Note that if a pool already exists for that metastore, the max number of
* objects available will be unchanged, and it might be lower than requested by this method.
*
* @param properties {@link Properties} used to generate the pool.
* @param metastoreURI URI of the Hive metastore. If absent, use default metastore.
* @return a {@link HiveMetastoreClientPool}.
* @throws IOException
*/
public static HiveMetastoreClientPool get(final Properties properties, final Optional metastoreURI)
throws IOException {
synchronized (HiveMetastoreClientPool.class) {
if (poolCache == null) {
poolCache = createPoolCache(properties);
}
}
try {
return poolCache.get(metastoreURI, new Callable() {
@Override
public HiveMetastoreClientPool call() throws Exception {
return new HiveMetastoreClientPool(properties, metastoreURI);
}
});
} catch (ExecutionException ee) {
throw new IOException("Failed to get " + HiveMetastoreClientPool.class.getSimpleName(), ee.getCause());
}
}
/**
* Constructor for {@link HiveMetastoreClientPool}.
* @deprecated It is recommended to use the static {@link #get} method instead. Use this constructor only if you
* different pool configurations are required.
*/
@Deprecated
public HiveMetastoreClientPool(Properties properties, Optional metastoreURI) {
this.hiveRegProps = new HiveRegProps(new State(properties));
GenericObjectPoolConfig config = new GenericObjectPoolConfig();
config.setMaxTotal(this.hiveRegProps.getNumThreads());
config.setMaxIdle(this.hiveRegProps.getNumThreads());
this.factory = new HiveMetaStoreClientFactory(metastoreURI);
this.pool = new GenericObjectPool<>(this.factory, config);
this.hiveConf = this.factory.getHiveConf();
}
public void close() {
this.pool.close();
}
/**
* @return an auto returnable wrapper around a {@link IMetaStoreClient}.
* @throws IOException
* Note: if you must acquire multiple locks, please use {@link #safeGetClients} instead, as this call may deadlock.
*/
public AutoReturnableObject getClient() throws IOException {
return new AutoReturnableObject<>(this.pool);
}
/**
* A class wrapping multiple named {@link IMetaStoreClient}s.
*/
public static class MultiClient implements AutoCloseable {
private final Map> clients;
private final Closer closer;
private MultiClient(Map namedPools) throws IOException {
this.clients = Maps.newHashMap();
this.closer = Closer.create();
Map requiredClientsPerPool = Maps.newHashMap();
for (Map.Entry entry : namedPools.entrySet()) {
if (requiredClientsPerPool.containsKey(entry.getValue())) {
requiredClientsPerPool.put(entry.getValue(), requiredClientsPerPool.get(entry.getValue()) + 1);
} else {
requiredClientsPerPool.put(entry.getValue(), 1);
}
}
for (Map.Entry entry : requiredClientsPerPool.entrySet()) {
if (entry.getKey().pool.getMaxTotal() < entry.getValue()) {
throw new IOException(
String.format("Not enough clients available in the pool. Required %d, max available %d.",
entry.getValue(), entry.getKey().pool.getMaxTotal()));
}
}
for (Map.Entry entry : namedPools.entrySet()) {
this.clients.put(entry.getKey(), this.closer.register(entry.getValue().getClient()));
}
}
/**
* Get the {@link IMetaStoreClient} with the provided name.
* @throws IOException
*/
public IMetaStoreClient getClient(String name) throws IOException {
if (!this.clients.containsKey(name)) {
throw new IOException("There is no client with name " + name);
}
return this.clients.get(name).get();
}
@Override
public void close() throws IOException {
this.closer.close();
}
}
/**
* A method to get multiple {@link IMetaStoreClient}s while preventing deadlocks.
* @param namedPools A map from String to {@link HiveMetastoreClientPool}.
* @return a {@link MultiClient} with a {@link IMetaStoreClient} for each entry in the input map. The client can
* be retrieved by its name in the input map.
* @throws IOException
*/
public static synchronized MultiClient safeGetClients(Map namedPools)
throws IOException {
return new MultiClient(namedPools);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy