// org.apache.hadoop.hive.ql.txn.compactor.CompactorThread Maven / Gradle / Ivy
// The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.txn.compactor;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.MetaStoreThread;
import org.apache.hadoop.hive.metastore.RawStore;
import org.apache.hadoop.hive.metastore.RawStoreProxy;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* Superclass for all threads in the compactor.
*/
abstract class CompactorThread extends Thread implements MetaStoreThread {
// Fully qualified name of this class; used as the logger name below.
private static final String CLASS_NAME = CompactorThread.class.getName();
// Logger shared by the helper methods in this class.
private static final Log LOG = LogFactory.getLog(CLASS_NAME);

// Configuration supplied through setHiveConf(); read by init().
protected HiveConf conf;
// Transaction handler instance created for this thread in init().
protected CompactionTxnHandler txnHandler;
// Metastore store proxy obtained in init(); used to look up tables and partitions.
protected RawStore rs;
// Identifier supplied through setThreadId(); passed on to RawStoreProxy in init().
protected int threadId;
// Flags handed over in init(). NOTE(review): presumably "stop" requests shutdown and
// "looped" reports a completed pass -- confirm against the MetaStoreThread contract.
protected AtomicBoolean stop;
protected AtomicBoolean looped;
/**
 * Stores the configuration this thread will use. {@link #init} reads it, so it has to be
 * set before {@code init} is called.
 * @param conf Hive configuration to keep for later use
 */
@Override
public void setHiveConf(HiveConf conf) {
  this.conf = conf;
}
/**
 * Records the identifier assigned to this thread. {@link #init} forwards it to the raw
 * store proxy.
 * @param threadId identifier to remember for this thread
 */
@Override
public void setThreadId(int threadId) {
  this.threadId = threadId;
}
/**
 * Prepares this thread for running: keeps the two control flags, marks the thread as a
 * minimum-priority daemon, and creates the per-thread transaction handler and raw store
 * proxy from the configuration previously passed to {@link #setHiveConf}.
 * @param stop flag object retained in {@link #stop}
 * @param looped flag object retained in {@link #looped}
 * @throws MetaException if thrown while setting up the metastore helpers
 */
@Override
public void init(AtomicBoolean stop, AtomicBoolean looped) throws MetaException {
  this.stop = stop;
  this.looped = looped;

  // Daemon status means the process will exit without waiting for this thread.
  setPriority(MIN_PRIORITY);
  setDaemon(true);

  // Each thread gets its own instance of the transaction handler.
  txnHandler = new CompactionTxnHandler(conf);

  // Each thread also gets its own connection to the database, used to fetch table and
  // partition information.
  final String rawStoreImpl = conf.getVar(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL);
  rs = RawStoreProxy.getProxy(conf, conf, rawStoreImpl, threadId);
}
/**
 * Look up the table that a compaction request refers to.
 * @param ci compaction info returned from the compaction queue; its database and table
 *           names are used for the lookup
 * @return the table object returned by the raw store
 * @throws org.apache.hadoop.hive.metastore.api.MetaException if the raw store lookup fails;
 * the failure is logged at error level and then rethrown unchanged.
 */
protected Table resolveTable(CompactionInfo ci) throws MetaException {
  final Table table;
  try {
    table = rs.getTable(ci.dbname, ci.tableName);
  } catch (MetaException lookupFailure) {
    final String message =
        "Unable to find table " + ci.getFullTableName() + ", " + lookupFailure.getMessage();
    LOG.error(message);
    throw lookupFailure;
  }
  return table;
}
/**
 * Get the partition being compacted.
 * @param ci compaction info returned from the compaction queue
 * @return metastore partition, or null if there is no partition in this compaction info or
 * the lookup returned nothing (the partition was dropped before we went looking for it)
 * @throws Exception if underlying calls throw, or if the partition name resolves to more than
 * one partition.
 */
protected Partition resolvePartition(CompactionInfo ci) throws Exception {
  // No partition name in the request, so there is nothing to resolve.
  if (ci.partName == null) {
    return null;
  }

  // Typed list (not a raw List) so that get(0) below yields a Partition without a cast.
  final List<Partition> parts;
  try {
    parts = rs.getPartitionsByNames(ci.dbname, ci.tableName,
        Collections.singletonList(ci.partName));
  } catch (Exception e) {
    LOG.error("Unable to find partition " + ci.getFullPartitionName() + ", " + e.getMessage());
    throw e;
  }

  if (parts == null || parts.isEmpty()) {
    // The partition got dropped before we went looking for it.
    return null;
  }
  if (parts.size() != 1) {
    LOG.error(ci.getFullPartitionName() + " does not refer to a single partition");
    throw new MetaException("Too many partitions");
  }
  return parts.get(0);
}
/**
 * Pick the storage descriptor that applies to a compaction: the partition's descriptor when
 * a partition is given, otherwise the table's.
 * @param t table from {@link #resolveTable(org.apache.hadoop.hive.metastore.txn.CompactionInfo)};
 *          only consulted when {@code p} is null
 * @param p partition from {@link #resolvePartition(org.apache.hadoop.hive.metastore.txn.CompactionInfo)},
 *          may be null
 * @return metastore storage descriptor.
 */
protected StorageDescriptor resolveStorageDescriptor(Table t, Partition p) {
  if (p != null) {
    return p.getSd();
  }
  return t.getSd();
}
/**
* Determine which user to run an operation as, based on the owner of the directory to be
* compacted. It is asserted that either the user running the hive metastore or the table
* owner must be able to stat the directory and determine the owner.
* @param location directory that will be read or written to.
* @param t metastore table object
* @return username of the owner of the location.
* @throws java.io.IOException if neither the hive metastore user nor the table owner can stat
* the location.
*/
protected String findUserToRunAs(String location, Table t) throws IOException,
InterruptedException {
LOG.debug("Determining who to run the job as.");
final Path p = new Path(location);
final FileSystem fs = p.getFileSystem(conf);
try {
FileStatus stat = fs.getFileStatus(p);
LOG.debug("Running job as " + stat.getOwner());
return stat.getOwner();
} catch (AccessControlException e) {
// TODO not sure this is the right exception
LOG.debug("Unable to stat file as current user, trying as table owner");
// Now, try it as the table owner and see if we get better luck.
final List wrapper = new ArrayList(1);
UserGroupInformation ugi = UserGroupInformation.createProxyUser(t.getOwner(),
UserGroupInformation.getLoginUser());
ugi.doAs(new PrivilegedExceptionAction