/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.txn.compactor;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.CompactionRequest;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
import org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler;
import org.apache.hadoop.hive.metastore.txn.TxnHandler;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* A class to initiate compactions. This will run in a separate thread.
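* It scans the metastore for tables and partitions whose delta files or aborted
* transactions cross the configured thresholds and queues compaction requests for
* the Worker threads to execute.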
*/
public class Initiator extends CompactorThread {
private static final String CLASS_NAME = Initiator.class.getName();
private static final Log LOG = LogFactory.getLog(CLASS_NAME);
private long checkInterval;
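/**
* Main loop: recover compactions claimed by failed workers, then repeatedly scan the
* candidate tables and partitions reported by the transaction handler, decide whether
* each needs compacting, and enqueue a request of the appropriate type.
*/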
@Override
public void run() {
// Make sure nothing escapes this run method and kills the metastore at large,
// so wrap it in a big catch Throwable statement.
try {
recoverFailedCompactions(false);
int abortedThreshold = HiveConf.getIntVar(conf,
HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
// Make sure we run through the loop once before checking to stop as this makes testing
// much easier. The stop value is only for testing anyway and not used when called from
// HiveMetaStore.
do {
long startedAt = System.currentTimeMillis();
// Wrap the inner parts of the loop in a catch throwable so that any errors in the loop
// don't doom the entire thread.
try {
ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest());
ValidTxnList txns =
CompactionTxnHandler.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
Set&lt;CompactionInfo&gt; potentials = txnHandler.findPotentialCompactions(abortedThreshold);
LOG.debug("Found " + potentials.size() + " potential compactions, " +
"checking to see if we should compact any of them");
for (CompactionInfo ci : potentials) {
LOG.info("Checking to see if we should compact " + ci.getFullPartitionName());
try {
Table t = resolveTable(ci);
if (t == null) {
// Most likely this means it's a temp table
LOG.info("Can't find table " + ci.getFullTableName() + ", assuming it's a temp " +
"table or has been dropped and moving on.");
continue;
}
// Check if automatic compaction has been disabled for this table
if (noAutoCompactSet(t)) {
LOG.info("Table " + tableName(t) + " marked " + hive_metastoreConstants.TABLE_NO_AUTO_COMPACT +
"=true so we will not compact it.");
continue;
}
// Check to see if this is a table level request on a partitioned table. If so,
// then it's a dynamic partitioning case and we shouldn't check the table itself.
if (t.getPartitionKeys() != null && t.getPartitionKeys().size() > 0 &&
ci.partName == null) {
LOG.debug("Skipping entry for " + ci.getFullTableName() + " as it is from dynamic" +
" partitioning");
continue;
}
// Check if we already have initiated or are working on a compaction for this partition
// or table. If so, skip it. If we are just waiting on cleaning we can still check,
// as it may be time to compact again even though we haven't cleaned.
if (lookForCurrentCompactions(currentCompactions, ci)) {
LOG.debug("Found currently initiated or working compaction for " +
ci.getFullPartitionName() + " so we will not initiate another compaction");
continue;
}
// Figure out who we should run the file operations as
Partition p = resolvePartition(ci);
if (p == null && ci.partName != null) {
LOG.info("Can't find partition " + ci.getFullPartitionName() +
", assuming it has been dropped and moving on.");
continue;
}
StorageDescriptor sd = resolveStorageDescriptor(t, p);
String runAs = findUserToRunAs(sd.getLocation(), t);
CompactionType compactionNeeded = checkForCompaction(ci, txns, sd, runAs);
if (compactionNeeded != null) requestCompaction(ci, runAs, compactionNeeded);
} catch (Throwable t) {
LOG.error("Caught exception while trying to determine if we should compact " +
ci.getFullPartitionName() + ". Marking clean to avoid repeated failures, " +
"" + StringUtils.stringifyException(t));
txnHandler.markCleaned(ci);
}
}
// Check for timed out remote workers.
recoverFailedCompactions(true);
// Clean anything from the txns table that has no components left in txn_components.
txnHandler.cleanEmptyAbortedTxns();
} catch (Throwable t) {
LOG.error("Initiator loop caught unexpected exception this time through the loop: " +
StringUtils.stringifyException(t));
}
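// Sleep off whatever remains of the check interval, unless this pass already ran
// longer than the interval or a stop has been requested.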
long elapsedTime = System.currentTimeMillis() - startedAt;
if (elapsedTime >= checkInterval || stop.get()) continue;
else Thread.sleep(checkInterval - elapsedTime);
} while (!stop.get());
} catch (Throwable t) {
LOG.error("Caught an exception in the main loop of compactor initiator, exiting " +
StringUtils.stringifyException(t));
}
}
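/**
* Reads the scan interval from HIVE_COMPACTOR_CHECK_INTERVAL once at startup;
* run() uses it to pace its passes over the candidate list.
*/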
@Override
public void init(AtomicBoolean stop, AtomicBoolean looped) throws MetaException {
super.init(stop, looped);
checkInterval =
conf.getTimeVar(HiveConf.ConfVars.HIVE_COMPACTOR_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
}
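/**
* Return compactions claimed by dead workers to the queue so another worker can pick
* them up. Entries held by workers on this host are also reclaimed when remoteOnly is
* false; entries held by timed-out remote workers are reclaimed in either case.
*/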
private void recoverFailedCompactions(boolean remoteOnly) throws MetaException {
if (!remoteOnly) txnHandler.revokeFromLocalWorkers(Worker.hostname());
txnHandler.revokeTimedoutWorkers(HiveConf.getTimeVar(conf,
HiveConf.ConfVars.HIVE_COMPACTOR_WORKER_TIMEOUT, TimeUnit.MILLISECONDS));
}
// Figure out if there are any currently running compactions on the same table or partition.
private boolean lookForCurrentCompactions(ShowCompactResponse compactions,
CompactionInfo ci) {
if (compactions.getCompacts() != null) {
for (ShowCompactResponseElement e : compactions.getCompacts()) {
if (!e.getState().equals(TxnHandler.CLEANING_RESPONSE) &&
e.getDbname().equals(ci.dbname) &&
e.getTablename().equals(ci.tableName) &&
(e.getPartitionname() == null ? ci.partName == null :
e.getPartitionname().equals(ci.partName))) {
return true;
}
}
}
return false;
}
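/**
* Decide whether this entry needs compacting. Too many aborted transactions force an
* immediate major compaction; otherwise the size-based heuristics in
* determineCompactionType() are applied, impersonating runAs through a proxy
* UserGroupInformation when the check should not run as this process's user.
* @return the type of compaction to request, or null if none is needed
*/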
private CompactionType checkForCompaction(final CompactionInfo ci,
final ValidTxnList txns,
final StorageDescriptor sd,
final String runAs)
throws IOException, InterruptedException {
// If it's marked as too many aborted, we already know we need to compact
if (ci.tooManyAborts) {
LOG.debug("Found too many aborted transactions for " + ci.getFullPartitionName() + ", " +
"initiating major compaction");
return CompactionType.MAJOR;
}
if (runJobAsSelf(runAs)) {
return determineCompactionType(ci, txns, sd);
} else {
LOG.info("Going to initiate as user " + runAs);
UserGroupInformation ugi = UserGroupInformation.createProxyUser(runAs,
UserGroupInformation.getLoginUser());
return ugi.doAs(new PrivilegedExceptionAction&lt;CompactionType&gt;() {
@Override
public CompactionType run() throws Exception {
return determineCompactionType(ci, txns, sd);
}
});
}
}
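/**
* Examine the partition's files to pick a compaction type: if total delta size exceeds
* the configured fraction of the base size, a major compaction is due; if the number of
* delta directories exceeds its threshold, a minor compaction is due (major when there
* is no base to rewrite the deltas against). Returns null if neither threshold is met.
*/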
private CompactionType determineCompactionType(CompactionInfo ci, ValidTxnList txns,
StorageDescriptor sd)
throws IOException, InterruptedException {
boolean noBase = false;
Path location = new Path(sd.getLocation());
FileSystem fs = location.getFileSystem(conf);
AcidUtils.Directory dir = AcidUtils.getAcidState(location, conf, txns);
Path base = dir.getBaseDirectory();
long baseSize = 0;
FileStatus stat = null;
if (base != null) {
stat = fs.getFileStatus(base);
if (!stat.isDir()) {
LOG.error("Was assuming base " + base.toString() + " is directory, but it's a file!");
return null;
}
baseSize = sumDirSize(fs, base);
}
List&lt;FileStatus&gt; originals = dir.getOriginalFiles();
for (FileStatus origStat : originals) {
baseSize += origStat.getLen();
}
long deltaSize = 0;
List&lt;AcidUtils.ParsedDelta&gt; deltas = dir.getCurrentDirectories();
for (AcidUtils.ParsedDelta delta : deltas) {
stat = fs.getFileStatus(delta.getPath());
if (!stat.isDir()) {
LOG.error("Was assuming delta " + delta.getPath().toString() + " is a directory, " +
"but it's a file!");
return null;
}
deltaSize += sumDirSize(fs, delta.getPath());
}
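// Only deltas and no base (or original) files: skip the percentage check, since there is
// nothing to compare against, and let the delta-count check below escalate to major.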
if (baseSize == 0 && deltaSize > 0) {
noBase = true;
} else {
float deltaPctThreshold = HiveConf.getFloatVar(conf,
HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_PCT_THRESHOLD);
boolean bigEnough = (float)deltaSize/(float)baseSize > deltaPctThreshold;
if (LOG.isDebugEnabled()) {
StringBuffer msg = new StringBuffer("delta size: ");
msg.append(deltaSize);
msg.append(" base size: ");
msg.append(baseSize);
msg.append(" threshold: ");
msg.append(deltaPctThreshold);
msg.append(" will major compact: ");
msg.append(bigEnough);
LOG.debug(msg);
}
if (bigEnough) return CompactionType.MAJOR;
}
int deltaNumThreshold = HiveConf.getIntVar(conf,
HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD);
boolean enough = deltas.size() > deltaNumThreshold;
if (enough) {
LOG.debug("Found " + deltas.size() + " delta files, threshold is " + deltaNumThreshold +
(enough ? "" : "not") + " and no base, requesting " + (noBase ? "major" : "minor") +
" compaction");
// If there's no base file, do a major compaction
return noBase ? CompactionType.MAJOR : CompactionType.MINOR;
}
return null;
}
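/**
* Sum the lengths of all non-hidden files (the bucket files) directly under dir.
*/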
private long sumDirSize(FileSystem fs, Path dir) throws IOException {
long size = 0;
FileStatus[] buckets = fs.listStatus(dir, FileUtils.HIDDEN_FILES_PATH_FILTER);
for (FileStatus bucket : buckets) {
size += bucket.getLen();
}
return size;
}
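/**
* Record a compaction request of the given type in the metastore, noting the user the
* compaction job should run as.
*/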
private void requestCompaction(CompactionInfo ci, String runAs, CompactionType type) throws MetaException {
String s = "Requesting " + type.toString() + " compaction for " + ci.getFullPartitionName();
LOG.info(s);
CompactionRequest rqst = new CompactionRequest(ci.dbname, ci.tableName, type);
if (ci.partName != null) rqst.setPartitionname(ci.partName);
rqst.setRunas(runAs);
txnHandler.compact(rqst);
}
// Because the TABLE_NO_AUTO_COMPACT parameter was originally spelled NO_AUTO_COMPACT and was
// later changed to no_auto_compact, we need to check both spellings.
private boolean noAutoCompactSet(Table t) {
String noAutoCompact =
t.getParameters().get(hive_metastoreConstants.TABLE_NO_AUTO_COMPACT);
if (noAutoCompact == null) {
noAutoCompact =
t.getParameters().get(hive_metastoreConstants.TABLE_NO_AUTO_COMPACT.toUpperCase());
}
return noAutoCompact != null && noAutoCompact.equalsIgnoreCase("true");
}
}