/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.metastore.tools;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.utils.StringUtils;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.impl.AcidStats;
import org.apache.orc.impl.OrcAcidUtils;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.ObjectStore;
/**
* This class provides Hive admins a tool to
* - execute JDOQL against the metastore using DataNucleus
* - perform HA name node upgrade
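*
* Typical invocations look roughly like the following (illustrative only: the host names and the
* JDOQL string are placeholders, and the tool is normally launched through the Hive "metatool"
* service wrapper):
*   hive --service metatool -listFSRoot
*   hive --service metatool -executeJDOQL "select name from org.apache.hadoop.hive.metastore.model.MDatabase"
*   hive --service metatool -updateLocation hdfs://new-nn:8020 hdfs://old-nn:8020 -dryRun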
*/
public class HiveMetaTool {
private static final Logger LOG = LoggerFactory.getLogger(HiveMetaTool.class.getName());
private final Options cmdLineOptions = new Options();
private ObjectStore objStore;
private boolean isObjStoreInitialized;
public HiveMetaTool() {
this.isObjStoreInitialized = false;
}
@SuppressWarnings("static-access")
private void init() {
System.out.println("Initializing HiveMetaTool..");
Option help = new Option("help", "print this message");
Option listFSRoot = new Option("listFSRoot", "print the current FS root locations");
Option executeJDOQL =
OptionBuilder.withArgName("query-string")
.hasArgs()
.withDescription("execute the given JDOQL query")
.create("executeJDOQL");
/* Ideally we want to specify the different arguments to updateLocation as separate argNames.
* However, if we did that, HelpFormatter swallows all but the last argument. Note that this is
* a known issue with the HelpFormatter class that has not been fixed. We specify all arguments
* with a single argName to work around this HelpFormatter bug.
*/
Option updateFSRootLoc =
OptionBuilder
.withArgName("new-loc> " + "<old-loc>")
.hasArgs(2)
.withDescription("Update FS root location in the metastore to new location. Both new-loc and " +
"old-loc should be valid URIs with valid host names and schemes. " +
"When run with the dryRun option changes are displayed but are not persisted. " +
"When run with the serdePropKey/tablePropKey option updateLocation looks for the " +
"serde-prop-key/table-prop-key that is specified and updates its value if found.")
.create("updateLocation");
Option dryRun = new Option("dryRun", "Perform a dry run of updateLocation changes. When " +
"run with the dryRun option updateLocation changes are displayed but not persisted. " +
"dryRun is valid only with the updateLocation option.");
Option serdePropKey =
OptionBuilder.withArgName("serde-prop-key")
.hasArgs()
.withValueSeparator()
.withDescription("Specify the key for serde property to be updated. serdePropKey option " +
"is valid only with updateLocation option.")
.create("serdePropKey");
Option tablePropKey =
OptionBuilder.withArgName("table-prop-key")
.hasArg()
.withValueSeparator()
.withDescription("Specify the key for table property to be updated. tablePropKey option " +
"is valid only with updateLocation option.")
.create("tablePropKey");
Option prepareAcidUpgrade =
OptionBuilder.withArgName("find-compactions")
.hasOptionalArg() //directory to write the generated scripts to
.withDescription("Generates a set of compaction commands to run to prepare for Hive 3 upgrade")
.create("prepareAcidUpgrade");
cmdLineOptions.addOption(help);
cmdLineOptions.addOption(listFSRoot);
cmdLineOptions.addOption(executeJDOQL);
cmdLineOptions.addOption(updateFSRootLoc);
cmdLineOptions.addOption(dryRun);
cmdLineOptions.addOption(serdePropKey);
cmdLineOptions.addOption(tablePropKey);
cmdLineOptions.addOption(prepareAcidUpgrade);
}
private void initObjectStore(Configuration conf) {
if (!isObjStoreInitialized) {
objStore = new ObjectStore();
objStore.setConf(conf);
isObjStoreInitialized = true;
}
}
private void shutdownObjectStore() {
if (isObjStoreInitialized) {
objStore.shutdown();
}
}
private void listFSRoot() {
Configuration conf = MetastoreConf.newMetastoreConf();
initObjectStore(conf);
Set<String> hdfsRoots = objStore.listFSRoots();
if (hdfsRoots != null) {
System.out.println("Listing FS Roots..");
for (String s : hdfsRoots) {
System.out.println(s);
}
} else {
System.err.println("Encountered error during listFSRoot - " +
"commit of JDO transaction failed");
}
}
private void executeJDOQLSelect(String query) {
Configuration conf = MetastoreConf.newMetastoreConf();
initObjectStore(conf);
System.out.println("Executing query: " + query);
try (ObjectStore.QueryWrapper queryWrapper = new ObjectStore.QueryWrapper()) {
Collection<?> result = objStore.executeJDOQLSelect(query, queryWrapper);
if (result != null) {
Iterator<?> iter = result.iterator();
while (iter.hasNext()) {
Object o = iter.next();
System.out.println(o.toString());
}
} else {
System.err.println("Encountered error during executeJDOQLSelect -" +
"commit of JDO transaction failed.");
}
}
}
private void executeJDOQLUpdate(String query) {
Configuration conf = MetastoreConf.newMetastoreConf();
initObjectStore(conf);
System.out.println("Executing query: " + query);
long numUpdated = objStore.executeJDOQLUpdate(query);
if (numUpdated >= 0) {
System.out.println("Number of records updated: " + numUpdated);
} else {
System.err.println("Encountered error during executeJDOQL -" +
"commit of JDO transaction failed.");
}
}
private int printUpdateLocations(Map<String, String> updateLocations) {
int count = 0;
for (String key: updateLocations.keySet()) {
String value = updateLocations.get(key);
System.out.println("old location: " + key + " new location: " + value);
count++;
}
return count;
}
private void printTblURIUpdateSummary(ObjectStore.UpdateMStorageDescriptorTblURIRetVal retVal,
boolean isDryRun) {
String tblName = "SDS";
String fieldName = "LOCATION";
if (retVal == null) {
System.err.println("Encountered error while executing updateMStorageDescriptorTblURI - " +
"commit of JDO transaction failed. Failed to update FSRoot locations in " +
fieldName + "field in " + tblName + " table.");
} else {
Map<String, String> updateLocations = retVal.getUpdateLocations();
if (isDryRun) {
System.out.println("Dry Run of updateLocation on table " + tblName + "..");
} else {
System.out.println("Successfully updated the following locations..");
}
int count = printUpdateLocations(updateLocations);
if (isDryRun) {
System.out.println("Found " + count + " records in " + tblName + " table to update");
} else {
System.out.println("Updated " + count + " records in " + tblName + " table");
}
List<String> badRecords = retVal.getBadRecords();
if (badRecords.size() > 0) {
System.err.println("Warning: Found records with bad " + fieldName + " in " +
tblName + " table.. ");
for (String badRecord:badRecords) {
System.err.println("bad location URI: " + badRecord);
}
}
int numNullRecords = retVal.getNumNullRecords();
if (numNullRecords != 0) {
LOG.debug("Number of NULL location URI: " + numNullRecords +
". This can happen for View or Index.");
}
}
}
private void printDatabaseURIUpdateSummary(ObjectStore.UpdateMDatabaseURIRetVal retVal,
boolean isDryRun) {
String tblName = "DBS";
String fieldName = "LOCATION_URI";
if (retVal == null) {
System.err.println("Encountered error while executing updateMDatabaseURI - " +
"commit of JDO transaction failed. Failed to update FSRoot locations in " +
fieldName + "field in " + tblName + " table.");
} else {
Map<String, String> updateLocations = retVal.getUpdateLocations();
if (isDryRun) {
System.out.println("Dry Run of updateLocation on table " + tblName + "..");
} else {
System.out.println("Successfully updated the following locations..");
}
int count = printUpdateLocations(updateLocations);
if (isDryRun) {
System.out.println("Found " + count + " records in " + tblName + " table to update");
} else {
System.out.println("Updated " + count + " records in " + tblName + " table");
}
List<String> badRecords = retVal.getBadRecords();
if (badRecords.size() > 0) {
System.err.println("Warning: Found records with bad " + fieldName + " in " +
tblName + " table.. ");
for (String badRecord:badRecords) {
System.err.println("bad location URI: " + badRecord);
}
}
}
}
private void printPropURIUpdateSummary(ObjectStore.UpdatePropURIRetVal retVal, String
tablePropKey, boolean isDryRun, String tblName, String methodName) {
if (retVal == null) {
System.err.println("Encountered error while executing " + methodName + " - " +
"commit of JDO transaction failed. Failed to update FSRoot locations in " +
"value field corresponding to" + tablePropKey + " in " + tblName + " table.");
} else {
Map<String, String> updateLocations = retVal.getUpdateLocations();
if (isDryRun) {
System.out.println("Dry Run of updateLocation on table " + tblName + "..");
} else {
System.out.println("Successfully updated the following locations..");
}
int count = printUpdateLocations(updateLocations);
if (isDryRun) {
System.out.println("Found " + count + " records in " + tblName + " table to update");
} else {
System.out.println("Updated " + count + " records in " + tblName + " table");
}
List<String> badRecords = retVal.getBadRecords();
if (badRecords.size() > 0) {
System.err.println("Warning: Found records with bad " + tablePropKey + " key in " +
tblName + " table.. ");
for (String badRecord:badRecords) {
System.err.println("bad location URI: " + badRecord);
}
}
}
}
private void printSerdePropURIUpdateSummary(ObjectStore.UpdateSerdeURIRetVal retVal,
String serdePropKey, boolean isDryRun) {
String tblName = "SERDE_PARAMS";
if (retVal == null) {
System.err.println("Encountered error while executing updateSerdeURI - " +
"commit of JDO transaction failed. Failed to update FSRoot locations in " +
"value field corresponding to " + serdePropKey + " in " + tblName + " table.");
} else {
Map<String, String> updateLocations = retVal.getUpdateLocations();
if (isDryRun) {
System.out.println("Dry Run of updateLocation on table " + tblName + "..");
} else {
System.out.println("Successfully updated the following locations..");
}
int count = printUpdateLocations(updateLocations);
if (isDryRun) {
System.out.println("Found " + count + " records in " + tblName + " table to update");
} else {
System.out.println("Updated " + count + " records in " + tblName + " table");
}
List<String> badRecords = retVal.getBadRecords();
if (badRecords.size() > 0) {
System.err.println("Warning: Found records with bad " + serdePropKey + " key in " +
tblName + " table.. ");
for (String badRecord:badRecords) {
System.err.println("bad location URI: " + badRecord);
}
}
}
}
public void updateFSRootLocation(URI oldURI, URI newURI, String serdePropKey, String
tablePropKey, boolean isDryRun) {
Configuration conf = MetastoreConf.newMetastoreConf();
initObjectStore(conf);
System.out.println("Looking for LOCATION_URI field in DBS table to update..");
ObjectStore.UpdateMDatabaseURIRetVal updateMDBURIRetVal = objStore.updateMDatabaseURI(oldURI,
newURI, isDryRun);
printDatabaseURIUpdateSummary(updateMDBURIRetVal, isDryRun);
System.out.println("Looking for LOCATION field in SDS table to update..");
ObjectStore.UpdateMStorageDescriptorTblURIRetVal updateTblURIRetVal =
objStore.updateMStorageDescriptorTblURI(oldURI, newURI, isDryRun);
printTblURIUpdateSummary(updateTblURIRetVal, isDryRun);
if (tablePropKey != null) {
System.out.println("Looking for value of " + tablePropKey + " key in TABLE_PARAMS table " +
"to update..");
ObjectStore.UpdatePropURIRetVal updateTblPropURIRetVal =
objStore.updateTblPropURI(oldURI, newURI,
tablePropKey, isDryRun);
printPropURIUpdateSummary(updateTblPropURIRetVal, tablePropKey, isDryRun, "TABLE_PARAMS",
"updateTblPropURI");
System.out.println("Looking for value of " + tablePropKey + " key in SD_PARAMS table " +
"to update..");
ObjectStore.UpdatePropURIRetVal updatePropURIRetVal = objStore
.updateMStorageDescriptorTblPropURI(oldURI, newURI, tablePropKey, isDryRun);
printPropURIUpdateSummary(updatePropURIRetVal, tablePropKey, isDryRun, "SD_PARAMS",
"updateMStorageDescriptorTblPropURI");
}
if (serdePropKey != null) {
System.out.println("Looking for value of " + serdePropKey + " key in SERDE_PARAMS table " +
"to update..");
ObjectStore.UpdateSerdeURIRetVal updateSerdeURIretVal = objStore.updateSerdeURI(oldURI,
newURI, serdePropKey, isDryRun);
printSerdePropURIUpdateSummary(updateSerdeURIretVal, serdePropKey, isDryRun);
}
}
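// Rough end-to-end sketch of how the above is driven from the command line (values are
// placeholders; the property keys are just examples of keys whose values embed an FS URI):
//   hive --service metatool -updateLocation hdfs://new-nn:8020 hdfs://old-nn:8020 \
//       -tablePropKey avro.schema.url -serdePropKey avro.schema.url -dryRun
// This walks DBS.LOCATION_URI, SDS.LOCATION, the matching TABLE_PARAMS/SD_PARAMS values and the
// matching SERDE_PARAMS values, printing what would change; dropping -dryRun persists the changes.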
private void prepareAcidUpgrade(String scriptLocation) {
try {
prepareAcidUpgradeInternal(scriptLocation);
}
catch(TException|IOException ex) {
System.err.println(StringUtils.stringifyException(ex));
printAndExit(this);
}
}
private static class CompactionMetaInfo {
/**
* total number of bytes to be compacted across all compaction commands
*/
long numberOfBytes;
}
/**
* todo: make sure the compaction queue is configured and has ample capacity
* todo: what to do on failure? Suppose some table/part is not readable - should it still produce
* commands for all other tables?
* todo: should probably support a dryRun mode where we output scripts but, instead of renaming
* files, generate a renaming script. Alternatively, always generate a renaming script and have the
* user execute it - this is probably the better option. If the script is not empty on rerun,
* someone added files to a table that is to be made Acid.
* todo: how do we test this? Even if we had 2.x data it won't be readable in 3.0. Even w/o any
* updates, txnids in the data won't make sense in 3.0 w/o actually performing the equivalent of a
* metastore upgrade to init the writeid table. Also, can we even create a new table and set its
* location to existing files to simulate a 2.x table?
* todo: generate some instructions in the compaction script to include the total number of
* compactions to perform, the total data volume to handle and anything else that may help users
* guess how long it will take. Also, highlight tuning options to speed this up.
* todo: can we make the script blocking so that it waits for the cleaner to delete files?
* Need to poll SHOW COMPACTIONS and make sure that all partitions are in "finished" state.
* todo: this should accept a file of table names to exclude from non-acid to acid conversion
* todo: change script comments to a preamble instead of a footer
*
* @throws MetaException
* @throws TException
*/
private void prepareAcidUpgradeInternal(String scriptLocation) throws MetaException, TException, IOException {
Configuration conf = MetastoreConf.newMetastoreConf();
System.out.println("Looking for Acid tables that need to be compacted");
//todo: check if acid is enabled and bail if not
//todo: check that running on 2.x?
HiveMetaStoreClient hms = new HiveMetaStoreClient(conf);//MetaException
List<String> databases = hms.getAllDatabases();//TException
System.out.println("Found " + databases.size() + " databases to process");
List<String> compactions = new ArrayList<>();
List<String> convertToAcid = new ArrayList<>();
List<String> convertToMM = new ArrayList<>();
final CompactionMetaInfo compactionMetaInfo = new CompactionMetaInfo();
for(String dbName : databases) {
List<String> tables = hms.getAllTables(dbName);
System.out.println("found " + tables.size() + " tables in " + dbName);
for(String tableName : tables) {
Table t = hms.getTable(dbName, tableName);
//ql depends on metastore and is not accessible here... and if it was, I would not be using
//2.6 exec jar, but 3.0.... which is not what we want
List<String> compactionCommands = getCompactionCommands(t, conf, hms, compactionMetaInfo);
compactions.addAll(compactionCommands);
processConversion(t, convertToAcid, convertToMM, hms);
//todo: handle renaming files somewhere
}
}
makeCompactionScript(compactions, scriptLocation, compactionMetaInfo);
makeConvertTableScript(convertToAcid, convertToMM, scriptLocation);
makeRenameFileScript(scriptLocation);
}
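// For reference, a run of prepareAcidUpgradeInternal() produces up to four artifacts in
// scriptLocation (names taken from the methods below):
//   compacts_<timestamp>.sql          - MAJOR compaction commands for full acid tables with deltas
//   convertToAcid_<timestamp>.sql     - ALTER TABLE ... 'transactional'='true' statements
//   convertToMM_<timestamp>.sql       - ALTER TABLE ... 'transactional_properties'='insert_only' statements
//   normalizeFileNames_<timestamp>.sh - file renaming commands (currently always empty, see makeRenameFileScript)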
//todo: handle exclusion list
private static void processConversion(Table t, List<String> convertToAcid,
List<String> convertToMM, HiveMetaStoreClient hms) throws TException {
if(isFullAcidTable(t)) {
return;
}
if(!TableType.MANAGED_TABLE.name().equalsIgnoreCase(t.getTableType())) {
return;
}
String fullTableName = Warehouse.getQualifiedName(t);
if(t.getPartitionKeysSize() <= 0) {
if(canBeMadeAcid(fullTableName, t.getSd())) {
convertToAcid.add("ALTER TABLE " + Warehouse.getQualifiedName(t) + " SET TBLPROPERTIES (" +
"'transactional'='true')");
}
else {
convertToMM.add("ALTER TABLE " + Warehouse.getQualifiedName(t) + " SET TBLPROPERTIES (" +
"'transactional'='true', 'transactional_properties'='insert_only')");
}
}
else {
List<String> partNames = hms.listPartitionNames(t.getDbName(), t.getTableName(), (short)-1);
int batchSize = 10000;//todo: right size?
int numWholeBatches = partNames.size()/batchSize;
for(int i = 0; i < numWholeBatches; i++) {
List<Partition> partitionList = hms.getPartitionsByNames(t.getDbName(), t.getTableName(),
partNames.subList(i * batchSize, (i + 1) * batchSize));
for(Partition p : partitionList) {
if(!canBeMadeAcid(fullTableName, p.getSd())) {
convertToMM.add("ALTER TABLE " + Warehouse.getQualifiedName(t) + " SET TBLPROPERTIES (" +
"'transactional'='true', 'transactional_properties'='insert_only')");
return;
}
}
}
if(numWholeBatches * batchSize < partNames.size()) {
//last partial batch
List<Partition> partitionList = hms.getPartitionsByNames(t.getDbName(), t.getTableName(),
partNames.subList(numWholeBatches * batchSize, partNames.size()));
for (Partition p : partitionList) {
if (!canBeMadeAcid(fullTableName, p.getSd())) {
convertToMM.add("ALTER TABLE " + Warehouse.getQualifiedName(t) + " SET TBLPROPERTIES (" +
"'transactional'='true', 'transactional_properties'='insert_only')");
return;
}
}
}
//if here checked all parts and they are Acid compatible - make it acid
convertToAcid.add("ALTER TABLE " + Warehouse.getQualifiedName(t) + " SET TBLPROPERTIES (" +
"'transactional'='true')");
}
}
private static boolean canBeMadeAcid(String fullTableName, StorageDescriptor sd) {
return isAcidInputOutputFormat(fullTableName, sd) && sd.getSortColsSize() <= 0;
}
private static boolean isAcidInputOutputFormat(String fullTableName, StorageDescriptor sd) {
try {
Class inputFormatClass = sd.getInputFormat() == null ? null :
Class.forName(sd.getInputFormat());
Class outputFormatClass = sd.getOutputFormat() == null ? null :
Class.forName(sd.getOutputFormat());
if (inputFormatClass != null && outputFormatClass != null &&
Class.forName("org.apache.hadoop.hive.ql.io.AcidInputFormat")
.isAssignableFrom(inputFormatClass) &&
Class.forName("org.apache.hadoop.hive.ql.io.AcidOutputFormat")
.isAssignableFrom(outputFormatClass)) {
return true;
}
} catch (ClassNotFoundException e) {
//if a table is using some custom I/O format and it's not in the classpath, we won't mark
//the table for Acid, but today (Hive 3.1 and earlier) OrcInput/OutputFormat is the only
//Acid format
System.err.println("Could not determine if " + fullTableName +
" can be made Acid due to: " + e.getMessage());
return false;
}
return false;
}
/**
* currently writes to current dir (whatever that is).
* If there is nothing to compact, outputs empty file so as not to confuse the output with a
* failed run.
* todo: add some config to tell it where to put the script
*/
private static void makeCompactionScript(List<String> commands, String scriptLocation,
CompactionMetaInfo compactionMetaInfo) throws IOException {
if (commands.isEmpty()) {
System.out.println("No compaction is necessary");
return;
}
String fileName = "compacts_" + System.currentTimeMillis() + ".sql";
System.out.println("Writing compaction commands to " + fileName);
try(PrintWriter pw = createScript(commands, fileName, scriptLocation)) {
//add post script
pw.println("-- Generated total of " + commands.size() + " compaction commands");
if(compactionMetaInfo.numberOfBytes < Math.pow(2, 20)) {
//to see it working in UTs
pw.println("-- The total volume of data to be compacted is " +
String.format("%.6fMB", compactionMetaInfo.numberOfBytes/Math.pow(2, 20)));
}
else {
pw.println("-- The total volume of data to be compacted is " +
String.format("%.3fGB", compactionMetaInfo.numberOfBytes/Math.pow(2, 30)));
}
pw.println();
pw.println(
"-- Please note that compaction may be a heavyweight and time consuming process.\n" +
"-- Submitting all of these commands will enqueue them to a scheduling queue from\n" +
"-- which they will be picked up by compactor Workers. The max number of\n" +
"-- concurrent Workers is controlled by hive.compactor.worker.threads configured\n" +
"-- for the standalone metastore process. Compaction itself is a Map-Reduce job\n" +
"-- which is submitted to the YARN queue identified by hive.compactor.job.queue\n" +
"-- property if defined or 'default' if not defined. It's advisable to set the\n" +
"-- capacity of this queue appropriately");
}
}
private static void makeConvertTableScript(List<String> alterTableAcid, List<String> alterTableMm,
String scriptLocation) throws IOException {
if (alterTableAcid.isEmpty()) {
System.out.println("No acid conversion is necessary");
} else {
String fileName = "convertToAcid_" + System.currentTimeMillis() + ".sql";
System.out.println("Writing acid conversion commands to " + fileName);
try(PrintWriter pw = createScript(alterTableAcid, fileName, scriptLocation)) {
pw.println("-- These commands may be executed by Hive 1.x later");
}
}
if (alterTableMm.isEmpty()) {
System.out.println("No managed table conversion is necessary");
} else {
String fileName = "convertToMM_" + System.currentTimeMillis() + ".sql";
System.out.println("Writing managed table conversion commands to " + fileName);
try(PrintWriter pw = createScript(alterTableMm, fileName, scriptLocation)) {
pw.println("-- These commands must be executed by Hive 3.0 or later");
}
}
}
private static PrintWriter createScript(List<String> commands, String fileName,
String scriptLocation) throws IOException {
//todo: make sure to create the file in 'scriptLocation' dir
FileWriter fw = new FileWriter(scriptLocation + "/" + fileName);
PrintWriter pw = new PrintWriter(fw);
for(String cmd : commands) {
pw.println(cmd + ";");
}
return pw;
}
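// Note: createScript() intentionally returns an open PrintWriter; callers either wrap it in
// try-with-resources (makeCompactionScript, makeConvertTableScript) or close it explicitly
// (makeRenameFileScript) after appending their trailing comments.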
private static void makeRenameFileScript(String scriptLocation) throws IOException {
List<String> commands = Collections.emptyList();
if (commands.isEmpty()) {
System.out.println("No file renaming is necessary");
} else {
String fileName = "normalizeFileNames_" + System.currentTimeMillis() + ".sh";
System.out.println("Writing file renaming commands to " + fileName);
PrintWriter pw = createScript(commands, fileName, scriptLocation);
pw.close();
}
}
/**
* @return any compaction commands to run for {@code Table t}
*/
private static List<String> getCompactionCommands(Table t, Configuration conf,
HiveMetaStoreClient hms, CompactionMetaInfo compactionMetaInfo)
throws IOException, TException {
if(!isFullAcidTable(t)) {
return Collections.emptyList();
}
if(t.getPartitionKeysSize() <= 0) {
//not partitioned
if(!needsCompaction(new Path(t.getSd().getLocation()), conf, compactionMetaInfo)) {
return Collections.emptyList();
}
List<String> cmds = new ArrayList<>();
cmds.add(getCompactionCommand(t, null));
return cmds;
}
List<String> partNames = hms.listPartitionNames(t.getDbName(), t.getTableName(), (short)-1);
int batchSize = 10000;//todo: right size?
int numWholeBatches = partNames.size()/batchSize;
List<String> compactionCommands = new ArrayList<>();
for(int i = 0; i < numWholeBatches; i++) {
List<Partition> partitionList = hms.getPartitionsByNames(t.getDbName(), t.getTableName(), partNames.subList(i * batchSize, (i + 1) * batchSize));
for(Partition p : partitionList) {
if(needsCompaction(new Path(p.getSd().getLocation()), conf, compactionMetaInfo)) {
compactionCommands.add(getCompactionCommand(t, p));
}
}
}
if(numWholeBatches * batchSize < partNames.size()) {
//last partial batch
List<Partition> partitionList = hms.getPartitionsByNames(t.getDbName(), t.getTableName(), partNames.subList(numWholeBatches * batchSize, partNames.size()));
for (Partition p : partitionList) {
if (needsCompaction(new Path(p.getSd().getLocation()), conf, compactionMetaInfo)) {
compactionCommands.add(getCompactionCommand(t, p));
}
}
}
return compactionCommands;
}
/**
*
* @param location - path to a partition (or table if not partitioned) dir
*/
private static boolean needsCompaction(Path location, Configuration conf,
CompactionMetaInfo compactionMetaInfo) throws IOException {
FileSystem fs = location.getFileSystem(conf);
FileStatus[] deltas = fs.listStatus(location, new PathFilter() {
@Override
public boolean accept(Path path) {
//checking for delete_delta is only so that this functionality can be exercised by 3.0 code,
//which cannot produce any deltas with a mix of update/insert events
return path.getName().startsWith("delta_") || path.getName().startsWith("delete_delta_");
}
});
if(deltas == null || deltas.length == 0) {
//base_n cannot contain update/delete. Original files are all 'insert' and we need to compact
//only if there are update/delete events.
return false;
}
deltaLoop: for(FileStatus delta : deltas) {
if(!delta.isDirectory()) {
//should never happen - just in case
continue;
}
FileStatus[] buckets = fs.listStatus(delta.getPath(), new PathFilter() {
@Override
public boolean accept(Path path) {
//since this is inside a delta dir created by Hive 2.x or earlier it can only contain
//bucket_x or bucket_x__flush_length
return path.getName().startsWith("bucket_");
}
});
for(FileStatus bucket : buckets) {
if(bucket.getPath().getName().endsWith("_flush_length")) {
//streaming ingest dir - cannot have update/delete events
continue deltaLoop;
}
if(needsCompaction(bucket, fs)) {
//found delete events - this 'location' needs compacting
compactionMetaInfo.numberOfBytes += getDataSize(location, conf);
return true;
}
}
}
return false;
}
/**
* @param location - path to a partition (or table if not partitioned) dir
* @throws IOException
*/
private static long getDataSize(Path location, Configuration conf) throws IOException {
/*
* todo: Figure out the size of the partition. The
* best way is to getAcidState() and look at each file - this way it takes care of
* original files vs base and any other obsolete files. For now just brute force it,
* it's likely close enough for a rough estimate.*/
FileSystem fs = location.getFileSystem(conf);
ContentSummary cs = fs.getContentSummary(location);
return cs.getLength();
}
private static boolean needsCompaction(FileStatus bucket, FileSystem fs) throws IOException {
//create reader, look at footer
//no need to check side file since it can only be in a streaming ingest delta
Reader orcReader = OrcFile.createReader(bucket.getPath(), OrcFile.readerOptions(fs.getConf())
.filesystem(fs));
AcidStats as = OrcAcidUtils.parseAcidStats(orcReader);
if(as == null) {
//should never happen since we are reading bucket_x written by acid write
throw new IllegalStateException("AcidStats missing in " + bucket.getPath());
}
return as.deletes > 0 || as.updates > 0;
}
private static String getCompactionCommand(Table t, Partition p) {
StringBuilder sb = new StringBuilder("ALTER TABLE ").append(Warehouse.getQualifiedName(t));
if(t.getPartitionKeysSize() > 0) {
assert p != null : "must supply partition for partitioned table " +
Warehouse.getQualifiedName(t);
sb.append(" PARTITION(");
for (int i = 0; i < t.getPartitionKeysSize(); i++) {
//todo: should these be quoted? HiveUtils.unparseIdentifier() - if value is String should definitely quote
sb.append(t.getPartitionKeys().get(i).getName()).append('=')
.append(p.getValues().get(i)).append(",");
}
sb.setCharAt(sb.length() - 1, ')');//replace trailing ','
}
return sb.append(" COMPACT 'major'").toString();
}
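// For a table default.t partitioned by (ds, hr), the generated command would look like
// "ALTER TABLE default.t PARTITION(ds=2018-01-01,hr=10) COMPACT 'major'" - note that partition
// values are emitted unquoted, per the quoting todo above (the table name here is illustrative).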
private static boolean isFullAcidTable(Table t) {
if (t.getParametersSize() <= 0) {
//cannot be acid
return false;
}
String transactionalValue = t.getParameters()
.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
if (transactionalValue != null && "true".equalsIgnoreCase(transactionalValue)) {
System.out.println("Found Acid table: " + Warehouse.getQualifiedName(t));
return true;
}
return false;
}
private static void printAndExit(HiveMetaTool metaTool) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("metatool", metaTool.cmdLineOptions);
System.exit(1);
}
public static void main(String[] args) {
HiveMetaTool metaTool = new HiveMetaTool();
metaTool.init();
CommandLineParser parser = new GnuParser();
CommandLine line = null;
try {
try {
line = parser.parse(metaTool.cmdLineOptions, args);
} catch (ParseException e) {
System.err.println("HiveMetaTool:Parsing failed. Reason: " + e.getLocalizedMessage());
printAndExit(metaTool);
}
if (line.hasOption("help")) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("metatool", metaTool.cmdLineOptions);
} else if (line.hasOption("listFSRoot")) {
if (line.hasOption("dryRun")) {
System.err.println("HiveMetaTool: dryRun is not valid with listFSRoot");
printAndExit(metaTool);
} else if (line.hasOption("serdePropKey")) {
System.err.println("HiveMetaTool: serdePropKey is not valid with listFSRoot");
printAndExit(metaTool);
} else if (line.hasOption("tablePropKey")) {
System.err.println("HiveMetaTool: tablePropKey is not valid with listFSRoot");
printAndExit(metaTool);
}
metaTool.listFSRoot();
} else if (line.hasOption("executeJDOQL")) {
String query = line.getOptionValue("executeJDOQL");
if (line.hasOption("dryRun")) {
System.err.println("HiveMetaTool: dryRun is not valid with executeJDOQL");
printAndExit(metaTool);
} else if (line.hasOption("serdePropKey")) {
System.err.println("HiveMetaTool: serdePropKey is not valid with executeJDOQL");
printAndExit(metaTool);
} else if (line.hasOption("tablePropKey")) {
System.err.println("HiveMetaTool: tablePropKey is not valid with executeJDOQL");
printAndExit(metaTool);
}
if (query.toLowerCase().trim().startsWith("select")) {
metaTool.executeJDOQLSelect(query);
} else if (query.toLowerCase().trim().startsWith("update")) {
metaTool.executeJDOQLUpdate(query);
} else {
System.err.println("HiveMetaTool:Unsupported statement type");
printAndExit(metaTool);
}
} else if (line.hasOption("updateLocation")) {
String[] loc = line.getOptionValues("updateLocation");
boolean isDryRun = false;
String serdepropKey = null;
String tablePropKey = null;
if (loc.length != 2 && loc.length != 3) {
System.err.println("HiveMetaTool:updateLocation takes in 2 required and 1 " +
"optional arguments but " +
"was passed " + loc.length + " arguments");
printAndExit(metaTool);
}
Path newPath = new Path(loc[0]);
Path oldPath = new Path(loc[1]);
URI oldURI = oldPath.toUri();
URI newURI = newPath.toUri();
if (line.hasOption("dryRun")) {
isDryRun = true;
}
if (line.hasOption("serdePropKey")) {
serdepropKey = line.getOptionValue("serdePropKey");
}
if (line.hasOption("tablePropKey")) {
tablePropKey = line.getOptionValue("tablePropKey");
}
/*
* validate input - Both new and old URI should contain valid host names and valid schemes.
* port is optional in both the URIs since HDFS HA NN URI doesn't have a port.
*/
if (oldURI.getHost() == null || newURI.getHost() == null) {
System.err.println("HiveMetaTool:A valid host is required in both old-loc and new-loc");
} else if (oldURI.getScheme() == null || newURI.getScheme() == null) {
System.err.println("HiveMetaTool:A valid scheme is required in both old-loc and new-loc");
} else {
metaTool.updateFSRootLocation(oldURI, newURI, serdepropKey, tablePropKey, isDryRun);
}
} else if(line.hasOption("prepareAcidUpgrade")) {
String[] values = line.getOptionValues("prepareAcidUpgrade");
String targetDir = ".";
if(values != null && values.length > 0) {
if(values.length > 1) {
System.err.println("HiveMetaTool: prepareAcidUpgrade");
printAndExit(metaTool);
} else {
targetDir = values[0];
}
}
metaTool.prepareAcidUpgrade(targetDir);
} else {
if (line.hasOption("dryRun")) {
System.err.println("HiveMetaTool: dryRun is not a valid standalone option");
} else if (line.hasOption("serdePropKey")) {
System.err.println("HiveMetaTool: serdePropKey is not a valid standalone option");
} else if (line.hasOption("tablePropKey")) {
System.err.println("HiveMetaTool: tablePropKey is not a valid standalone option");
printAndExit(metaTool);
} else {
System.err.print("HiveMetaTool:Parsing failed. Reason: Invalid arguments: " );
for (String s : line.getArgs()) {
System.err.print(s + " ");
}
System.err.println();
}
printAndExit(metaTool);
}
} finally {
metaTool.shutdownObjectStore();
}
}
}