All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.util.HFileV1Detector Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.io.FileLink;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Tool to detect presence of any HFileV1 in the given directory. It prints all such regions which
 * have such files.
 * 

* To print the help section of the tool: *

    *
  • ./bin/hbase org.apache.hadoop.hbase.util.HFileV1Detector --h or, *
  • java -cp `hbase classpath` org.apache.hadoop.hbase.util.HFileV1Detector --h *
* It also supports -h, --help, -help options. *

*/ public class HFileV1Detector extends Configured implements Tool { private FileSystem fs; private static final Log LOG = LogFactory.getLog(HFileV1Detector.class); private static final int DEFAULT_NUM_OF_THREADS = 10; /** * Pre-namespace archive directory */ private static final String PRE_NS_DOT_ARCHIVE = ".archive"; /** * Pre-namespace tmp directory */ private static final String PRE_NS_DOT_TMP = ".tmp"; private int numOfThreads; /** * directory to start the processing. */ private Path targetDirPath; /** * executor for processing regions. */ private ExecutorService exec; /** * Keeps record of processed tables. */ private final Set processedTables = new HashSet(); /** * set of corrupted HFiles (with undetermined major version) */ private final Set corruptedHFiles = Collections .newSetFromMap(new ConcurrentHashMap()); /** * set of HfileV1; */ private final Set hFileV1Set = Collections .newSetFromMap(new ConcurrentHashMap()); private Options options = new Options(); /** * used for computing pre-namespace paths for hfilelinks */ private Path defaultNamespace; public HFileV1Detector() { Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation"); pathOption.setRequired(false); options.addOption(pathOption); Option threadOption = new Option("n", "numberOfThreads", true, "Number of threads to use while processing HFiles."); threadOption.setRequired(false); options.addOption(threadOption); options.addOption("h", "help", false, "Help"); } private boolean parseOption(String[] args) throws ParseException, IOException { if (args.length == 0) { return true; // no args will process with default values. } CommandLineParser parser = new GnuParser(); CommandLine cmd = parser.parse(options, args); if (cmd.hasOption("h")) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("HFileV1Detector", options, true); System.out .println("In case no option is provided, it processes hbase.rootdir using 10 threads."); System.out.println("Example:"); System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':"); System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase"); System.out.println(); return false; } if (cmd.hasOption("p")) { this.targetDirPath = new Path(FSUtils.getRootDir(getConf()), cmd.getOptionValue("p")); } try { if (cmd.hasOption("n")) { int n = Integer.parseInt(cmd.getOptionValue("n")); if (n < 0 || n > 100) { LOG.warn("Please use a positive number <= 100 for number of threads." + " Continuing with default value " + DEFAULT_NUM_OF_THREADS); return true; } this.numOfThreads = n; } } catch (NumberFormatException nfe) { LOG.error("Please select a valid number for threads"); return false; } return true; } /** * Checks for HFileV1. * @return 0 when no HFileV1 is present. * 1 when a HFileV1 is present or, when there is a file with corrupt major version * (neither V1 nor V2). * -1 in case of any error/exception */ @Override public int run(String args[]) throws IOException, ParseException { FSUtils.setFsDefault(getConf(), new Path(FSUtils.getRootDir(getConf()).toUri())); fs = FileSystem.get(getConf()); numOfThreads = DEFAULT_NUM_OF_THREADS; targetDirPath = FSUtils.getRootDir(getConf()); if (!parseOption(args)) { System.exit(-1); } this.exec = Executors.newFixedThreadPool(numOfThreads); try { return processResult(checkForV1Files(targetDirPath)); } catch (Exception e) { LOG.error(e); } finally { exec.shutdown(); fs.close(); } return -1; } private void setDefaultNamespaceDir() throws IOException { Path dataDir = new Path(FSUtils.getRootDir(getConf()), HConstants.BASE_NAMESPACE_DIR); defaultNamespace = new Path(dataDir, NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR); } private int processResult(Set regionsWithHFileV1) { LOG.info("Result: \n"); printSet(processedTables, "Tables Processed: "); int count = hFileV1Set.size(); LOG.info("Count of HFileV1: " + count); if (count > 0) printSet(hFileV1Set, "HFileV1:"); count = corruptedHFiles.size(); LOG.info("Count of corrupted files: " + count); if (count > 0) printSet(corruptedHFiles, "Corrupted Files: "); count = regionsWithHFileV1.size(); LOG.info("Count of Regions with HFileV1: " + count); if (count > 0) printSet(regionsWithHFileV1, "Regions to Major Compact: "); return (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) ? 0 : 1; } private void printSet(Set result, String msg) { LOG.info(msg); for (Path p : result) { LOG.info(p); } } /** * Takes a directory path, and lists out any HFileV1, if present. * @param targetDir directory to start looking for HFilev1. * @return set of Regions that have HFileV1 * @throws IOException */ private Set checkForV1Files(Path targetDir) throws IOException { LOG.info("Target dir is: " + targetDir); if (!fs.exists(targetDir)) { throw new IOException("The given path does not exist: " + targetDir); } if (isTableDir(fs, targetDir)) { processedTables.add(targetDir); return processTable(targetDir); } Set regionsWithHFileV1 = new HashSet(); FileStatus[] fsStats = fs.listStatus(targetDir); for (FileStatus fsStat : fsStats) { if (isTableDir(fs, fsStat.getPath()) && !isRootTable(fsStat.getPath())) { processedTables.add(fsStat.getPath()); // look for regions and find out any v1 file. regionsWithHFileV1.addAll(processTable(fsStat.getPath())); } else { LOG.info("Ignoring path: " + fsStat.getPath()); } } return regionsWithHFileV1; } /** * Ignore ROOT table as it doesn't exist in 0.96. * @param path */ private boolean isRootTable(Path path) { if (path != null && path.toString().endsWith("-ROOT-")) return true; return false; } /** * Find out regions in the table which have HFileV1. * @param tableDir * @return the set of regions containing HFile v1. * @throws IOException */ private Set processTable(Path tableDir) throws IOException { // list out the regions and then process each file in it. LOG.debug("processing table: " + tableDir); List> regionLevelResults = new ArrayList>(); Set regionsWithHFileV1 = new HashSet(); FileStatus[] fsStats = fs.listStatus(tableDir); for (FileStatus fsStat : fsStats) { // process each region if (isRegionDir(fs, fsStat.getPath())) { regionLevelResults.add(processRegion(fsStat.getPath())); } } for (Future f : regionLevelResults) { try { if (f.get() != null) { regionsWithHFileV1.add(f.get()); } } catch (InterruptedException e) { LOG.error(e); } catch (ExecutionException e) { LOG.error(e); // might be a bad hfile. We print it at the end. } } return regionsWithHFileV1; } /** * Each region is processed by a separate handler. If a HRegion has a hfileV1, its path is * returned as the future result, otherwise, a null value is returned. * @param regionDir Region to process. * @return corresponding Future object. */ private Future processRegion(final Path regionDir) { LOG.debug("processing region: " + regionDir); Callable regionCallable = new Callable() { @Override public Path call() throws Exception { for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) { FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir); if (storeFiles == null || storeFiles.length == 0) continue; for (FileStatus storeFile : storeFiles) { Path storeFilePath = storeFile.getPath(); FSDataInputStream fsdis = null; long lenToRead = 0; try { // check whether this path is a reference. if (StoreFileInfo.isReference(storeFilePath)) continue; // check whether this path is a HFileLink. else if (HFileLink.isHFileLink(storeFilePath)) { FileLink fLink = getFileLinkWithPreNSPath(storeFilePath); fsdis = fLink.open(fs); lenToRead = fLink.getFileStatus(fs).getLen(); } else { // a regular hfile fsdis = fs.open(storeFilePath); lenToRead = storeFile.getLen(); } int majorVersion = computeMajorVersion(fsdis, lenToRead); if (majorVersion == 1) { hFileV1Set.add(storeFilePath); // return this region path, as it needs to be compacted. return regionDir; } if (majorVersion > 2 || majorVersion < 1) throw new IllegalArgumentException( "Incorrect major version: " + majorVersion); } catch (Exception iae) { corruptedHFiles.add(storeFilePath); LOG.error("Got exception while reading trailer for file: "+ storeFilePath, iae); } finally { if (fsdis != null) fsdis.close(); } } } return null; } private int computeMajorVersion(FSDataInputStream istream, long fileSize) throws IOException { //read up the last int of the file. Major version is in the last 3 bytes. long seekPoint = fileSize - Bytes.SIZEOF_INT; if (seekPoint < 0) throw new IllegalArgumentException("File too small, no major version found"); // Read the version from the last int of the file. istream.seek(seekPoint); int version = istream.readInt(); // Extract and return the major version return version & 0x00ffffff; } }; Future f = exec.submit(regionCallable); return f; } /** * Creates a FileLink which adds pre-namespace paths in its list of available paths. This is used * when reading a snapshot file in a pre-namespace file layout, for example, while upgrading. * @param storeFilePath * @return a FileLink which could read from pre-namespace paths. * @throws IOException */ public FileLink getFileLinkWithPreNSPath(Path storeFilePath) throws IOException { HFileLink link = HFileLink.buildFromHFileLinkPattern(getConf(), storeFilePath); List pathsToProcess = getPreNSPathsForHFileLink(link); pathsToProcess.addAll(Arrays.asList(link.getLocations())); return new FileLink(pathsToProcess); } private List getPreNSPathsForHFileLink(HFileLink fileLink) throws IOException { if (defaultNamespace == null) setDefaultNamespaceDir(); List p = new ArrayList(); String relativeTablePath = removeDefaultNSPath(fileLink.getOriginPath()); p.add(getPreNSPath(PRE_NS_DOT_ARCHIVE, relativeTablePath)); p.add(getPreNSPath(PRE_NS_DOT_TMP, relativeTablePath)); p.add(getPreNSPath(null, relativeTablePath)); return p; } /** * Removes the prefix of defaultNamespace from the path. * @param originalPath */ private String removeDefaultNSPath(Path originalPath) { String pathStr = originalPath.toString(); if (!pathStr.startsWith(defaultNamespace.toString())) return pathStr; return pathStr.substring(defaultNamespace.toString().length() + 1); } private Path getPreNSPath(String prefix, String relativeTablePath) throws IOException { String relativePath = (prefix == null ? relativeTablePath : prefix + Path.SEPARATOR + relativeTablePath); return new Path(FSUtils.getRootDir(getConf()), relativePath); } private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException { // check for old format, of having /table/.tableinfo; hbase:meta doesn't has .tableinfo, // include it. if (fs.isFile(path)) return false; return (FSTableDescriptors.getTableInfoPath(fs, path) != null || FSTableDescriptors .getCurrentTableInfoStatus(fs, path, false) != null) || path.toString().endsWith(".META."); } private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException { if (fs.isFile(path)) return false; Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE); return fs.exists(regionInfo); } public static void main(String args[]) throws Exception { System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args)); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy