org.apache.hadoop.hdfs.tools.offlineImageViewer.FileDistributionCalculator
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.RandomAccessFile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
import org.apache.hadoop.util.LimitInputStream;
import org.apache.hadoop.util.StringUtils;

import com.google.common.base.Preconditions;

/**
 * This is the tool for analyzing file sizes in the namespace image. In order to
 * run the tool one should define a range of integers [0, maxSize] by
 * specifying maxSize and a step. The range of integers is
 * divided into segments of size step:
 * [0, s1, ..., sn-1, maxSize], and the visitor
 * calculates how many files in the system fall into each segment
 * [si-1, si). Note that files larger than
 * maxSize always fall into the very last segment.
 *
 * Input:
 *   - filename: specifies the location of the image file;
 *   - maxSize: determines the range [0, maxSize] of file sizes considered by
 *     the visitor;
 *   - step: the range is divided into segments of size step.
 *
 * Output:
 * The output file is formatted as a tab-separated, two-column table: Size and
 * NumFiles, where Size represents the start of the segment and NumFiles is
 * the number of files from the image whose size falls in this segment.
 */
final class FileDistributionCalculator {
  private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37
  private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21
  private final static int MAX_INTERVALS = 0x8000000; // 128 M = 2^27

  private final Configuration conf;
  private final long maxSize;
  private final int steps;
  private final PrintStream out;

  private final int[] distribution;
  private int totalFiles;
  private int totalDirectories;
  private int totalBlocks;
  private long totalSpace;
  private long maxFileSize;

  private boolean formatOutput = false;

  FileDistributionCalculator(Configuration conf, long maxSize, int steps,
      boolean formatOutput, PrintStream out) {
    this.conf = conf;
    this.maxSize = maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize;
    this.steps = steps == 0 ? INTERVAL_DEFAULT : steps;
    this.formatOutput = formatOutput;
    this.out = out;
    long numIntervals = this.maxSize / this.steps;
    // avoid OutOfMemoryError when allocating an array
    Preconditions.checkState(numIntervals <= MAX_INTERVALS,
        "Too many distribution intervals (maxSize/step): " + numIntervals
            + ", should be less than " + (MAX_INTERVALS + 1) + ".");
    this.distribution = new int[1 + (int) (numIntervals)];
  }

  void visit(RandomAccessFile file) throws IOException {
    if (!FSImageUtil.checkFileFormat(file)) {
      throw new IOException("Unrecognized FSImage");
    }

    FileSummary summary = FSImageUtil.loadSummary(file);

    try (FileInputStream in = new FileInputStream(file.getFD())) {
      for (FileSummary.Section s : summary.getSectionsList()) {
        if (SectionName.fromString(s.getName()) != SectionName.INODE) {
          continue;
        }

        in.getChannel().position(s.getOffset());
        InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
            summary.getCodec(), new BufferedInputStream(new LimitInputStream(
                in, s.getLength())));
        run(is);
        output();
      }
    }
  }

  private void run(InputStream in) throws IOException {
    INodeSection s = INodeSection.parseDelimitedFrom(in);
    for (int i = 0; i < s.getNumInodes(); ++i) {
      INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in);
      if (p.getType() == INodeSection.INode.Type.FILE) {
        ++totalFiles;
        INodeSection.INodeFile f = p.getFile();
        totalBlocks += f.getBlocksCount();
        long fileSize = 0;
        for (BlockProto b : f.getBlocksList()) {
          fileSize += b.getNumBytes();
        }
        maxFileSize = Math.max(fileSize, maxFileSize);
        totalSpace += fileSize * f.getReplication();

        int bucket = fileSize > maxSize ? distribution.length - 1
            : (int) Math.ceil((double) fileSize / steps);
        // Compare the bucket value with distribution's length again,
        // because sometimes the bucket value will be equal to
        // the length when maxSize can't be divided completely by step.
        if (bucket >= distribution.length) {
          bucket = distribution.length - 1;
        }
        ++distribution[bucket];

      } else if (p.getType() == INodeSection.INode.Type.DIRECTORY) {
        ++totalDirectories;
      }

      if (i % (1 << 20) == 0) {
        out.println("Processed " + i + " inodes.");
      }
    }
  }

  private void output() {
    // write the distribution into the output file
    out.print((formatOutput ? "Size Range" : "Size") + "\tNumFiles\n");
    for (int i = 0; i < distribution.length; i++) {
      if (distribution[i] != 0) {
        if (formatOutput) {
          out.print((i == 0 ? "[" : "(")
              + StringUtils.byteDesc(((long) (i == 0 ? 0 : i - 1) * steps))
              + ", "
              + StringUtils.byteDesc((long) (i == distribution.length - 1
                  ? maxFileSize : (long) i * steps))
              + "]\t" + distribution[i]);
        } else {
          out.print(((long) i * steps) + "\t" + distribution[i]);
        }

        out.print('\n');
      }
    }

    out.print("totalFiles = " + totalFiles + "\n");
    out.print("totalDirectories = " + totalDirectories + "\n");
    out.print("totalBlocks = " + totalBlocks + "\n");
    out.print("totalSpace = " + totalSpace + "\n");
    out.print("maxFileSize = " + maxFileSize + "\n");
  }
}
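The core of the class is the bucket computation in run(): a file of total size
fileSize is counted in bucket ceil(fileSize / step), files larger than maxSize
are counted in the very last bucket, and the index is clamped when maxSize is
not an exact multiple of step. The following minimal, self-contained sketch
reproduces that arithmetic with the class's default constants; the class and
method names below are hypothetical and are not part of the Hadoop source.

public class FileDistributionBucketSketch {

  /** Map a file size to a bucket index, mirroring run() above. */
  static int bucketFor(long fileSize, long maxSize, int step, int numBuckets) {
    if (fileSize > maxSize) {
      return numBuckets - 1;          // oversized files land in the last bucket
    }
    int bucket = (int) Math.ceil((double) fileSize / step);
    // Clamp, as the calculator does, when maxSize is not a multiple of step.
    return Math.min(bucket, numBuckets - 1);
  }

  public static void main(String[] args) {
    long maxSize = 0x2000000000L;     // MAX_SIZE_DEFAULT: 2^37 bytes
    int step = 0x200000;              // INTERVAL_DEFAULT: 2 MB
    int numBuckets = 1 + (int) (maxSize / step);

    System.out.println(bucketFor(0, maxSize, step, numBuckets));              // 0: empty files
    System.out.println(bucketFor(1, maxSize, step, numBuckets));              // 1: first segment
    System.out.println(bucketFor(2L << 20, maxSize, step, numBuckets));       // 1: exactly 2 MB
    System.out.println(bucketFor((2L << 20) + 1, maxSize, step, numBuckets)); // 2: just over 2 MB
    System.out.println(bucketFor(maxSize + 1, maxSize, step, numBuckets));    // 65536: last bucket
  }
}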