All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hdfs.tools.offlineImageViewer.FileDistributionVisitor Maven / Gradle / Ivy

There is a newer version: 3.4.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.IOException;
import java.util.LinkedList;

import org.apache.hadoop.util.StringUtils;

/**
 * File size distribution visitor.
 * 
 * 

Description.

* This is the tool for analyzing file sizes in the namespace image. * In order to run the tool one should define a range of integers * [0, maxSize] by specifying maxSize and a step. * The range of integers is divided into segments of size step: * [0, s1, ..., sn-1, maxSize], * and the visitor calculates how many files in the system fall into * each segment [si-1, si). * Note that files larger than maxSize always fall into * the very last segment. * *

Input.

*
    *
  • filename specifies the location of the image file;
  • *
  • maxSize determines the range [0, maxSize] of files * sizes considered by the visitor;
  • *
  • step the range is divided into segments of size step.
  • *
* *

Output.

* The output file is formatted as a tab separated two column table: * Size and NumFiles. Where Size represents the start of the segment, * and numFiles is the number of files form the image which size falls in * this segment. */ class FileDistributionVisitor extends TextWriterImageVisitor { final private LinkedList elemS = new LinkedList(); private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37 private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21 private int[] distribution; private long maxSize; private int step; private int totalFiles; private int totalDirectories; private int totalBlocks; private long totalSpace; private long maxFileSize; private FileContext current; private boolean inInode = false; private boolean formatOutput = false; /** * File or directory information. */ private static class FileContext { String path; long fileSize; int numBlocks; int replication; } public FileDistributionVisitor(String filename, long maxSize, int step, boolean formatOutput) throws IOException { super(filename, false); this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize); this.step = (step == 0 ? INTERVAL_DEFAULT : step); this.formatOutput = formatOutput; long numIntervals = this.maxSize / this.step; if(numIntervals >= Integer.MAX_VALUE) throw new IOException("Too many distribution intervals " + numIntervals); this.distribution = new int[1 + (int)(numIntervals)]; this.totalFiles = 0; this.totalDirectories = 0; this.totalBlocks = 0; this.totalSpace = 0; this.maxFileSize = 0; } @Override void start() throws IOException {} @Override void finish() throws IOException { output(); super.finish(); } @Override void finishAbnormally() throws IOException { System.out.println("*** Image processing finished abnormally. Ending ***"); output(); super.finishAbnormally(); } private void output() throws IOException { // write the distribution into the output file write((formatOutput ? "Size Range" : "Size") + "\tNumFiles\n"); for (int i = 0; i < distribution.length; i++) { if (distribution[i] > 0) { if (formatOutput) { write((i == 0 ? "[" : "(") + StringUtils.byteDesc(((long) (i == 0 ? 0 : i - 1) * step)) + ", " + StringUtils.byteDesc((long) (i == distribution.length - 1 ? maxFileSize : i * step)) + "]\t" + distribution[i] + "\n"); } else { write(((long) i * step) + "\t" + distribution[i] + "\n"); } } } System.out.println("totalFiles = " + totalFiles); System.out.println("totalDirectories = " + totalDirectories); System.out.println("totalBlocks = " + totalBlocks); System.out.println("totalSpace = " + totalSpace); System.out.println("maxFileSize = " + maxFileSize); } @Override void leaveEnclosingElement() throws IOException { ImageElement elem = elemS.pop(); if(elem != ImageElement.INODE && elem != ImageElement.INODE_UNDER_CONSTRUCTION) return; inInode = false; if(current.numBlocks < 0) { totalDirectories ++; return; } totalFiles++; totalBlocks += current.numBlocks; totalSpace += current.fileSize * current.replication; if(maxFileSize < current.fileSize) maxFileSize = current.fileSize; int high; if(current.fileSize > maxSize) high = distribution.length-1; else high = (int)Math.ceil((double)current.fileSize / step); if (high >= distribution.length) { high = distribution.length - 1; } distribution[high]++; if(totalFiles % 1000000 == 1) System.out.println("Files processed: " + totalFiles + " Current: " + current.path); } @Override void visit(ImageElement element, String value) throws IOException { if(inInode) { switch(element) { case INODE_PATH: current.path = (value.equals("") ? "/" : value); break; case REPLICATION: current.replication = Integer.parseInt(value); break; case NUM_BYTES: current.fileSize += Long.parseLong(value); break; default: break; } } } @Override void visitEnclosingElement(ImageElement element) throws IOException { elemS.push(element); if(element == ImageElement.INODE || element == ImageElement.INODE_UNDER_CONSTRUCTION) { current = new FileContext(); inInode = true; } } @Override void visitEnclosingElement(ImageElement element, ImageElement key, String value) throws IOException { elemS.push(element); if(element == ImageElement.INODE || element == ImageElement.INODE_UNDER_CONSTRUCTION) inInode = true; else if(element == ImageElement.BLOCKS) current.numBlocks = Integer.parseInt(value); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy