All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.io.file.tfile.TFileDumper Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.io.file.tfile;

import java.io.IOException;
import java.io.PrintStream;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.Charsets;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.file.tfile.BCFile.BlockRegion;
import org.apache.hadoop.io.file.tfile.BCFile.MetaIndexEntry;
import org.apache.hadoop.io.file.tfile.TFile.TFileIndexEntry;
import org.apache.hadoop.io.file.tfile.Utils.Version;

/**
 * Dumping the information of a TFile.
 * <p>
 * The single entry point, {@link #dumpInfo(String, PrintStream, Configuration)},
 * prints three sections to a {@link PrintStream}: a table of file-level
 * properties, one row per data block, and one row per meta block.
 */
class TFileDumper {
  static final Log LOG = LogFactory.getLog(TFileDumper.class);

  private TFileDumper() {
    // namespace object not constructable.
  }

  /**
   * Column alignment modes, together with the padding helpers used to lay out
   * the dump tables.
   */
  private enum Align {
    LEFT, CENTER, RIGHT, ZERO_PADDED;

    /**
     * Pads {@code s} with spaces up to {@code width} characters.
     *
     * @param s
     *          text to pad; returned unchanged when it is already at least
     *          {@code width} characters long
     * @param width
     *          target width in characters
     * @param align
     *          LEFT, RIGHT or CENTER; for CENTER an odd leftover space goes to
     *          the right-hand side
     * @return the padded text
     * @throws IllegalArgumentException
     *           if padding is needed and {@code align} is not one of LEFT,
     *           RIGHT or CENTER (ZERO_PADDED only applies to numbers)
     */
    static String format(String s, int width, Align align) {
      int room = width - s.length();
      if (room <= 0) {
        return s;
      }
      if (align == LEFT) {
        return s + spaces(room);
      }
      if (align == RIGHT) {
        return spaces(room) + s;
      }
      if (align == CENTER) {
        // half may be 0 when room == 1; spaces() copes with that, so no
        // special-casing of the alignment is needed.
        int half = room / 2;
        return spaces(half) + s + spaces(room - half);
      }
      throw new IllegalArgumentException("Unsupported alignment");
    }

    /**
     * Formats a number in a column of {@code width} characters.
     *
     * @param l
     *          value to format
     * @param width
     *          target width in characters
     * @param align
     *          ZERO_PADDED pads with leading zeros; any other mode is delegated
     *          to {@link #format(String, int, Align)}
     * @return the formatted number
     */
    static String format(long l, int width, Align align) {
      if (align == ZERO_PADDED) {
        return String.format("%0" + width + "d", l);
      }
      return format(Long.toString(l), width, align);
    }

    /**
     * Returns the width of a column that must fit both its caption and the
     * decimal representation of {@code max}.
     */
    static int calculateWidth(String caption, long max) {
      return Math.max(caption.length(), Long.toString(max).length());
    }

    /** Returns a string of {@code count} spaces; empty when count is not positive. */
    private static String spaces(int count) {
      // "%0s" is not a valid format, so a zero width must be handled here.
      return count > 0 ? String.format("%" + count + "s", "") : "";
    }
  }

  /**
   * Dump information about TFile.
   * 
   * @param file
   *          Path string of the TFile
   * @param out
   *          PrintStream to output the information.
   * @param conf
   *          The configuration object.
   * @throws IOException
   *           propagated from the file system and TFile reader calls made
   *           while gathering the information
   */
  static public void dumpInfo(String file, PrintStream out, Configuration conf)
      throws IOException {
    final int maxKeySampleLen = 16;
    Path path = new Path(file);
    FileSystem fs = path.getFileSystem(conf);
    long length = fs.getFileStatus(path).getLen();
    FSDataInputStream fsdis = fs.open(path);
    TFile.Reader reader = null;
    try {
      // Constructed inside the try so that the already-open stream is closed
      // by the finally block even when the reader cannot be created.
      reader = new TFile.Reader(fsdis, length, conf);

      // Insertion-ordered so that the properties print in the order added.
      LinkedHashMap<String, String> properties =
          new LinkedHashMap<String, String>();
      int blockCnt = reader.readerBCF.getBlockCount();
      int metaBlkCnt = reader.readerBCF.metaIndex.index.size();
      properties.put("BCFile Version", reader.readerBCF.version.toString());
      properties.put("TFile Version", reader.tfileMeta.version.toString());
      properties.put("File Length", Long.toString(length));
      properties.put("Data Compression", reader.readerBCF
          .getDefaultCompressionName());
      properties.put("Record Count", Long.toString(reader.getEntryCount()));
      properties.put("Sorted", Boolean.toString(reader.isSorted()));
      if (reader.isSorted()) {
        properties.put("Comparator", reader.getComparatorName());
      }
      properties.put("Data Block Count", Integer.toString(blockCnt));

      // Accumulate compressed and raw sizes over all data blocks.
      long dataSize = 0, dataSizeUncompressed = 0;
      if (blockCnt > 0) {
        for (int i = 0; i < blockCnt; ++i) {
          BlockRegion region =
              reader.readerBCF.dataIndex.getBlockRegionList().get(i);
          dataSize += region.getCompressedSize();
          dataSizeUncompressed += region.getRawSize();
        }
        properties.put("Data Block Bytes", Long.toString(dataSize));
        if (!reader.readerBCF.getDefaultCompressionName().equals("none")) {
          properties.put("Data Block Uncompressed Bytes", Long
              .toString(dataSizeUncompressed));
          properties.put("Data Block Compression Ratio", String.format(
              "1:%.1f", (double) dataSizeUncompressed / dataSize));
        }
      }

      properties.put("Meta Block Count", Integer.toString(metaBlkCnt));

      // Accumulate compressed and raw sizes over all meta blocks; the
      // compression figures are only reported if some block is compressed.
      long metaSize = 0, metaSizeUncompressed = 0;
      if (metaBlkCnt > 0) {
        boolean calculateCompression = false;
        for (MetaIndexEntry e : reader.readerBCF.metaIndex.index.values()) {
          metaSize += e.getRegion().getCompressedSize();
          metaSizeUncompressed += e.getRegion().getRawSize();
          if (e.getCompressionAlgorithm() != Compression.Algorithm.NONE) {
            calculateCompression = true;
          }
        }
        properties.put("Meta Block Bytes", Long.toString(metaSize));
        if (calculateCompression) {
          properties.put("Meta Block Uncompressed Bytes", Long
              .toString(metaSizeUncompressed));
          properties.put("Meta Block Compression Ratio", String.format(
              "1:%.1f", (double) metaSizeUncompressed / metaSize));
        }
      }
      // Floating-point division: a zero metaSize yields Infinity/NaN in the
      // output rather than an exception.
      properties.put("Meta-Data Size Ratio", String.format("1:%.1f",
          (double) dataSize / metaSize));

      // Whatever is neither data block, meta block nor fixed-size trailer is
      // attributed to the meta block index.
      long leftOverBytes = length - dataSize - metaSize;
      long miscSize =
          BCFile.Magic.size() * 2 + Long.SIZE / Byte.SIZE + Version.size();
      long metaIndexSize = leftOverBytes - miscSize;
      properties.put("Meta Block Index Bytes", Long.toString(metaIndexSize));
      properties.put("Headers Etc Bytes", Long.toString(miscSize));

      // Now output the properties table.
      int maxKeyLength = 0;
      for (String propertyName : properties.keySet()) {
        maxKeyLength = Math.max(maxKeyLength, propertyName.length());
      }
      for (Map.Entry<String, String> e : properties.entrySet()) {
        out.printf("%s : %s%n", Align.format(e.getKey(), maxKeyLength,
            Align.LEFT), e.getValue());
      }
      out.println();

      // NOTE(review): presumably makes sure reader.tfileIndex is loaded before
      // it is read below - confirm against TFile.Reader.
      reader.checkTFileDataIndex();
      if (blockCnt > 0) {
        String blkID = "Data-Block";
        int blkIDWidth = Align.calculateWidth(blkID, blockCnt);
        // Width of the zero-padded block number itself, without the caption.
        int blkIDWidth2 = Align.calculateWidth("", blockCnt);
        String offset = "Offset";
        int offsetWidth = Align.calculateWidth(offset, length);
        String blkLen = "Length";
        // Size columns are dimensioned for ten times the average value.
        int blkLenWidth =
            Align.calculateWidth(blkLen, dataSize / blockCnt * 10);
        String rawSize = "Raw-Size";
        int rawSizeWidth =
            Align.calculateWidth(rawSize, dataSizeUncompressed / blockCnt * 10);
        String records = "Records";
        int recordsWidth =
            Align.calculateWidth(records, reader.getEntryCount() / blockCnt
                * 10);
        String endKey = "End-Key";
        // Two hex digits per sampled byte plus "0X" and "...".
        int endKeyWidth = Math.max(endKey.length(), maxKeySampleLen * 2 + 5);

        out.printf("%s %s %s %s %s %s%n", Align.format(blkID, blkIDWidth,
            Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
            Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(
                rawSize, rawSizeWidth, Align.CENTER), Align.format(records,
                recordsWidth, Align.CENTER), Align.format(endKey, endKeyWidth,
                Align.LEFT));

        for (int i = 0; i < blockCnt; ++i) {
          BlockRegion region =
              reader.readerBCF.dataIndex.getBlockRegionList().get(i);
          TFileIndexEntry indexEntry = reader.tfileIndex.getEntry(i);
          out.printf("%s %s %s %s %s ", Align.format(Align.format(i,
              blkIDWidth2, Align.ZERO_PADDED), blkIDWidth, Align.LEFT), Align
              .format(region.getOffset(), offsetWidth, Align.LEFT), Align
              .format(region.getCompressedSize(), blkLenWidth, Align.LEFT),
              Align.format(region.getRawSize(), rawSizeWidth, Align.LEFT),
              Align.format(indexEntry.kvEntries, recordsWidth, Align.LEFT));
          out.println(formatKeySample(indexEntry.key, maxKeySampleLen));
        }
      }

      out.println();
      if (metaBlkCnt > 0) {
        String name = "Meta-Block";
        int maxNameLen = 0;
        Set<Map.Entry<String, MetaIndexEntry>> metaBlkEntrySet =
            reader.readerBCF.metaIndex.index.entrySet();
        for (Map.Entry<String, MetaIndexEntry> e : metaBlkEntrySet) {
          maxNameLen = Math.max(maxNameLen, e.getKey().length());
        }
        int nameWidth = Math.max(name.length(), maxNameLen);
        String offset = "Offset";
        int offsetWidth = Align.calculateWidth(offset, length);
        String blkLen = "Length";
        int blkLenWidth =
            Align.calculateWidth(blkLen, metaSize / metaBlkCnt * 10);
        String rawSize = "Raw-Size";
        int rawSizeWidth =
            Align.calculateWidth(rawSize, metaSizeUncompressed / metaBlkCnt
                * 10);
        String compression = "Compression";
        int compressionWidth = compression.length();
        out.printf("%s %s %s %s %s%n", Align.format(name, nameWidth,
            Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
            Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(
                rawSize, rawSizeWidth, Align.CENTER), Align.format(compression,
                compressionWidth, Align.LEFT));

        for (Map.Entry<String, MetaIndexEntry> e : metaBlkEntrySet) {
          MetaIndexEntry metaEntry = e.getValue();
          String blkName = metaEntry.getMetaName();
          BlockRegion region = metaEntry.getRegion();
          String blkCompression =
              metaEntry.getCompressionAlgorithm().getName();
          out.printf("%s %s %s %s %s%n", Align.format(blkName, nameWidth,
              Align.LEFT), Align.format(region.getOffset(), offsetWidth,
              Align.LEFT), Align.format(region.getCompressedSize(),
              blkLenWidth, Align.LEFT), Align.format(region.getRawSize(),
              rawSizeWidth, Align.LEFT), Align.format(blkCompression,
              compressionWidth, Align.LEFT));
        }
      }
    } finally {
      // IOUtils.cleanup skips null entries, so a reader that was never
      // constructed is harmless here.
      IOUtils.cleanup(LOG, reader, fsdis);
    }
  }

  /**
   * Renders the leading bytes of a key for display.
   * <p>
   * At most {@code maxSampleLen} bytes are shown. The sample is printed as
   * text unless it contains a control byte other than TAB, DEL, or a byte
   * with the high bit set; in that case it is printed as "0X" followed by two
   * upper-case hex digits per byte. "..." is appended when the key is longer
   * than the sample.
   *
   * @param key
   *          the raw key bytes
   * @param maxSampleLen
   *          maximum number of key bytes to show
   * @return the printable sample
   */
  private static String formatKeySample(byte[] key, int maxSampleLen) {
    int sampleLen = Math.min(maxSampleLen, key.length);
    boolean asAscii = true;
    for (int j = 0; j < sampleLen; ++j) {
      byte b = key[j];
      // Bytes are signed: values 0x80..0xFF are negative and therefore also
      // fail the "b < 32" test.
      if ((b < 32 && b != 9) || (b == 127)) {
        asAscii = false;
        break;
      }
    }
    StringBuilder sample = new StringBuilder();
    if (asAscii) {
      sample.append(new String(key, 0, sampleLen, Charsets.UTF_8));
    } else {
      sample.append("0X");
      for (int j = 0; j < sampleLen; ++j) {
        // Index with j (the byte position) and always emit two digits so the
        // dump is unambiguous.
        sample.append(String.format("%02X", key[j]));
      }
    }
    if (sampleLen < key.length) {
      sample.append("...");
    }
    return sample.toString();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy