All downloads are free. Search and download functionality uses the official Maven repository.

com.facebook.hive.orc.FileDump Maven / Gradle / Ivy

There is a newer version: 0.18.9
Show newest version
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.hive.orc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;

/**
 * A tool for printing out the file structure of ORC files.
 *
 * <p>Each command-line argument is treated as the path of one ORC file. For
 * every file the tool prints, to standard output: the row count, the
 * compression kind (and compression size when the file is compressed), the
 * raw data size, the type name, the per-column statistics, and for every
 * stripe its streams and column encodings.
 */
public final class FileDump {

  // Utility class with only a static entry point; never instantiated.
  private FileDump() {}

  /**
   * Dumps the structure of every ORC file named in {@code args}.
   *
   * @param args paths of the ORC files to describe; with no arguments
   *             nothing is printed
   * @throws Exception if a file cannot be opened or read
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    for (String filename : args) {
      System.out.println("Structure for " + filename);
      Path path = new Path(filename);
      ReaderWriterProfiler.setProfilerOptions(conf);
      Reader reader = OrcFile.createReader(path.getFileSystem(conf), path, conf);
      // The record reader is only needed here to read the stripe footers.
      RecordReaderImpl rows = (RecordReaderImpl) reader.rows(null);
      try {
        System.out.println("Rows: " + reader.getNumberOfRows());
        System.out.println("Compression: " + reader.getCompression());
        if (reader.getCompression() != CompressionKind.NONE) {
          System.out.println("Compression size: " + reader.getCompressionSize());
        }
        System.out.println("Raw data size: " + reader.getRawDataSize());
        System.out.println("Type: " + reader.getObjectInspector().getTypeName());

        ColumnStatistics[] stats = reader.getStatistics();
        System.out.println("\nStatistics:");
        for (int i = 0; i < stats.length; ++i) {
          System.out.println("  Column " + i + ": " + stats[i].toString());
        }

        System.out.println("\nStripes:");
        for (StripeInformation stripe : reader.getStripes()) {
          // Running start position of the next stream: begins at the stripe's
          // offset and advances by each stream's length.
          long sectionStart = stripe.getOffset();
          System.out.println("  Stripe: " + stripe.toString());
          OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
          for (OrcProto.Stream section : footer.getStreamsList()) {
            System.out.println("    Stream: column " + section.getColumn() +
              " section " + section.getKind() + " start: " + sectionStart +
              " length " + section.getLength());
            sectionStart += section.getLength();
          }
          for (int i = 0; i < footer.getColumnsCount(); ++i) {
            OrcProto.ColumnEncoding encoding = footer.getColumns(i);
            StringBuilder buf = new StringBuilder();
            buf.append("    Encoding column ");
            buf.append(i);
            buf.append(": ");
            buf.append(encoding.getKind());
            // Dictionary encodings additionally report their dictionary size.
            if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY) {
              buf.append("[");
              buf.append(encoding.getDictionarySize());
              buf.append("]");
            }
            System.out.println(buf);
          }
        }
      } finally {
        // Close the record reader for this file before moving on to the next
        // one, even when dumping fails part-way through, so that one reader
        // is not leaked per command-line argument.
        rows.close();
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy