All Downloads are FREE. Search and download functionality uses the official Maven repository.

parquet.column.ParquetProperties Maven / Gradle / Ivy

There is a newer version: 1.6.0
Show newest version
package parquet.column;

import parquet.bytes.BytesUtils;
import parquet.column.values.ValuesWriter;
import parquet.column.values.boundedint.DevNullValuesWriter;
import parquet.column.values.delta.DeltaBinaryPackingValuesWriter;
import parquet.column.values.deltastrings.DeltaByteArrayWriter;
import parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter;
import parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter;
import parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter;
import parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter;
import parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter;
import parquet.column.values.dictionary.DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter;
import parquet.column.values.plain.BooleanPlainValuesWriter;
import parquet.column.values.plain.FixedLenByteArrayPlainValuesWriter;
import parquet.column.values.plain.PlainValuesWriter;
import parquet.column.values.rle.RunLengthBitPackingHybridValuesWriter;

/**
 * This class represents all the configurable Parquet properties.
 * 
 * @author amokashi
 *
 */
public class ParquetProperties {

  /**
   * Writer versions known to this class. Each constant carries a short name
   * ("v1", "v2") that {@link #fromString(String)} accepts in addition to the
   * constant's own name.
   */
  public enum WriterVersion {
    PARQUET_1_0 ("v1"),
    PARQUET_2_0 ("v2");

    private final String shortName;

    WriterVersion(String shortname) {
      this.shortName = shortname;
    }

    /**
     * Resolves a writer version from its short name ("v1", "v2") or, failing
     * that, from the exact enum constant name ("PARQUET_1_0", "PARQUET_2_0").
     *
     * @param name short name or exact constant name
     * @return the matching writer version
     * @throws IllegalArgumentException if {@code name} matches neither a short
     *         name nor a constant name
     * @throws NullPointerException if {@code name} is null (raised by
     *         {@code Enum.valueOf})
     */
    public static WriterVersion fromString(String name) {
      for (WriterVersion v : WriterVersion.values()) {
        if (v.shortName.equals(name)) {
          return v;
        }
      }
      // No short name matched: fall back to an exact match on the constant name.
      return WriterVersion.valueOf(name);
    }
  }

  private final int dictionaryPageSizeThreshold;
  private final WriterVersion writerVersion;
  private final boolean enableDictionary;

  /**
   * @param dictPageSize value handed to the dictionary writers as their
   *        dictionary page size threshold
   * @param writerVersion version used to choose between the encodings in
   *        {@link #getValuesWriter(ColumnDescriptor, int)}
   * @param enableDict whether dictionary writers are used for the types that
   *        have one
   */
  public ParquetProperties(int dictPageSize, WriterVersion writerVersion, boolean enableDict) {
    this.dictionaryPageSizeThreshold = dictPageSize;
    this.writerVersion = writerVersion;
    this.enableDictionary = enableDict;
  }

  /**
   * Creates a writer for level values bounded by {@code maxLevel}
   * (presumably repetition/definition levels; confirm against callers).
   *
   * @param maxLevel largest value that will be written; 0 yields a
   *        {@link DevNullValuesWriter}
   * @param initialSizePerCol initial size passed to the run-length/bit-packing
   *        hybrid writer
   * @return a {@link DevNullValuesWriter} when {@code maxLevel} is 0, otherwise
   *         a {@link RunLengthBitPackingHybridValuesWriter} whose bit width is
   *         {@code BytesUtils.getWidthFromMaxInt(maxLevel)}
   */
  public static ValuesWriter getColumnDescriptorValuesWriter(int maxLevel,  int initialSizePerCol) {
    if (maxLevel == 0) {
      return new DevNullValuesWriter();
    } else {
      return new RunLengthBitPackingHybridValuesWriter(
          BytesUtils.getWidthFromMaxInt(maxLevel), initialSizePerCol);
    }
  }

  /**
   * Chooses the values writer for a column from its primitive type, the
   * dictionary setting and the writer version.
   *
   * <ul>
   *   <li>BOOLEAN: plain writer for 1.0, run-length/bit-packing hybrid (width 1) for 2.0.</li>
   *   <li>BINARY, INT32: dictionary writer when dictionaries are enabled;
   *       otherwise plain for 1.0 and a delta writer for 2.0.</li>
   *   <li>INT64, DOUBLE, FLOAT: dictionary writer when enabled, otherwise plain.</li>
   *   <li>INT96: fixed-length (12 byte) dictionary writer when enabled,
   *       otherwise fixed-length plain.</li>
   *   <li>FIXED_LEN_BYTE_ARRAY: dictionary writer only when dictionaries are
   *       enabled and the version is 2.0; otherwise fixed-length plain.</li>
   *   <li>Any other type: plain writer.</li>
   * </ul>
   *
   * @param path descriptor of the column to write
   * @param initialSizePerCol initial size passed to the created writer
   * @return the writer for the column, never null
   * @throws IllegalArgumentException if the choice depends on the writer
   *         version (BOOLEAN, or BINARY/INT32 with dictionaries disabled) and
   *         the version is neither PARQUET_1_0 nor PARQUET_2_0
   */
  public ValuesWriter getValuesWriter(ColumnDescriptor path, int initialSizePerCol) {
    switch (path.getType()) {
    case BOOLEAN:
      if (writerVersion == WriterVersion.PARQUET_1_0) {
        return new BooleanPlainValuesWriter();
      } else if (writerVersion == WriterVersion.PARQUET_2_0) {
        return new RunLengthBitPackingHybridValuesWriter(1, initialSizePerCol);
      }
      break;
    case BINARY:
      if (enableDictionary) {
        return new PlainBinaryDictionaryValuesWriter(dictionaryPageSizeThreshold, initialSizePerCol);
      } else {
        if (writerVersion == WriterVersion.PARQUET_1_0) {
          return new PlainValuesWriter(initialSizePerCol);
        } else if (writerVersion == WriterVersion.PARQUET_2_0) {
          return new DeltaByteArrayWriter(initialSizePerCol);
        }
      }
      break;
    case INT32:
      if (enableDictionary) {
        return new PlainIntegerDictionaryValuesWriter(dictionaryPageSizeThreshold, initialSizePerCol);
      } else {
        if (writerVersion == WriterVersion.PARQUET_1_0) {
          return new PlainValuesWriter(initialSizePerCol);
        } else if (writerVersion == WriterVersion.PARQUET_2_0) {
          return new DeltaBinaryPackingValuesWriter(initialSizePerCol);
        }
      }
      break;
    case INT64:
      if (enableDictionary) {
        return new PlainLongDictionaryValuesWriter(dictionaryPageSizeThreshold, initialSizePerCol);
      } else {
        return new PlainValuesWriter(initialSizePerCol);
      }
    case INT96:
      // INT96 values are written as 12-byte fixed-length arrays.
      if (enableDictionary) {
        return new PlainFixedLenArrayDictionaryValuesWriter(dictionaryPageSizeThreshold, initialSizePerCol, 12);
      } else {
        return new FixedLenByteArrayPlainValuesWriter(12, initialSizePerCol);
      }
    case DOUBLE:
      if (enableDictionary) {
        return new PlainDoubleDictionaryValuesWriter(dictionaryPageSizeThreshold, initialSizePerCol);
      } else {
        return new PlainValuesWriter(initialSizePerCol);
      }
    case FLOAT:
      if (enableDictionary) {
        return new PlainFloatDictionaryValuesWriter(dictionaryPageSizeThreshold, initialSizePerCol);
      } else {
        return new PlainValuesWriter(initialSizePerCol);
      }
    case FIXED_LEN_BYTE_ARRAY:
      // The dictionary writer is used for this type only with the 2.0 writer version.
      if (enableDictionary && (writerVersion == WriterVersion.PARQUET_2_0)) {
        return new PlainFixedLenArrayDictionaryValuesWriter(dictionaryPageSizeThreshold, initialSizePerCol, path.getTypeLength());
      } else {
        return new FixedLenByteArrayPlainValuesWriter(path.getTypeLength(), initialSizePerCol);
      }
    default:
      return new PlainValuesWriter(initialSizePerCol);
    }
    // Only reached from the version-dependent cases above when writerVersion
    // matched neither known constant (for example, it is null). Fail here
    // rather than hand the caller a null writer.
    throw new IllegalArgumentException("Unknown writer version " + writerVersion);
  }

  /**
   * @return the dictionary page size threshold given to the constructor
   */
  public int getDictionaryPageSizeThreshold() {
    return dictionaryPageSizeThreshold;
  }

  /**
   * @return the writer version given to the constructor
   */
  public WriterVersion getWriterVersion() {
    return writerVersion;
  }

  /**
   * @return whether dictionary writers are enabled
   */
  public boolean isEnableDictionary() {
    return enableDictionary;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy