All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.parquet.column.values.dictionary.PlainValuesDictionary Maven / Gradle / Ivy

There is a newer version: 1.15.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.column.values.dictionary;

import static org.apache.parquet.bytes.BytesUtils.readIntLittleEndian;
import static org.apache.parquet.column.Encoding.PLAIN;
import static org.apache.parquet.column.Encoding.PLAIN_DICTIONARY;

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.parquet.Preconditions;
import org.apache.parquet.bytes.ByteBufferInputStream;
import org.apache.parquet.column.Dictionary;
import org.apache.parquet.column.page.DictionaryPage;
import org.apache.parquet.column.values.plain.PlainValuesReader.DoublePlainValuesReader;
import org.apache.parquet.column.values.plain.PlainValuesReader.FloatPlainValuesReader;
import org.apache.parquet.column.values.plain.PlainValuesReader.IntegerPlainValuesReader;
import org.apache.parquet.column.values.plain.PlainValuesReader.LongPlainValuesReader;
import org.apache.parquet.io.ParquetDecodingException;
import org.apache.parquet.io.api.Binary;

/**
 * a simple implementation of dictionary for plain encoded values
 */
public abstract class PlainValuesDictionary extends Dictionary {

  /**
   * @param dictionaryPage the PLAIN encoded content of the dictionary
   * @throws IOException if there is an exception while decoding the dictionary page
   */
  protected PlainValuesDictionary(DictionaryPage dictionaryPage) throws IOException {
    super(dictionaryPage.getEncoding());
    if (dictionaryPage.getEncoding() != PLAIN_DICTIONARY && dictionaryPage.getEncoding() != PLAIN) {
      throw new ParquetDecodingException(
          "Dictionary data encoding type not supported: " + dictionaryPage.getEncoding());
    }
  }

  /**
   * a simple implementation of dictionary for plain encoded binary
   */
  public static class PlainBinaryDictionary extends PlainValuesDictionary {

    private Binary[] binaryDictionaryContent = null;

    /**
     * Decodes {@link Binary} values from a {@link DictionaryPage}.
     * 

* Values are read as length-prefixed values with a 4-byte little-endian * length. * * @param dictionaryPage a {@code DictionaryPage} of encoded binary values * @throws IOException if there is an exception while decoding the dictionary page */ public PlainBinaryDictionary(DictionaryPage dictionaryPage) throws IOException { this(dictionaryPage, null); } /** * Decodes {@link Binary} values from a {@link DictionaryPage}. *

* If the given {@code length} is null, the values will be read as length- * prefixed values with a 4-byte little-endian length. If length is not * null, it will be used as the length for all fixed-length {@code Binary} * values read from the page. * * @param dictionaryPage a {@code DictionaryPage} of encoded binary values * @param length a fixed length of binary arrays, or null if not fixed * @throws IOException if there is an exception while decoding the dictionary page */ public PlainBinaryDictionary(DictionaryPage dictionaryPage, Integer length) throws IOException { super(dictionaryPage); final ByteBuffer dictionaryBytes = dictionaryPage.getBytes().toByteBuffer(); binaryDictionaryContent = new Binary[dictionaryPage.getDictionarySize()]; // dictionary values are stored in order: size (4 bytes LE) followed by {size} bytes int offset = dictionaryBytes.position(); if (length == null) { // dictionary values are stored in order: size (4 bytes LE) followed by {size} bytes for (int i = 0; i < binaryDictionaryContent.length; i++) { int len = readIntLittleEndian(dictionaryBytes, offset); // read the length offset += 4; // wrap the content in a binary binaryDictionaryContent[i] = Binary.fromConstantByteBuffer(dictionaryBytes, offset, len); // increment to the next value offset += len; } } else { // dictionary values are stored as fixed-length arrays Preconditions.checkArgument(length > 0, "Invalid byte array length: %s", length); for (int i = 0; i < binaryDictionaryContent.length; i++) { // wrap the content in a Binary binaryDictionaryContent[i] = Binary.fromConstantByteBuffer(dictionaryBytes, offset, length); // increment to the next value offset += length; } } } @Override public Binary decodeToBinary(int id) { return binaryDictionaryContent[id]; } @Override public String toString() { StringBuilder sb = new StringBuilder("PlainBinaryDictionary {\n"); for (int i = 0; i < binaryDictionaryContent.length; i++) { sb.append(i).append(" => 
").append(binaryDictionaryContent[i]).append("\n"); } return sb.append("}").toString(); } @Override public int getMaxId() { return binaryDictionaryContent.length - 1; } } /** * a simple implementation of dictionary for plain encoded long values */ public static class PlainLongDictionary extends PlainValuesDictionary { private long[] longDictionaryContent = null; /** * @param dictionaryPage a dictionary page of encoded long values * @throws IOException if there is an exception while decoding the dictionary page */ public PlainLongDictionary(DictionaryPage dictionaryPage) throws IOException { super(dictionaryPage); ByteBufferInputStream in = dictionaryPage.getBytes().toInputStream(); longDictionaryContent = new long[dictionaryPage.getDictionarySize()]; LongPlainValuesReader longReader = new LongPlainValuesReader(); longReader.initFromPage(dictionaryPage.getDictionarySize(), in); for (int i = 0; i < longDictionaryContent.length; i++) { longDictionaryContent[i] = longReader.readLong(); } } @Override public long decodeToLong(int id) { return longDictionaryContent[id]; } @Override public String toString() { StringBuilder sb = new StringBuilder("PlainLongDictionary {\n"); for (int i = 0; i < longDictionaryContent.length; i++) { sb.append(i).append(" => ").append(longDictionaryContent[i]).append("\n"); } return sb.append("}").toString(); } @Override public int getMaxId() { return longDictionaryContent.length - 1; } } /** * a simple implementation of dictionary for plain encoded double values */ public static class PlainDoubleDictionary extends PlainValuesDictionary { private double[] doubleDictionaryContent = null; /** * @param dictionaryPage a dictionary page of encoded double values * @throws IOException if there is an exception while decoding the dictionary page */ public PlainDoubleDictionary(DictionaryPage dictionaryPage) throws IOException { super(dictionaryPage); ByteBufferInputStream in = dictionaryPage.getBytes().toInputStream(); doubleDictionaryContent = new 
double[dictionaryPage.getDictionarySize()]; DoublePlainValuesReader doubleReader = new DoublePlainValuesReader(); doubleReader.initFromPage(dictionaryPage.getDictionarySize(), in); for (int i = 0; i < doubleDictionaryContent.length; i++) { doubleDictionaryContent[i] = doubleReader.readDouble(); } } @Override public double decodeToDouble(int id) { return doubleDictionaryContent[id]; } @Override public String toString() { StringBuilder sb = new StringBuilder("PlainDoubleDictionary {\n"); for (int i = 0; i < doubleDictionaryContent.length; i++) { sb.append(i).append(" => ").append(doubleDictionaryContent[i]).append("\n"); } return sb.append("}").toString(); } @Override public int getMaxId() { return doubleDictionaryContent.length - 1; } } /** * a simple implementation of dictionary for plain encoded integer values */ public static class PlainIntegerDictionary extends PlainValuesDictionary { private int[] intDictionaryContent = null; /** * @param dictionaryPage a dictionary page of encoded integer values * @throws IOException if there is an exception while decoding the dictionary page */ public PlainIntegerDictionary(DictionaryPage dictionaryPage) throws IOException { super(dictionaryPage); ByteBufferInputStream in = dictionaryPage.getBytes().toInputStream(); intDictionaryContent = new int[dictionaryPage.getDictionarySize()]; IntegerPlainValuesReader intReader = new IntegerPlainValuesReader(); intReader.initFromPage(dictionaryPage.getDictionarySize(), in); for (int i = 0; i < intDictionaryContent.length; i++) { intDictionaryContent[i] = intReader.readInteger(); } } @Override public int decodeToInt(int id) { return intDictionaryContent[id]; } @Override public String toString() { StringBuilder sb = new StringBuilder("PlainIntegerDictionary {\n"); for (int i = 0; i < intDictionaryContent.length; i++) { sb.append(i).append(" => ").append(intDictionaryContent[i]).append("\n"); } return sb.append("}").toString(); } @Override public int getMaxId() { return 
intDictionaryContent.length - 1; } } /** * a simple implementation of dictionary for plain encoded float values */ public static class PlainFloatDictionary extends PlainValuesDictionary { private float[] floatDictionaryContent = null; /** * @param dictionaryPage a dictionary page of encoded float values * @throws IOException if there is an exception while decoding the dictionary page */ public PlainFloatDictionary(DictionaryPage dictionaryPage) throws IOException { super(dictionaryPage); ByteBufferInputStream in = dictionaryPage.getBytes().toInputStream(); floatDictionaryContent = new float[dictionaryPage.getDictionarySize()]; FloatPlainValuesReader floatReader = new FloatPlainValuesReader(); floatReader.initFromPage(dictionaryPage.getDictionarySize(), in); for (int i = 0; i < floatDictionaryContent.length; i++) { floatDictionaryContent[i] = floatReader.readFloat(); } } @Override public float decodeToFloat(int id) { return floatDictionaryContent[id]; } @Override public String toString() { StringBuilder sb = new StringBuilder("PlainFloatDictionary {\n"); for (int i = 0; i < floatDictionaryContent.length; i++) { sb.append(i).append(" => ").append(floatDictionaryContent[i]).append("\n"); } return sb.append("}").toString(); } @Override public int getMaxId() { return floatDictionaryContent.length - 1; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy