All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.comet.vector.CometDictionary Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.comet.vector;

import org.apache.comet.shaded.arrow.vector.ValueVector;
import org.apache.spark.unsafe.types.UTF8String;

/**
 * A dictionary which maps indices (integers) to values.
 *
 * <p>Every {@code decodeTo*} method delegates to the backing {@link CometPlainVector}. The only
 * additional state kept here is the expected number of dictionary entries and a lazily built
 * cache of decimal values used by {@link #decodeToBinary(int)}.
 */
public class CometDictionary implements AutoCloseable {
  /** Length of the byte array allocated for each cached decimal value. */
  private static final int DECIMAL_BYTE_WIDTH = 16;

  /** Backing vector holding the dictionary values; replaceable via setDictionaryVector. */
  private CometPlainVector values;

  /** Number of entries, captured from the vector passed to the constructor. */
  private final int numValues;

  /** Decoded dictionary values. We only need to copy values for decimal type. */
  private volatile ByteArrayWrapper[] binaries;

  /**
   * Creates a dictionary backed by the given vector.
   *
   * @param values the vector holding the dictionary values; its {@code numValues()} fixes the
   *     size that later replacement vectors must match
   */
  public CometDictionary(CometPlainVector values) {
    this.values = values;
    this.numValues = values.numValues();
  }

  /**
   * Replaces the backing vector with another one of the same size.
   *
   * <p>The size is validated before the assignment so that a rejected vector never replaces the
   * current one.
   *
   * @param values the new backing vector
   * @throws IllegalArgumentException if {@code values.numValues()} differs from the size of the
   *     vector this dictionary was created with
   */
  public void setDictionaryVector(CometPlainVector values) {
    if (values.numValues() != numValues) {
      throw new IllegalArgumentException("Mismatched dictionary size");
    }
    // NOTE(review): the decimal cache built by decodeToBinary is not cleared here; presumably
    // replacement vectors carry the same dictionary content - confirm against callers.
    this.values = values;
  }

  /** Returns the value vector of the backing {@link CometPlainVector}. */
  public ValueVector getValueVector() {
    return values.getValueVector();
  }

  /** Returns the dictionary value at {@code index} as a boolean. */
  public boolean decodeToBoolean(int index) {
    return values.getBoolean(index);
  }

  /** Returns the dictionary value at {@code index} as a byte. */
  public byte decodeToByte(int index) {
    return values.getByte(index);
  }

  /** Returns the dictionary value at {@code index} as a short. */
  public short decodeToShort(int index) {
    return values.getShort(index);
  }

  /** Returns the dictionary value at {@code index} as an int. */
  public int decodeToInt(int index) {
    return values.getInt(index);
  }

  /** Returns the dictionary value at {@code index} as a long. */
  public long decodeToLong(int index) {
    return values.getLong(index);
  }

  /** Returns the result of the backing vector's {@code getLongDecimal} for {@code index}. */
  public long decodeToLongDecimal(int index) {
    return values.getLongDecimal(index);
  }

  /** Returns the dictionary value at {@code index} as a float. */
  public float decodeToFloat(int index) {
    return values.getFloat(index);
  }

  /** Returns the dictionary value at {@code index} as a double. */
  public double decodeToDouble(int index) {
    return values.getDouble(index);
  }

  /**
   * Returns the dictionary value at {@code index} as a byte array.
   *
   * <p>For {@code VARBINARY} and {@code FIXEDSIZEBINARY} vectors the call is forwarded to the
   * backing vector. For {@code DECIMAL} vectors the value comes from a cache that is populated
   * for all entries on first use; the returned array is the cached array itself, not a copy.
   *
   * @param index position of the value in the dictionary
   * @return the bytes of the value at {@code index}
   * @throws IllegalArgumentException if the backing vector has any other Arrow minor type
   */
  public byte[] decodeToBinary(int index) {
    switch (values.getValueVector().getMinorType()) {
      case VARBINARY:
      case FIXEDSIZEBINARY:
        return values.getBinary(index);
      case DECIMAL:
        return decodedDecimals()[index].bytes;
      default:
        throw new IllegalArgumentException(
            "Invalid Arrow minor type: " + values.getValueVector().getMinorType());
    }
  }

  /** Returns the dictionary value at {@code index} as a {@link UTF8String}. */
  public UTF8String decodeToUTF8String(int index) {
    return values.getUTF8String(index);
  }

  /** Closes the backing vector. */
  @Override
  public void close() {
    values.close();
  }

  /** Returns the cached decimal values, building the cache on first use. */
  private ByteArrayWrapper[] decodedDecimals() {
    // Read the volatile field once; the local is what gets indexed by the caller.
    ByteArrayWrapper[] decoded = binaries;
    if (decoded == null) {
      // We only need to copy values for decimal 128 type as random access
      // to the dictionary is not efficient for decimal (it needs to copy
      // the value to a new byte array every time).
      decoded = new ByteArrayWrapper[numValues];
      for (int i = 0; i < numValues; i++) {
        // Need copying here since we re-use byte array for decimal
        byte[] bytes = new byte[DECIMAL_BYTE_WIDTH];
        bytes = values.copyBinaryDecimal(i, bytes);
        decoded[i] = new ByteArrayWrapper(bytes);
      }
      // Publish only after every slot has been filled.
      binaries = decoded;
    }
    return decoded;
  }

  /** Holder for one cached decimal value's bytes. */
  private static class ByteArrayWrapper {
    private final byte[] bytes;

    ByteArrayWrapper(byte[] bytes) {
      this.bytes = bytes;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy