All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.scala Maven / Gradle / Ivy

There is a newer version: 2.0.0-alpha4
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.spark.datasources

import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Logging
import org.apache.spark.sql.types._

/**
  * A range, expressed as a pair of byte arrays, for a data type whose size is known.
  * Whether each bound is inclusive or exclusive is undefined here; it is up to the
  * caller to decide.
  *
  * Note: because both fields are arrays, the case-class generated `equals` and
  * `hashCode` compare them by reference, not by content.
  *
  * @param low the lower bound of the range.
  * @param upper the upper bound of the range.
  */
@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
@InterfaceStability.Evolving
case class BoundRange(low: Array[Byte],upper: Array[Byte])

/**
  * Identifies the ranges for a Java primitive type. The caller needs to decide on
  * its own whether each bound is inclusive or exclusive.
  *
  * @param less the set of ranges for LessThan/LessThanOrEqual
  * @param greater the set of ranges for GreaterThan/GreaterThanOrEqual
  * @param value the byte array of the original value
  */
@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
@InterfaceStability.Evolving
case class BoundRanges(less: Array[BoundRange], greater: Array[BoundRange], value: Array[Byte])

/**
  * The trait that supports a plugin architecture for different encoders/decoders.
  * `encode` serializes a value of a given data type to a byte array, `filter`
  * decides whether a record satisfies a predicate, and `ranges` splits a predicate
  * value into byte-array ranges.
  */
@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
@InterfaceStability.Evolving
trait BytesEncoder {
  /**
    * Serializes a value to a byte array.
    *
    * @param dt the Spark SQL data type of `value`.
    * @param value the value to serialize; presumably it must match `dt` (the exact
    *              accepted types depend on the implementation).
    * @return the encoded bytes.
    */
  def encode(dt: DataType, value: Any): Array[Byte]

  /**
    * Performs the actual filtering operation. The format of filterBytes depends on the
    * implementation of the BytesEncoder.
    *
    * @param input the byte array holding the current input value to be tested.
    * @param offset1 the starting offset of the value in the input byte array.
    * @param length1 the length of the value in the input byte array.
    * @param filterBytes the byte array provided by the query condition.
    * @param offset2 the starting offset in the filterBytes.
    * @param length2 the length of the bytes in the filterBytes.
    * @param ops the filter operator, one of the JavaBytesEncoder enumeration values.
    * @return true if the record satisfies the predicate,
    *         false if it does not.
    */
  def filter(input: Array[Byte], offset1: Int, length1: Int,
             filterBytes: Array[Byte], offset2: Int, length2: Int,
             ops: JavaBytesEncoder): Boolean

  /**
    * Currently used for partition pruning.
    * For some codecs, the ordering of a Java primitive type may be inconsistent with
    * the ordering of its byte-array encoding, so a predicate on such a type may have
    * to be split into multiple predicates.
    *
    * For example, in the naive codec some of the Java primitive types have to be
    * split into multiple predicates, which are then unioned together so that the
    * predicate is evaluated correctly.
    * For example, "COLUMN < 2" is transformed into
    * "0 <= COLUMN < 2 OR Integer.MIN_VALUE <= COLUMN <= -1"
    *
    * @param in the predicate value to derive ranges for; the accepted types depend on
    *           the implementation.
    * @return the ranges for `in`; presumably None when no ranges can be derived for
    *         the value (confirm against the implementations).
    */
  def ranges(in: Any): Option[BoundRanges]
}

/**
  * Enumeration of the filter operators passed to `BytesEncoder.filter`, together with
  * the factory that instantiates a `BytesEncoder` plugin from its class name.
  */
@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
@InterfaceStability.Evolving
object JavaBytesEncoder extends Enumeration with Logging{
  type JavaBytesEncoder = Value
  val Greater, GreaterEqual, Less, LessEqual, Equal, Unknown = Value

  /**
    * Creates the encoder/decoder.
    *
    * The class is loaded reflectively and instantiated through its no-argument
    * constructor. If it cannot be loaded, instantiated or cast to `BytesEncoder`,
    * a warning (including the cause) is logged and a `NaiveEncoder` is returned
    * instead. Fatal JVM conditions such as `OutOfMemoryError` or
    * `InterruptedException` are not swallowed and propagate to the caller.
    *
    * @param clsName the class name of the encoder/decoder class
    * @return the instance of the encoder plugin, or a `NaiveEncoder` as fallback.
    */
  def create(clsName: String): BytesEncoder = {
    import scala.util.control.NonFatal

    // Shared fallback path: keep the original message, but attach the cause so the
    // reason the plugin could not be created is visible in the log.
    def fallback(cause: Throwable): BytesEncoder = {
      logWarning(s"$clsName cannot be initiated, falling back to naive encoder", cause)
      new NaiveEncoder()
    }

    try {
      Class.forName(clsName).newInstance.asInstanceOf[BytesEncoder]
    } catch {
      case NonFatal(e) => fallback(e)
      // NonFatal treats LinkageError (e.g. NoClassDefFoundError,
      // ExceptionInInitializerError) as fatal, but a broken plugin class is exactly
      // the situation the fallback exists for, so keep handling it.
      case e: LinkageError => fallback(e)
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy