All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.stratio.cassandra.lucene.partitioning.PartitionerOnColumn.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2014 Stratio (http://stratio.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.stratio.cassandra.lucene.partitioning

import java.nio.ByteBuffer

import com.fasterxml.jackson.annotation.JsonProperty
import com.stratio.cassandra.lucene.IndexException
import com.stratio.cassandra.lucene.util.ByteBufferUtils
import org.apache.cassandra.db.PartitionPosition.Kind.ROW_KEY
import org.apache.cassandra.db._
import org.apache.cassandra.db.marshal.{AbstractType, UTF8Type}
import org.apache.cassandra.schema.TableMetadata
import org.apache.cassandra.utils.MurmurHash
import org.apache.commons.lang3.StringUtils

import scala.collection.JavaConverters._

/** [[Partitioner]] based on a partition key column. Rows will be stored in an index partition
  * determined by the hash of the specified partition key column. Both partition-directed and token
  * range searches containing an CQL equality filter over the selected partition key column will be
  * routed to a single partition, increasing performance. However, token range searches without
  * filters over the partitioning column will be routed to all the partitions, with a slightly lower
  * performance.
  *
  * Load balancing depends on the cardinality and distribution of the values of the partitioning
  * column. Both high cardinalities and uniform distributions will provide better load balancing
  * between partitions.
  *
  * @param partitions   the number of index partitions per node
  * @param column       the name of the partition key column
  * @param position     the position of the partition column in the partition key
  * @param keyValidator the type of the partition key
  * @author Andres de la Pena `[email protected]`
  */
case class PartitionerOnColumn(
    partitions: Int,
    column: String,
    position: Int,
    keyValidator: AbstractType[_]) extends Partitioner {

  if (partitions <= 0) throw new IndexException(
    s"The number of partitions should be strictly positive but found $partitions")

  if (StringUtils.isBlank(column)) throw new IndexException(
    s"A partition column should be specified")

  if (position < 0) throw new IndexException(
    s"The column position in the partition key should be positive")

  if (keyValidator == null) throw new IndexException(
    s"The partition key type should be specified")

  private def partition(bb: ByteBuffer): Int = {
    val hash = new Array[Long](2)
    MurmurHash.hash3_x64_128(bb, bb.position, bb.remaining, 0, hash)
    (Math.abs(hash(0)) % partitions).toInt
  }

  /** @inheritdoc */
  override def numPartitions: Int = partitions

  /** @inheritdoc */
  override def partition(key: DecoratedKey): Int =
    partition(ByteBufferUtils.split(key.getKey, keyValidator)(position))

  /** @inheritdoc */
  override def partitions(command: ReadCommand): List[Int] = command match {
    case c: SinglePartitionReadCommand => List(partition(c.partitionKey))
    case c: PartitionRangeReadCommand =>
      val range = c.dataRange
      val start = range.startKey
      val stop = range.stopKey
      if (start.kind == ROW_KEY && stop.kind == ROW_KEY && !start.isMinimum && start.equals(stop)) {
        List(partition(start.asInstanceOf[DecoratedKey]))
      } else {
        val expressions = command.rowFilter.getExpressions.asScala
        val expression = expressions.filter(!_.isCustom).find(_.column.name.toString == column)
        expression.map(_.getIndexValue).map(partition).map(List(_)).getOrElse(allPartitions)
      }
    case _ => throw new IndexException(s"Unsupported read command type: ${command.getClass}")
  }

}

/** Companion object for [[PartitionerOnColumn]]. */
object PartitionerOnColumn {

  /** [[PartitionerOnColumn]] builder.
    *
    * @param partitions the number of index partitions per node
    * @param column     the name of the partition key column
    */
  case class Builder(
      @JsonProperty("partitions") partitions: Int,
      @JsonProperty("column") column: String)
    extends Partitioner.Builder {
    override def build(metadata: TableMetadata): PartitionerOnColumn = {
      val name = UTF8Type.instance.decompose(column)
      metadata.getColumn(name) match {
        case null =>
          throw new IndexException(s"Partitioner's column '$column' not found in table schema")
        case d if d.isPartitionKey =>
          PartitionerOnColumn(partitions, column, d.position(), metadata.partitionKeyType)
        case _ =>
          throw new IndexException(s"Partitioner's column '$column' is not part of partition key")
      }
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy