// com.stratio.cassandra.lucene.mapping.TokenMapper.scala (Maven / Gradle / Ivy)
// Artifact: cassandra-lucene-index-plugin (Cassandra Lucene Index plugin), newest version.
/*
 * Copyright (C) 2014 Stratio (http://stratio.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.stratio.cassandra.lucene.mapping

import com.stratio.cassandra.lucene.IndexException
import com.stratio.cassandra.lucene.mapping.TokenMapper._
import org.apache.cassandra.config.DatabaseDescriptor
import org.apache.cassandra.db.DecoratedKey
import org.apache.cassandra.dht.{Murmur3Partitioner, Token}
import org.apache.lucene.document.{FieldType, LongField}
import org.apache.lucene.index.{DocValuesType, IndexOptions, IndexableField, Term}
import org.apache.lucene.search._
import org.apache.lucene.util.{BytesRef, BytesRefBuilder, NumericUtils}

/** Class for several token mappings between Cassandra and Lucene.
  *
  * @author Andres de la Pena `[email protected]`
  */
class TokenMapper {

  // Fail fast at construction time: every mapping below relies on tokens carrying a `Long` value,
  // which is only checked here for the Murmur3 partitioner.
  if (!DatabaseDescriptor.getPartitioner.isInstanceOf[Murmur3Partitioner]) {
    throw new IndexException("Only Murmur3 partitioner is supported")
  }

  /** Returns the Lucene [[IndexableField]] associated to the token of the specified row key.
    *
    * The field is named [[TokenMapper.FIELD_NAME]], uses [[TokenMapper.FIELD_TYPE]] and stores the
    * `Long` value of the key's token.
    *
    * @param key the decorated partition key whose token is to be indexed
    * @return an indexable field containing the token value
    */
  def indexableField(key: DecoratedKey): IndexableField =
    new LongField(FIELD_NAME, longValue(key.getToken), FIELD_TYPE)

  /** Returns a Lucene [[SortField]] for sorting documents by the `Long` value of their token.
    *
    * @return a sort field over [[TokenMapper.FIELD_NAME]] of type `LONG`
    */
  def sortField: SortField = new SortField(FIELD_NAME, SortField.Type.LONG)

  /** Returns a query to find the documents containing a token inside the specified token range.
    *
    * A minimum token (`isMinimum`) is treated as an unbounded end of the range. If both ends are
    * unbounded the range covers all the data and no query is needed, so `None` is returned.
    *
    * An unbounded end is always searched inclusively, regardless of the matching `include*` flag.
    * Otherwise documents whose token is exactly `Long.MaxValue` would be dropped whenever `upper`
    * is the minimum token and `includeUpper` is `false`.
    *
    * @param lower        the lower token
    * @param upper        the upper token
    * @param includeLower if the lower token should be included
    * @param includeUpper if the upper token should be included
    * @return the query to find the documents containing a token inside the range, or `None` if
    *         the range spans all the tokens
    */
  def query(
      lower: Token,
      upper: Token,
      includeLower: Boolean,
      includeUpper: Boolean): Option[Query] = {

    val lowerUnbounded = lower.isMinimum
    val upperUnbounded = upper.isMinimum

    if (lowerUnbounded && upperUnbounded) {
      // Full data range, there is nothing to restrict
      None
    } else {
      // Get token values, replacing unbounded ends by the extremes of the Long domain
      val min: java.lang.Long = if (lowerUnbounded) Long.MinValue else longValue(lower)
      val max: java.lang.Long = if (upperUnbounded) Long.MaxValue else longValue(upper)

      // The extreme values stand for "no limit", so they must never be excluded
      val minInclusive = includeLower || lowerUnbounded
      val maxInclusive = includeUpper || upperUnbounded

      // Empirical heuristic: ranges wider than this threshold (measured in tenths of the token
      // distance) are resolved with doc values, narrower ones with the inverted index.
      // Both bounds are divided by 10 before subtracting so the difference cannot overflow a Long.
      val docValuesThreshold = 1222337203685480000L
      val rangeQuery: Query =
        if (max / 10 - min / 10 > docValuesThreshold) {
          DocValuesRangeQuery.newLongRange(FIELD_NAME, min, max, minInclusive, maxInclusive)
        } else {
          NumericRangeQuery.newLongRange(FIELD_NAME, min, max, minInclusive, maxInclusive)
        }
      Some(rangeQuery)
    }
  }

  /** Returns a Lucene query to find the documents containing the specified token.
    *
    * The token is matched as an exact term, encoded with [[TokenMapper.bytesRef]].
    *
    * @param token the token
    * @return the query to find the documents containing `token`
    */
  def query(token: Token): Query =
    new TermQuery(new Term(FIELD_NAME, bytesRef(token)))

}

/** Companion object for [[TokenMapper]]. */
object TokenMapper {

  /** Name of the Lucene field where the token values are stored. */
  val FIELD_NAME: String = "_token"

  /** Type of the Lucene token field: a tokenized `LONG` numeric field without norms, indexed
    * with `DOCS` options and backed by `NUMERIC` doc values. The instance is frozen, so it
    * can't be modified after initialization.
    */
  val FIELD_TYPE: FieldType = {
    val fieldType = new FieldType
    fieldType.setTokenized(true)
    fieldType.setOmitNorms(true)
    fieldType.setIndexOptions(IndexOptions.DOCS)
    fieldType.setNumericType(FieldType.NumericType.LONG)
    fieldType.setDocValuesType(DocValuesType.NUMERIC)
    fieldType.freeze()
    fieldType
  }

  /** Extracts the `Long` value wrapped by the specified Murmur3 partitioning [[Token]].
    *
    * The token value is cast without further checks, so the token is expected to carry a `Long`.
    *
    * @param token a Murmur3 token
    * @return the `Long` value of `token`
    */
  def longValue(token: Token): Long = token.getTokenValue.asInstanceOf[Long]

  /** Builds the [[BytesRef]] term used to index the specified Murmur3 partitioning [[Token]].
    *
    * The token's `Long` value is prefix-coded with a shift of zero.
    *
    * @param token a Murmur3 token
    * @return the indexing value of `token`
    */
  def bytesRef(token: Token): BytesRef = {
    val builder = new BytesRefBuilder
    NumericUtils.longToPrefixCoded(longValue(token), 0, builder)
    builder.get
  }
}