com.stratio.cassandra.lucene.IndexOptions.scala Maven / Gradle / Ivy
The newest version!
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene
import java.io.File
import java.nio.file.{Path, Paths}
import com.stratio.cassandra.lucene.IndexOptions._
import com.stratio.cassandra.lucene.partitioning.{Partitioner, PartitionerOnNone}
import com.stratio.cassandra.lucene.schema.{Schema, SchemaBuilder}
import com.stratio.cassandra.lucene.util.SchemaValidator
import org.apache.cassandra.db.Directories
import org.apache.cassandra.schema.{IndexMetadata, TableMetadata}
import scala.collection.JavaConverters._
/** Index user-specified configuration options parser.
  *
  * Every option is parsed eagerly, in declaration order, while the instance is being built, so
  * construction throws as soon as one of the options is found to be invalid.
  *
  * @param tableMetadata the indexed table metadata
  * @param indexMetadata the index metadata
  * @author Andres de la Pena `[email protected]`
  */
class IndexOptions(tableMetadata: TableMetadata, indexMetadata: IndexMetadata) {

  /** The raw index options, copied into an immutable Scala map */
  val options: Map[String, String] = indexMetadata.options.asScala.toMap

  /** The Lucene index searcher refresh frequency, in seconds */
  val refreshSeconds: Double = parseRefresh(options)

  /** The Lucene's max RAM buffer size, in MB */
  val ramBufferMB: Int = parseRamBufferMB(options)

  /** The Lucene's max segments merge size, in MB */
  val maxMergeMB: Int = parseMaxMergeMB(options)

  /** The Lucene's max cache size, in MB */
  val maxCachedMB: Int = parseMaxCachedMB(options)

  /** The number of asynchronous indexing threads */
  val indexingThreads: Int = parseIndexingThreads(options)

  /** The size of the asynchronous indexing queues */
  val indexingQueuesSize: Int = parseIndexingQueuesSize(options)

  /** The names of the data centers excluded from indexing */
  val excludedDataCenters: List[String] = parseExcludedDataCenters(options)

  /** The mapping schema */
  val schema: Schema = parseSchema(options, tableMetadata)

  /** The index partitioner */
  val partitioner: Partitioner = parsePartitioner(options, tableMetadata)

  /** The path of the directory where the index files will be stored.
    *
    * The index metadata is always supplied here, so a path is always resolved (either the
    * user-specified one or the default one derived from the table directories).
    */
  val path: Path = parsePath(options, tableMetadata, Some(indexMetadata))

  /** If the index is sparse or not */
  val sparse: Boolean = parseSparse(options, tableMetadata)
}
/** Companion object for [[IndexOptions]].
  *
  * Holds the option names, their default values and the parsers that turn the raw
  * `String -> String` index options into typed values. Every parser throws an
  * `IndexException` when the option is present but malformed, and falls back to the
  * corresponding default when the option is absent (except for the schema, which is required).
  */
object IndexOptions {

  /** Name and default of the index searcher refresh frequency option, in seconds. */
  val REFRESH_SECONDS_OPTION: String = "refresh_seconds"
  val DEFAULT_REFRESH_SECONDS: Double = 60D

  /** Name and default of the max RAM buffer size option, in MB. */
  val RAM_BUFFER_MB_OPTION: String = "ram_buffer_mb"
  val DEFAULT_RAM_BUFFER_MB: Int = 64

  /** Name and default of the max segments merge size option, in MB. */
  val MAX_MERGE_MB_OPTION: String = "max_merge_mb"
  val DEFAULT_MAX_MERGE_MB: Int = 5

  /** Name and default of the max cache size option, in MB. */
  val MAX_CACHED_MB_OPTION: String = "max_cached_mb"
  val DEFAULT_MAX_CACHED_MB: Int = 30

  /** Name and default of the asynchronous indexing threads option.
    * The default is the number of processors reported by the JVM when this object is loaded.
    */
  val INDEXING_THREADS_OPTION: String = "indexing_threads"
  val DEFAULT_INDEXING_THREADS: Int = Runtime.getRuntime.availableProcessors

  /** Name and default of the asynchronous indexing queues size option. */
  val INDEXING_QUEUES_SIZE_OPTION: String = "indexing_queues_size"
  val DEFAULT_INDEXING_QUEUES_SIZE: Int = 50

  /** Name and default of the comma-separated excluded data centers option. */
  val EXCLUDED_DATA_CENTERS_OPTION: String = "excluded_data_centers"
  val DEFAULT_EXCLUDED_DATA_CENTERS: List[String] = List[String]()

  /** Name of the option overriding the index directory, and the name of the sub-directory
    * used under the table directory when that option is absent.
    */
  val DIRECTORY_PATH_OPTION: String = "directory_path"
  val INDEXES_DIR_NAME: String = "lucene"

  /** Name of the (mandatory) JSON mapping schema option. */
  val SCHEMA_OPTION: String = "schema"

  /** Name and default of the JSON partitioner option. */
  val PARTITIONER_OPTION: String = "partitioner"
  val DEFAULT_PARTITIONER = PartitionerOnNone()

  /** Name and default of the sparse index option. */
  val SPARSE_OPTION: String = "sparse"
  val DEFAULT_SPARSE: Boolean = false

  /** Validates the specified index options.
    *
    * Runs every option parser and discards the results, so the first malformed option makes
    * this method throw. The `sparse` option is validated too, so that a bad value is rejected
    * here instead of surfacing later when an [[IndexOptions]] is instantiated.
    *
    * @param options  the options to be validated
    * @param metadata the indexed table metadata
    */
  def validate(options: java.util.Map[String, String], metadata: TableMetadata): Unit = {
    val o = options.asScala.toMap
    parseRefresh(o)
    parseRamBufferMB(o)
    parseMaxMergeMB(o)
    parseMaxCachedMB(o)
    parseIndexingThreads(o)
    parseIndexingQueuesSize(o)
    parseExcludedDataCenters(o)
    parseSchema(o, metadata)
    // No index metadata is available here, so only a user-specified path is resolved.
    parsePath(o, metadata, None)
    parsePartitioner(o, metadata)
    parseSparse(o, metadata)
  }

  /** Returns the strictly positive refresh frequency in seconds, or its default if absent. */
  def parseRefresh(options: Map[String, String]): Double =
    parseStrictlyPositiveDouble(options, REFRESH_SECONDS_OPTION, DEFAULT_REFRESH_SECONDS)

  /** Returns the strictly positive RAM buffer size in MB, or its default if absent. */
  def parseRamBufferMB(options: Map[String, String]): Int =
    parseStrictlyPositiveInt(options, RAM_BUFFER_MB_OPTION, DEFAULT_RAM_BUFFER_MB)

  /** Returns the strictly positive max merge size in MB, or its default if absent. */
  def parseMaxMergeMB(options: Map[String, String]): Int =
    parseStrictlyPositiveInt(options, MAX_MERGE_MB_OPTION, DEFAULT_MAX_MERGE_MB)

  /** Returns the strictly positive max cache size in MB, or its default if absent. */
  def parseMaxCachedMB(options: Map[String, String]): Int =
    parseStrictlyPositiveInt(options, MAX_CACHED_MB_OPTION, DEFAULT_MAX_CACHED_MB)

  /** Returns the number of indexing threads, or its default if absent.
    *
    * Unlike the other numeric options, any integer is accepted here (zero and negative
    * values included); only non-integer input is rejected.
    */
  def parseIndexingThreads(options: Map[String, String]): Int =
    parseInt(options, INDEXING_THREADS_OPTION, DEFAULT_INDEXING_THREADS)

  /** Returns the strictly positive indexing queues size, or its default if absent. */
  def parseIndexingQueuesSize(options: Map[String, String]): Int =
    parseStrictlyPositiveInt(options, INDEXING_QUEUES_SIZE_OPTION, DEFAULT_INDEXING_QUEUES_SIZE)

  /** Returns the excluded data center names.
    *
    * The option value is split on commas; each name is trimmed and blank entries are dropped.
    * An absent option yields the (empty) default list.
    */
  def parseExcludedDataCenters(options: Map[String, String]): List[String] =
    options.get(EXCLUDED_DATA_CENTERS_OPTION) match {
      case Some(names) => names.split(",").map(_.trim).filterNot(_.isEmpty).toList
      case None => DEFAULT_EXCLUDED_DATA_CENTERS
    }

  /** Returns the directory where the index files are to be stored.
    *
    * A user-specified `directory_path` always wins. Otherwise, if the index metadata is
    * given, the path is `<table directory>/lucene/<index name>`, where the table directory
    * is the one returned by `Directories.getDirectoryForNewSSTables`.
    *
    * @param options the raw index options
    * @param table   the indexed table metadata
    * @param index   the index metadata, if available
    * @return the resolved path, or `null` when there is neither a user-specified path nor
    *         index metadata to derive a default from
    */
  def parsePath(
      options: Map[String, String],
      table: TableMetadata,
      index: Option[IndexMetadata]): Path =
    options.get(DIRECTORY_PATH_OPTION) match {
      case Some(customPath) => Paths.get(customPath)
      case None =>
        // The table directories are only looked up when a default path is actually needed.
        index.map { indexMetadata =>
          val directories = new Directories(table)
          val basePath = directories.getDirectoryForNewSSTables.getAbsolutePath
          Paths.get(basePath, INDEXES_DIR_NAME, indexMetadata.name)
        }.orNull
    }

  /** Returns the mapping schema built from the JSON `schema` option and validated against
    * the table.
    *
    * @throws IndexException if the option is missing, or if building or validating the
    *                        schema fails
    */
  def parseSchema(options: Map[String, String], table: TableMetadata): Schema =
    options.get(SCHEMA_OPTION) match {
      case Some(json) =>
        try {
          val schema = SchemaBuilder.fromJson(json).build
          SchemaValidator.validate(schema, table)
          schema
        } catch {
          case e: Exception => throw new IndexException(e,
            s"'$SCHEMA_OPTION' is invalid : ${e.getMessage}")
        }
      case None => throw new IndexException(s"'$SCHEMA_OPTION' is required")
    }

  /** Returns the partitioner built from the JSON `partitioner` option, or the default
    * partitioner if the option is absent.
    *
    * @throws IndexException if the partitioner can't be built from the option value
    */
  def parsePartitioner(options: Map[String, String], table: TableMetadata): Partitioner =
    options.get(PARTITIONER_OPTION) match {
      case Some(json) =>
        try {
          Partitioner.fromJson(table, json)
        } catch {
          case e: Exception => throw new IndexException(e,
            s"'$PARTITIONER_OPTION' is invalid : ${e.getMessage}")
        }
      case None => DEFAULT_PARTITIONER
    }

  /** Returns the value of the boolean `sparse` option, or its default if absent.
    *
    * The `table` parameter is not used by the parsing itself.
    *
    * @throws IndexException if the option value is not a valid boolean
    */
  def parseSparse(options: Map[String, String], table: TableMetadata): Boolean =
    options.get(SPARSE_OPTION) match {
      case Some(value) =>
        try value.toBoolean catch {
          case e: Exception => throw new IndexException(e,
            s"'$SPARSE_OPTION' is invalid : ${e.getMessage}")
        }
      case None => DEFAULT_SPARSE
    }

  /** Parses the named option as an integer, returning `default` when it is absent. */
  private def parseInt(options: Map[String, String], name: String, default: Int): Int =
    options.get(name) match {
      case Some(string) =>
        try string.toInt catch {
          case _: NumberFormatException =>
            throw new IndexException(s"'$name' must be an integer, found: $string")
        }
      case None => default
    }

  /** Parses the named option as an integer greater than zero, returning `default` when it
    * is absent. The default itself is not range-checked.
    */
  private def parseStrictlyPositiveInt(
      options: Map[String, String],
      name: String,
      default: Int): Int =
    options.get(name) match {
      case Some(string) =>
        val integer = try string.toInt catch {
          case _: NumberFormatException =>
            throw new IndexException(
              s"'$name' must be a strictly positive integer, found: $string")
        }
        if (integer > 0) integer
        else throw new IndexException(s"'$name' must be strictly positive, found: $integer")
      case None => default
    }

  /** Parses the named option as a decimal greater than zero, returning `default` when it
    * is absent. The default itself is not range-checked.
    */
  private def parseStrictlyPositiveDouble(
      options: Map[String, String],
      name: String,
      default: Double): Double =
    options.get(name) match {
      case Some(string) =>
        val double = try string.toDouble catch {
          case _: NumberFormatException =>
            throw new IndexException(
              s"'$name' must be a strictly positive decimal, found: $string")
        }
        // Written as a positive test so that NaN (for which every comparison is false)
        // is rejected as well.
        if (double > 0) double
        else throw new IndexException(s"'$name' must be strictly positive, found: $double")
      case None => default
    }
}