All downloads are free. The search and download features use the official Maven repository.

com.lucidworks.spark.SolrConf.scala Maven / Gradle / Ivy

package com.lucidworks.spark

import com.lucidworks.spark.util.ConfigurationConstants._
import com.lucidworks.spark.util.QueryConstants._
import com.lucidworks.spark.util.SolrRelationUtil
import org.apache.commons.lang3.StringUtils
import org.apache.solr.common.params.ModifiableSolrParams

/**
 * Typed, read-only accessors over a `String -> String` option map.
 *
 * Each accessor looks up one option key (the keys come from the imported
 * `ConfigurationConstants` / `QueryConstants`) and, where the accessor's type is not
 * `String`, converts the raw value with `toInt` / `toBoolean` / `toFloat`. Those
 * conversions throw if the stored text is not parseable; that is left to propagate.
 *
 * @param config the raw options; must be non-null and non-empty
 */
class SolrConf(config: Map[String, String]) extends Serializable with LazyLogging {

  require(config != null, "Config cannot be null")
  require(config.nonEmpty, "Config cannot be empty")

  /** ZooKeeper host resolved from a system property / environment variable, cached by [[getZkHost]]. */
  var zkHostFromDriverEnv : Option[String] = None

  /** Looks up `key` and converts the value with `toInt` when the key is present. */
  private def intOption(key: String): Option[Int] = config.get(key).map(_.toInt)

  /** Looks up `key` and converts the value with `toBoolean` when the key is present. */
  private def booleanOption(key: String): Option[Boolean] = config.get(key).map(_.toBoolean)

  /**
   * Resolves the ZooKeeper host, in priority order:
   *  1. a value previously cached in `zkHostFromDriverEnv`,
   *  2. the `SOLR_ZK_HOST_PARAM` option,
   *  3. the `solr.zkhost` Java system property, falling back to the `SOLR_ZKHOST`
   *     environment variable.
   *
   * A value found in step 3 is cached in `zkHostFromDriverEnv` before being returned.
   */
  def getZkHost: Option[String] =
    zkHostFromDriverEnv
      .orElse(config.get(SOLR_ZK_HOST_PARAM))
      .orElse {
        // allow users to set the zkhost using a Java system property or env property
        val fromEnvironment =
          Option(System.getProperty("solr.zkhost", System.getenv("SOLR_ZKHOST")))
        // stash it for later use in executors
        if (fromEnvironment.isDefined) zkHostFromDriverEnv = fromEnvironment
        fromEnvironment
      }

  def getCollection: Option[String] = config.get(SOLR_COLLECTION_PARAM)

  def getCollectionAlias: Option[String] = config.get(COLLECTION_ALIAS)

  def getQuery: Option[String] = config.get(SOLR_QUERY_PARAM)

  def getStreamingExpr: Option[String] = config.get(SOLR_STREAMING_EXPR)

  def getSqlStmt: Option[String] = config.get(SOLR_SQL_STMT)

  def getSplitField: Option[String] = config.get(SOLR_SPLIT_FIELD_PARAM)

  /**
   * The `SOLR_FIELD_PARAM` option split on commas with each piece trimmed.
   * Returns an empty array when the option is absent or is exactly `*`.
   */
  def getFields: Array[String] =
    config.get(SOLR_FIELD_PARAM) match {
      case Some(fieldList) if !StringUtils.equals(fieldList, "*") =>
        fieldList.split(",").map(_.trim)
      case _ =>
        Array.empty[String]
    }

  /**
   * The `SOLR_FILTERS_PARAM` option parsed by
   * `SolrRelationUtil.parseCommaSeparatedValuesToList`, or an empty list when absent.
   */
  def getFilters: List[String] =
    config.get(SOLR_FILTERS_PARAM) match {
      case Some(rawFilters) => SolrRelationUtil.parseCommaSeparatedValuesToList(rawFilters)
      case None => List.empty
    }

  def partitionBy: Option[String] = config.get(PARTITION_BY)

  def getTimestampFieldName: Option[String] = config.get(TIMESTAMP_FIELD_NAME)

  def getTimePeriod: Option[String] = config.get(TIME_PERIOD)

  def getDateTimePattern: Option[String] = config.get(DATETIME_PATTERN)

  def getTimeZoneId: Option[String] = config.get(TIMEZONE_ID)

  def getMaxActivePartitions: Option[String]= config.get(MAX_ACTIVE_PARTITIONS)

  def getSort: Option[String] = config.get(SORT_PARAM)

  def getStreamingExpressionSchema: Option[String] = config.get(STREAMING_EXPR_SCHEMA)

  def getSolrSQLSchema: Option[String] = config.get(SOLR_SQL_SCHEMA)

  def getExcludeFields: Option[String] = config.get(EXCLUDE_FIELDS)

  def getChildDocFieldName: Option[String] = config.get(CHILD_DOC_FIELDNAME)

  def skipNonDocValueFields: Option[Boolean] = booleanOption(SKIP_NON_DOCVALUE_FIELDS)

  def maxRows: Option[Int] = intOption(MAX_ROWS)

  def getAccumulatorName: Option[String] = config.get(ACCUMULATOR_NAME)

  def getRows: Option[Int] = intOption(SOLR_ROWS_PARAM)

  def splits: Option[Boolean] = booleanOption(SOLR_DO_SPLITS)

  def docValues: Option[Boolean] = booleanOption(SOLR_DOC_VALUES)

  /** The `SOLR_SPLITS_PER_SHARD_PARAM` option as an Int; defaults to `Some(1)` when absent. */
  def getSplitsPerShard: Option[Int] =
    intOption(SOLR_SPLITS_PER_SHARD_PARAM).orElse(Some(1))

  def escapeFieldNames: Option[Boolean] = booleanOption(ESCAPE_FIELDNAMES_PARAM)

  def flattenMultivalued: Option[Boolean] = booleanOption(FLATTEN_MULTIVALUED)

  def softAutoCommitSecs: Option[Int] = intOption(SOFT_AUTO_COMMIT_SECS)

  def commitWithin: Option[Int] = intOption(COMMIT_WITHIN_MILLI_SECS)

  def batchSize: Option[Int] = intOption(BATCH_SIZE)

  def batchSizeType: Option[String] = config.get(BATCH_SIZE_TYPE)

  def useCursorMarks: Option[Boolean] = booleanOption(USE_CURSOR_MARKS)

  def genUniqKey: Option[Boolean] = booleanOption(GENERATE_UNIQUE_KEY)

  def genUniqChildKey: Option[Boolean] = booleanOption(GENERATE_UNIQUE_CHILD_KEY)

  def sampleSeed: Option[Int] = intOption(SAMPLE_SEED)

  def samplePct: Option[Float] = config.get(SAMPLE_PCT).map(_.toFloat)

  def schema: Option[String] = config.get(SCHEMA)

  def getMaxShardsForSchemaSampling: Option[Int] = intOption(MAX_SHARDS_FOR_SCHEMA_SAMPLING)

  /**
   * The request handler to use.
   *
   * An explicit `REQUEST_HANDLER` option always wins. Otherwise the handler is inferred:
   * `QT_STREAM` when a streaming expression is configured, else `QT_SQL` when a SQL
   * statement is configured, else `None`. Each inference is logged at debug level.
   */
  def requestHandler: Option[String] =
    config.get(REQUEST_HANDLER).orElse {
      if (config.contains(SOLR_STREAMING_EXPR)) {
        // no explicit request handler but a streaming expression was given, so use the stream handler
        logger.debug(s"Set ${REQUEST_HANDLER} to ${QT_STREAM} because the ${SOLR_STREAMING_EXPR} option is set.")
        Some(QT_STREAM)
      } else if (config.contains(SOLR_SQL_STMT)) {
        // no explicit request handler but a SQL statement was given, so use the SQL handler
        logger.debug(s"Set ${REQUEST_HANDLER} to ${QT_SQL} because the ${SOLR_SQL_STMT} option is set.")
        Some(QT_SQL)
      } else {
        None
      }
    }

  def getSolrFieldTypes: Option[String] = config.get(SOLR_FIELD_TYPES)

  /**
   * Parses the `ARBITRARY_PARAMS_STRING` option as `&`-separated `key=value` pairs into
   * a new `ModifiableSolrParams`.
   *
   * Each pair is split at its first `=`; segments without an `=` are ignored. Keys and
   * values are added verbatim (no decoding is applied here). Returns an empty params
   * object when the option is absent.
   */
  def getArbitrarySolrParams: ModifiableSolrParams = {
    val solrParams = new ModifiableSolrParams()
    config.get(ARBITRARY_PARAMS_STRING).foreach { paramString =>
      paramString.split("&").foreach { param =>
        val eqAt = param.indexOf('=')
        if (eqAt != -1) {
          solrParams.add(param.substring(0, eqAt), param.substring(eqAt + 1))
        }
      }
    }
    solrParams
  }

  /** The entries of `config` whose keys are reported by `SolrRelation.checkUnknownParams`. */
  def getExtraOptions: Map[String, String] = {
    val extraParams = SolrRelation.checkUnknownParams(config.keySet)
    config.filter { case (key, _) => extraParams.contains(key) }
  }

  /**
   * Renders the configuration as `SolrConf(key=value, ...)`.
   *
   * The ZooKeeper host and collection are always printed (as `Option`s); `extraOptions`
   * is always printed; every other setting is printed only when it has a value. Each
   * accessor is evaluated once, in the order listed below.
   */
  override def toString: String = {
    // Formats one optional setting as "key=value", or yields None so it is skipped.
    def entry(key: String, value: Option[Any]): Option[String] =
      value.map(v => s"$key=$v")

    val entries: Seq[Option[String]] = Seq(
      Some(s"${SOLR_ZK_HOST_PARAM}=${getZkHost}"),
      Some(s"${SOLR_COLLECTION_PARAM}=${getCollection}"),
      entry(SOLR_QUERY_PARAM, getQuery),
      Some(getFields).filter(_.nonEmpty).map(f => s"${SOLR_FIELD_PARAM}=${f.mkString(",")}"),
      entry(SOLR_ROWS_PARAM, getRows),
      entry(MAX_ROWS, maxRows),
      entry(SOLR_DO_SPLITS, splits),
      entry(SOLR_DOC_VALUES, docValues),
      entry(SOLR_SPLIT_FIELD_PARAM, getSplitField),
      entry(SOLR_SPLITS_PER_SHARD_PARAM, getSplitsPerShard),
      entry(ESCAPE_FIELDNAMES_PARAM, escapeFieldNames),
      entry(FLATTEN_MULTIVALUED, flattenMultivalued),
      entry(REQUEST_HANDLER, requestHandler),
      entry(USE_CURSOR_MARKS, useCursorMarks),
      entry(SAMPLE_SEED, sampleSeed),
      entry(SAMPLE_PCT, samplePct),
      entry(SORT_PARAM, getSort),
      // parse the arbitrary params string once and print it only when non-empty
      Some(getArbitrarySolrParams).filter(_.size() > 0).map(p => s"${ARBITRARY_PARAMS_STRING}=${p}"),
      entry(EXCLUDE_FIELDS, getExcludeFields),
      entry(SKIP_NON_DOCVALUE_FIELDS, skipNonDocValueFields),
      entry(SOLR_STREAMING_EXPR, getStreamingExpr),
      entry(STREAMING_EXPR_SCHEMA, getStreamingExpressionSchema),
      entry(SOLR_SQL_STMT, getSqlStmt),
      entry(SOLR_SQL_SCHEMA, getSolrSQLSchema),
      entry(MAX_SHARDS_FOR_SCHEMA_SAMPLING, getMaxShardsForSchemaSampling),
      Some(s"extraOptions=${getExtraOptions}"),

      // time-based partitioning options
      entry(PARTITION_BY, partitionBy),
      entry(TIMESTAMP_FIELD_NAME, getTimestampFieldName),
      entry(TIME_PERIOD, getTimePeriod),
      entry(DATETIME_PATTERN, getDateTimePattern),
      entry(TIMEZONE_ID, getTimeZoneId),
      entry(MAX_ACTIVE_PARTITIONS, getMaxActivePartitions),

      // indexing options
      entry(GENERATE_UNIQUE_KEY, genUniqKey),
      entry(SOFT_AUTO_COMMIT_SECS, softAutoCommitSecs),
      entry(COMMIT_WITHIN_MILLI_SECS, commitWithin),
      entry(BATCH_SIZE, batchSize),
      entry(BATCH_SIZE_TYPE, batchSizeType),
      entry(CHILD_DOC_FIELDNAME, getChildDocFieldName),
      entry(ACCUMULATOR_NAME, getAccumulatorName),
      entry(SOLR_FIELD_TYPES, getSolrFieldTypes),
      entry(COLLECTION_ALIAS, getCollectionAlias)
    )

    entries.flatten.mkString("SolrConf(", ", ", ")")
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy