All downloads are FREE. Search and download functionality uses the official Maven repository.

org.elasticsearch.spark.sql.DefaultSource.scala Maven / Gradle / Ivy

package org.elasticsearch.spark.sql

import java.util.Arrays

import scala.collection.JavaConverters.mapAsJavaMapConverter
import scala.collection.mutable.LinkedHashMap

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.sources.PrunedScan
import org.apache.spark.sql.sources.RelationProvider
import org.elasticsearch.hadoop.cfg.ConfigurationOptions
import org.elasticsearch.hadoop.cfg.InternalConfigurationOptions
import org.elasticsearch.hadoop.util.StringUtils
import org.elasticsearch.spark.cfg.SparkSettingsManager

/**
 * Relation provider that turns the options given to the data source into an
 * [[ElasticsearchRelation]].
 *
 * Option keys are normalized before use:
 *  - every `_` in a key is replaced by `.`;
 *  - the key `path` is renamed to `es.resource`;
 *  - any other key that does not already start with `es.` gets that prefix.
 */
private[sql] class DefaultSource extends RelationProvider {

  /**
   * Builds the relation from the normalized options.
   *
   * @param sqlContext context the relation is bound to
   * @param parameters raw options; after normalization they must contain `es.resource`
   * @return an [[ElasticsearchRelation]] configured with the normalized options
   * @throws RuntimeException (through `sys.error`) when no `es.resource` entry is present
   */
  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation = {

    // '.' is awkward to use in option names, so '_' is accepted in its place
    val dotted = parameters.map { case (key, value) => key.replace('_', '.') -> value }

    // second pass: bring every key into the "es." namespace
    val settings = dotted.map {
      case (key, value) if key.startsWith("es.") => key -> value
      case ("path", value)                       => "es.resource" -> value
      case (key, value)                          => ("es." + key) -> value
    }

    if (!settings.contains("es.resource")) {
      sys.error("resource must be specified for Elasticsearch resources.")
    }

    ElasticsearchRelation(settings)(sqlContext)
  }
}

/**
 * Relation over an Elasticsearch resource that supports column pruning.
 *
 * @param parameters normalized connector settings used for every scan
 * @param sqlContext context the relation belongs to; transient, so it is not serialized
 */
private[sql] case class ElasticsearchRelation(parameters: Map[String, String])
  (@transient val sqlContext: SQLContext)
  extends PrunedScan {

  /** Settings loaded from the Spark configuration, with `parameters` merged on top. */
  @transient lazy val cfg =
    new SparkSettingsManager().load(sqlContext.sparkContext.getConf).merge(parameters.asJava)

  /** Mapping discovered through `SchemaUtils` using `cfg`; computed on first access. */
  @transient lazy val lazySchema = SchemaUtils.discoverMapping(cfg)

  // a plain val: evaluating it forces `lazySchema` when the relation is constructed
  override val schema = lazySchema.struct

  /** Full scan (TableScan-style signature): reads with the relation's parameters unchanged. */
  def buildScan() = new ScalaEsRowRDD(sqlContext.sparkContext, parameters, lazySchema)

  /**
   * Pruned scan (PrunedScan): restricts the read to the requested columns.
   *
   * @param requiredColumns names of the columns to project
   */
  def buildScan(requiredColumns: Array[String]) = {
    // start from a copy of the relation's parameters, then layer the scan-specific entries on top
    val scanSettings = LinkedHashMap[String, String]()
    scanSettings ++= parameters

    val projectedFields =
      StringUtils.concatenate(requiredColumns.asInstanceOf[Array[Object]], StringUtils.DEFAULT_DELIMITER)
    scanSettings += (InternalConfigurationOptions.INTERNAL_ES_TARGET_FIELDS -> projectedFields)

    // metadata reading is switched off when it is enabled but its field was not requested
    if (cfg.getReadMetadata && !requiredColumns.contains(cfg.getReadMetadataField)) {
      scanSettings += (ConfigurationOptions.ES_READ_METADATA -> "false")
    }

    new ScalaEsRowRDD(sqlContext.sparkContext, scanSettings, lazySchema)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy