All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datastax.spark.connector.util.ConfigCheck.scala Maven / Gradle / Ivy

The newest version!
package com.datastax.spark.connector.util

import org.apache.commons.lang3.StringUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.cassandra.CassandraSourceRelation
import com.datastax.spark.connector.cql.{AuthConfFactory, CassandraConnectionFactory, CassandraConnectorConf}
import com.datastax.spark.connector.rdd.ReadConf
import com.datastax.spark.connector.types.ColumnTypeConf
import com.datastax.spark.connector.writer.WriteConf

import scala.collection.mutable

/**
  * Helper class to throw exceptions if there are environment variables in the spark.cassandra
  * namespace which don't map to Spark Cassandra Connector known properties.
  */
object ConfigCheck {

  val MatchThreshold = 0.85
  val Prefix = "spark.cassandra."



  /** Set of valid static properties hardcoded in the connector.
    * Custom CassandraConnectionFactory and AuthConf properties are not listed here. */
  val validStaticProperties: mutable.Set[ConfigParameter[_]] = ConfigParameter.staticParameters

  val validStaticPropertyNames = validStaticProperties.map(_.name)

  val deprecatedProperties: mutable.Set[DeprecatedConfigParameter[_]] = DeprecatedConfigParameter.deprecatedParameters

 /**
    * Checks the SparkConf Object for any unknown spark.cassandra.* properties and throws an exception
    * with suggestions if an unknown property is found.
    *
    * @param conf SparkConf object to check
    */
  def checkConfig(conf: SparkConf): Unit = {
    for (deprecatedProperty <- deprecatedProperties) {
      deprecatedProperty.maybeReplace(conf)
    }

    val connectionFactory = CassandraConnectionFactory.fromSparkConf(conf)
    val authConfFactory = AuthConfFactory.fromSparkConf(conf)
    val extraProps = connectionFactory.properties ++ authConfFactory.properties

    /** We no longer support snake_case properties, so we are going to fail
      * all the tests if a snake case parameter is added back.
      */
    val invalidProperties =
      (validStaticPropertyNames ++ extraProps)
        .filter( property => property.contains("_"))

    if (invalidProperties.nonEmpty) throw new IllegalArgumentException(
      s"Developer Error: These properties will not work in SparkSql/DataSets " +
        s"because of capitals: ${invalidProperties.mkString(",")}")

    val unknownProps = unknownProperties(conf, extraProps)
    if (unknownProps.nonEmpty) {
      val suggestions =
        for (u <- unknownProps) yield (u, suggestedProperties(u))
      throw new ConnectorConfigurationException(unknownProps, suggestions.toMap)
    }
  }

  def unknownProperties(conf: SparkConf, extraProps: Set[String] = Set.empty): Seq[String] = {
    val validProps = validStaticPropertyNames ++ extraProps
    val scEnv = for ((key, value) <- conf.getAll if key.startsWith(Prefix)) yield key
    for (key <- scEnv if !validProps.contains(key)) yield key
  }

  /**
    * For a given unknown property determine if we have any guesses as
    * to what the property should be. Suggestions are found by
    * breaking the unknown property into fragments.
    * spark.cassandra.foo.bar => [foo,bar]
    * Any known property which has some fragments which fuzzy match
    * all of the fragments of the unknown property are considered a
    * suggestion.
    *
    * Fuzziness is determined by MatchThreshold
    */
  def suggestedProperties(unknownProp: String, extraProps: Set[String] = Set.empty): Seq[String] = {
    val validProps = validStaticPropertyNames ++ extraProps
    val unknownFragments = unknownProp.stripPrefix(Prefix).split("\\.")
    validProps.toSeq.filter { knownProp =>
      val knownFragments = knownProp.stripPrefix(Prefix).split("\\.")
      unknownFragments.forall { unknown =>
        knownFragments.exists { known =>
          val matchScore = StringUtils.getJaroWinklerDistance(unknown, known)
          matchScore >= MatchThreshold
        }
      }
    }
  }

  class ConnectorConfigurationException(message: String) extends Exception(message) {
    /**
      * Exception to be thrown when unknown properties are found in the SparkConf
      *
      * @param unknownProps Properties that have no mapping to known Spark Cassandra Connector properties
      * @param suggestionMap A map possibly containing suggestions for each of of the unknown properties
      */
    def this(unknownProps: Seq[String], suggestionMap: Map[String, Seq[String]]) = this({
      val intro =
        "Invalid Config Variables\n" +
          "Only known spark.cassandra.* variables are allowed when using the Spark Cassandra Connector.\n"
      val body = unknownProps.map { unknown =>
        suggestionMap.get(unknown) match {
          case Some(Seq()) | None =>
            s"""$unknown is not a valid Spark Cassandra Connector variable.
               |No likely matches found.""".stripMargin
          case Some(suggestions) =>
            s"""$unknown is not a valid Spark Cassandra Connector variable.
               |Possible matches:
               |${suggestions.mkString("\n")}""".stripMargin
        }
      }.mkString("\n")
      intro + body
    })
  }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy