All downloads are free. The search and download features use the official Maven repository.

fix.MultiLineDatasetReadWarn.scala Maven / Gradle / Ivy

package fix

import scalafix.v1._
import scala.meta._

/**
 * Lint diagnostic describing the changed handling of `\r\n` line endings when
 * reading multi-line textual input.
 *
 * @param tn the syntax tree the diagnostic is attached to; its position is the
 *           position reported for the lint
 */
case class MultiLineDatasetReadWarning(tn: scala.meta.Tree) extends Diagnostic {

  /** Explanation shown to the user; margins are stripped from the literal below. */
  override def message: String = {
    val explanation =
      """In Spark 2.4.X and below,
        |when reading multi-line textual input with \r\n (windows line feed) _might_
        |leave \rs. You can get this legacy behaviour by specifying a lineSep of "\n",
        |but for most people this was bug.
        |This linter rule is fuzzy.""".stripMargin
    explanation
  }

  /** The diagnostic is reported at the location of the wrapped tree. */
  override def position: Position = tn.pos
}

/**
 * Scalafix semantic rule that attaches a [[MultiLineDatasetReadWarning]] lint to
 * `DataFrameReader.option` call sites that mention the multi-line option.
 *
 * Detection is textual and therefore fuzzy: a tree is flagged when its symbol
 * matches `DataFrameReader#option` and its source text contains "multiline".
 * The comparison ignores case, because Spark documents the option as `multiLine`
 * and a case-sensitive search for "multiline" would miss that spelling.
 */
class MultiLineDatasetReadWarn extends SemanticRule("MultiLineDatasetReadWarn") {
  /** Matches trees whose symbol is `org.apache.spark.sql.DataFrameReader#option`. */
  val matcher: SymbolMatcher =
    SymbolMatcher.normalized("org.apache.spark.sql.DataFrameReader#option")

  override val description: String = "MultiLine text input dataframe warning."

  /**
   * Produces one lint per matching `option` tree, or an empty patch when the
   * document never mentions a quoted multi-line option name.
   *
   * @param doc the semantic document being analysed
   * @return the combined lint patch for this document
   */
  override def fix(implicit doc: SemanticDocument): Patch = {
    // Imperfect: someone may have the string "multiline" for another reason while
    // reading from a DataFrame, but it is an ok place to start.
    // Cheap whole-file pre-check so files without the quoted option skip the traversal.
    val source = lowerCased(doc.input.text)
    val mentionsOption =
      source.contains("'multiline'") || source.contains("\"multiline\"")

    if (mentionsOption) {
      doc.tree.collect {
        // Trees that match the symbol but do not mention the option contribute nothing.
        case matcher(read) if lowerCased(read.toString).contains("multiline") =>
          Patch.lint(MultiLineDatasetReadWarning(read))
      }.asPatch
    } else {
      Patch.empty
    }
  }

  /** Locale-independent lowercasing, so matching does not vary with the JVM default locale. */
  private def lowerCased(text: String): String =
    text.toLowerCase(java.util.Locale.ROOT)
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy