package com.lucidworks.spark.util

import org.apache.spark.sql.{DataFrameReader, DataFrameWriter, Row, SaveMode}
/**
 * Usage:
 *
 * import SolrDataFrameImplicits._
 * // then you can:
 * val spark: SparkSession
 * val collectionName: String
 * val df = spark.read.solr(collectionName)
 * // do stuff
 * df.write.solr(collectionName, overwrite = true)
 * // or various other combinations, such as setting your own options first
 * df.write.option("zkhost", "some other solr cluster's zk host").solr(collectionName)
 */
object SolrDataFrameImplicits {

  /** Adds `solr` read methods to [[DataFrameReader]] so a collection can be loaded directly. */
  implicit class SolrReader(reader: DataFrameReader) {
    def solr(collection: String, query: String = "*:*") =
      reader.format("solr").option("collection", collection).option("query", query).load()
    def solr(collection: String, options: Map[String, String]) =
      reader.format("solr").option("collection", collection).options(options).load()
  }
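
  // A sketch of the two reader overloads (illustrative only; the collection name, field
  // names, and ZooKeeper host below are placeholders). Assumes an existing SparkSession `spark`:
  //
  //   val horror = spark.read
  //     .option("zkhost", "localhost:9983")
  //     .solr("movies", "genre_s:horror")                       // query-string overload
  //   val recent = spark.read
  //     .option("zkhost", "localhost:9983")
  //     .solr("movies", Map("query" -> "year_i:[2000 TO *]"))   // options-map overload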

  /**
   * Adds a `solr` write method to [[DataFrameWriter]] so a DataFrame can be saved to a
   * collection, with an optional soft-commit interval and overwrite/append behaviour.
   */
  implicit class SolrWriter(writer: DataFrameWriter[Row]) {
    def solr(collectionName: String, softCommitSecs: Int = 10, overwrite: Boolean = false, format: String = "solr") = {
      writer
        .format(format)
        .option("collection", collectionName)
        .option("soft_commit_secs", softCommitSecs.toString)
        .mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append)
        .save()
    }
  }
}
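
// Illustrative end-to-end sketch, not part of the original source: assumes a local Solr
// cloud with ZooKeeper at localhost:9983, an existing "movies" collection, a target
// "movies_copy" collection, and the spark-solr connector on the classpath. All names
// and hosts below are placeholders.
object SolrDataFrameImplicitsExample {

  import org.apache.spark.sql.SparkSession
  import SolrDataFrameImplicits._

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("solr-implicits-example")
      .master("local[*]")
      .getOrCreate()

    // Read the whole "movies" collection via the reader implicit (default query is *:*).
    val df = spark.read
      .option("zkhost", "localhost:9983")
      .solr("movies")

    // Write it back out, soft-committing every 5 seconds and overwriting existing data.
    df.write
      .option("zkhost", "localhost:9983")
      .solr("movies_copy", softCommitSecs = 5, overwrite = true)

    spark.stop()
  }
}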