All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.sql.arangodb.commons.ArangoUtils.scala Maven / Gradle / Ivy

package org.apache.spark.sql.arangodb.commons

import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Encoders, SparkSession}

/**
 * Helper object exposing schema inference for ArangoDB reads.
 *
 * @author Michele Rastelli
 */
object ArangoUtils {

  /**
   * Infers a Spark [[StructType]] from a sample of ArangoDB entries.
   *
   * A client is created from `options`, a sample is read either from the configured query or
   * from the configured collection (depending on `options.readOptions.readMode`), and the
   * sample is handed to Spark's JSON reader, whose inferred schema is returned. When
   * `options.readOptions.columnNameOfCorruptRecord` is non-empty, a nullable string column
   * with that name is appended to the inferred schema.
   *
   * @param options datasource configuration; supplies the client settings, the read mode and
   *                the corrupt-record column name
   * @return the schema inferred from the sampled entries, optionally extended with the
   *         corrupt-record column
   * @throws NoSuchElementException if neither an active nor a default SparkSession exists
   */
  def inferSchema(options: ArangoDBConf): StructType = {
    val client = ArangoClient(options)
    // Shut the client down even when sampling fails, so that it is never leaked.
    val sampleEntries =
      try {
        options.readOptions.readMode match {
          case ReadMode.Query => client.readQuerySample()
          case ReadMode.Collection => client.readCollectionSample()
        }
      } finally {
        client.shutdown()
      }

    // The active session is thread-local; fall back to the default session before giving up.
    val spark = SparkSession.getActiveSession
      .orElse(SparkSession.getDefaultSession)
      .getOrElse(throw new NoSuchElementException(
        "Cannot infer the schema: no active or default SparkSession is available"))

    val schema = spark
      .read
      .json(spark.createDataset(sampleEntries)(Encoders.STRING))
      .schema

    val corruptRecordColumn = options.readOptions.columnNameOfCorruptRecord
    if (corruptRecordColumn.isEmpty) {
      schema
    } else {
      schema.add(StructField(corruptRecordColumn, StringType, nullable = true))
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy