All downloads are free. Search and download functionality uses the official Maven repository.

com.databricks.spark.xml.util.ValidatorUtil.scala Maven / Gradle / Ivy

/*
 * Copyright 2019 Databricks
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.databricks.spark.xml.util

import java.nio.file.Paths
import javax.xml.validation.{Schema, SchemaFactory}
import javax.xml.XMLConstants

import com.google.common.cache.{CacheBuilder, CacheLoader}

import org.apache.spark.SparkFiles

/**
 * Helpers for obtaining compiled XSD schemas used for validation.
 */
private[xml] object ValidatorUtil {

  // Compiling an XSD may be slow, so compiled schemas are memoized, keyed by the path string
  // exactly as the caller supplied it. The cache is built with softValues().
  private val cache = CacheBuilder.newBuilder().softValues().build(
    new CacheLoader[String, Schema] {
      override def load(key: String): Schema = compileSchema(key)
    })

  /**
   * Locates the XSD file for the given location and compiles it into a Schema.
   *
   * The location is first tried as a file path as-is; if no file exists there, it is
   * resolved through SparkFiles.get, which covers files that were added with sc.addFile.
   *
   * @param location path to the XSD as supplied by the caller
   * @return Schema compiled from the resolved file
   */
  private def compileSchema(location: String): Schema = {
    val asGiven = Paths.get(location).toFile
    val xsdFile =
      if (asGiven.exists()) asGiven
      else Paths.get(SparkFiles.get(location)).toFile
    SchemaFactory
      .newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
      .newSchema(xsdFile)
  }

  /**
   * Returns the Schema for the XSD at the given local path, parsing it on the first
   * request and serving it from the cache afterwards.
   *
   * @param path path to XSD
   * @return Schema for the file at that path
   */
  def getSchema(path: String): Schema = cache.get(path)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy