All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sansa_stack.owl.spark.package.scala Maven / Gradle / Ivy

package net.sansa_stack.owl.spark

import net.sansa_stack.owl.spark.rdd._
import net.sansa_stack.owl.spark.writers._
import org.apache.hadoop.fs.Path
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.semanticweb.owlapi.formats._
import org.semanticweb.owlapi.model.{AxiomType, OWLAxiom, OWLDocumentFormat}

/**
 * Thrown when an OWL serialization format is unknown or not supported by the
 * writer implicits in this package.
 *
 * @param msg human-readable description of the unsupported format
 */
// Bug fix: the original did not forward `msg` to the Exception constructor,
// so `getMessage` always returned null and the error detail was lost.
class UnknownOWLFormatException(msg: String) extends Exception(msg)

/**
 * Wrap up implicit classes/methods to read OWL data into either
 * [[org.apache.spark.sql.Dataset]] or [[org.apache.spark.rdd.RDD]].
 *
 * @author Gezim Sejdiu
 * @author Patrick Westphal
 */
package object owl {

  /**
   * SaveMode specifies the expected behavior when saving an OWL dataset/RDD
   * to a path that may already exist.
   *
   * FIXME: This code was copied from SANSA-RDF and should go into a dedicated SANSA commons package
   */
  object SaveMode extends Enumeration {
    type SaveMode = Value

    /**
     * Overwrite mode means that when saving an OWL dataset/RDD to a path,
     * if the path already exists, the existing data is expected to be
     * overwritten by the contents of the RDF dataset.
     */
    val Overwrite: Value = Value

    /**
     * ErrorIfExists mode means that when saving an OWL dataset/RDD to a
     * path, if the path already exists, an exception is expected to be thrown.
     */
    val ErrorIfExists: Value = Value

    /**
     * Ignore mode means that when saving an OWL dataset/RDD to a path, if
     * the path already exists, the save operation is expected to not save the
     * contents of the RDF dataset and to not change the existing data.
     */
    val Ignore: Value = Value
  }

  /** The OWL serialization syntaxes supported by the reader implicits below. */
  object Syntax extends Enumeration {
    /** OWL Functional-Style syntax */
    val FUNCTIONAL: Value = Value
    /** OWL Manchester syntax */
    val MANCHESTER: Value = Value
    /** OWL/XML syntax */
    val OWLXML: Value = Value
  }

  /**
   * Adds the methods `owl(syntax: Syntax)`, `functional`, `manchester` and
   * `owlXml` to [[org.apache.spark.sql.SparkSession]], allowing OWL files to
   * be read into axiom RDDs.
   */
  implicit class OWLAxiomReader(spark: SparkSession) {

    /**
     * Load OWL data into a [[org.apache.spark.rdd.RDD]][OWLAxiom]. Currently,
     * only functional, Manchester and OWL/XML syntax are supported.
     *
     * @param syntax the OWL serialization (functional, Manchester or OWL/XML)
     * @return a function mapping an input path to a
     *         [[net.sansa_stack.owl.spark.rdd.OWLAxiomsRDD]]
     */
    def owl(syntax: Syntax.Value): String => OWLAxiomsRDD = syntax match {
      case Syntax.FUNCTIONAL => functional
      case Syntax.MANCHESTER => manchester
      case Syntax.OWLXML => owlXml
      case _ => throw new IllegalArgumentException(
        s"$syntax syntax not supported yet!")
    }

    /**
     * Load OWL data in Functional syntax into an
     * [[org.apache.spark.rdd.RDD]][OWLAxiom].
     *
     * @return a function mapping an input path to the
     *         [[net.sansa_stack.owl.spark.rdd.OWLAxiomsRDD]]
     */
    def functional: String => OWLAxiomsRDD =
      FunctionalSyntaxOWLAxiomsRDDBuilder.build(spark, _)

    /**
     * Load OWL data in Manchester syntax into an
     * [[org.apache.spark.rdd.RDD]][OWLAxiom].
     *
     * @return a function mapping an input path to the
     *         [[net.sansa_stack.owl.spark.rdd.OWLAxiomsRDD]]
     */
    def manchester: String => OWLAxiomsRDD =
      ManchesterSyntaxOWLAxiomsRDDBuilder.build(spark, _)

    /**
     * Load OWL data in OWL/XML syntax into an
     * [[org.apache.spark.rdd.RDD]][OWLAxiom].
     *
     * @return a function mapping an input path to the
     *         [[net.sansa_stack.owl.spark.rdd.OWLAxiomsRDD]]
     */
    def owlXml: String => OWLAxiomsRDD =
      OWLXMLSyntaxOWLAxiomsRDDBuilder.build(spark, _)
  }

  /**
   * Adds the methods `owlExpressions(syntax: Syntax)`, `functional`,
   * `manchester` and `owlXml` to [[org.apache.spark.sql.SparkSession]],
   * allowing OWL files to be read into RDDs of raw expression strings.
   */
  implicit class OWLExpressionsRDDReader(spark: SparkSession) {

    /**
     * Load OWL data into an [[org.apache.spark.rdd.RDD]][String]. Currently,
     * only functional, Manchester and OWL/XML syntax are supported.
     *
     * @param syntax the OWL serialization (functional, Manchester or OWL/XML)
     * @return a function mapping an input path to a
     *         [[net.sansa_stack.owl.spark.rdd.OWLExpressionsRDD]]
     */
    def owlExpressions(syntax: Syntax.Value): String => OWLExpressionsRDD = syntax match {
      case Syntax.FUNCTIONAL => functional
      case Syntax.MANCHESTER => manchester
      case Syntax.OWLXML => owlXml
      case _ => throw new IllegalArgumentException(
        s"$syntax syntax not supported yet!")
    }

    /**
     * Load OWL data in Functional syntax into an
     * [[org.apache.spark.rdd.RDD]][String].
     *
     * @return a function mapping an input path to the
     *         [[net.sansa_stack.owl.spark.rdd.OWLExpressionsRDD]]
     */
    def functional: String => OWLExpressionsRDD =
      FunctionalSyntaxOWLExpressionsRDDBuilder.build(spark, _)

    /**
     * Load OWL data in Manchester syntax into an
     * [[org.apache.spark.rdd.RDD]][String].
     *
     * @return a function mapping an input path to the
     *         [[net.sansa_stack.owl.spark.rdd.OWLExpressionsRDD]]
     */
    def manchester: String => OWLExpressionsRDD =
      ManchesterSyntaxOWLExpressionsRDDBuilder.build(spark, _)

    /**
     * Load OWL data in OWL/XML syntax into an
     * [[org.apache.spark.rdd.RDD]][String].
     *
     * @return a function mapping an input path to the
     *         [[net.sansa_stack.owl.spark.rdd.OWLExpressionsRDD]]
     */
    def owlXml: String => OWLExpressionsRDD = inputPath =>
      // The OWL/XML builder returns a tuple; the third component is the
      // expressions RDD (same projection as the original code).
      OWLXMLSyntaxOWLExpressionsRDDBuilder.build(spark, inputPath)._3
  }

  /**
   * Adds a `save` method to [[net.sansa_stack.owl.spark.rdd.OWLAxiomsRDD]]
   * for writing the axioms to a (Hadoop) path in a chosen OWL serialization
   * format.
   */
  implicit class OWLWriter(axioms: OWLAxiomsRDD) {
    /**
     * Saves the axioms to the given path in the given OWL document format.
     *
     * @param path target path (resolved via the Hadoop file system API)
     * @param format the OWL serialization format to write
     * @param saveMode what to do if the target path already exists
     *                 (default: ErrorIfExists)
     * @param exitOnError if true and saveMode is ErrorIfExists with an
     *                    existing path, terminate the JVM with exit code 1
     *                    instead of throwing
     * @throws RuntimeException if the path exists, saveMode is ErrorIfExists
     *                          and exitOnError is false
     * @throws UnknownOWLFormatException if the format is not supported
     */
    def save(
              path: String,
              format: OWLDocumentFormat,
              saveMode: SaveMode.Value = SaveMode.ErrorIfExists,
              exitOnError: Boolean = false): Unit = {

      val fsPath = new Path(path)
      val fs = fsPath.getFileSystem(axioms.sparkContext.hadoopConfiguration)

      // Decide whether to proceed, honoring the save mode for existing paths.
      val doSave = if (fs.exists(fsPath)) {
        saveMode match {
          case SaveMode.Overwrite =>
            fs.delete(fsPath, true)
            true
          case SaveMode.ErrorIfExists =>
            // Bug fix: `sys.error` always throws, so in the original code the
            // `if (exitOnError) sys.exit(1)` that followed it was unreachable
            // and the exitOnError flag was silently ignored. Honor the flag
            // before raising.
            if (exitOnError) {
              System.err.println(s"Given path $path already exists!")
              sys.exit(1)
            }
            sys.error(s"Given path $path already exists!")
          case SaveMode.Ignore => false
          case _ =>
            throw new IllegalStateException(s"Unsupported save mode $saveMode")
        }
      } else {
        true
      }

      if (doSave) {
        // Dispatch on the concrete OWLAPI document format type.
        format match {
          case _: FunctionalSyntaxDocumentFormat => OWLFunctionalSyntaxWriter.save(path, axioms)
          case format: LabelFunctionalDocumentFormat =>
            throw new NotImplementedError(s"Support for ${format.getClass.getName} not implemented, yet")
          case _: KRSS2DocumentFormat => KRSS2SyntaxWriter.save(path, axioms)
          case _: KRSSDocumentFormat => KRSSSyntaxWriter.save(path, axioms)
          case _: DLSyntaxDocumentFormat => DLSyntaxWriter.save(path, axioms)
          case _: OBODocumentFormat => OBOWriter.save(path, axioms)
          case _: RDFJsonDocumentFormat => RDFJsonWriter.save(path, axioms)
          case _: RDFJsonLDDocumentFormat => JsonLDWriter.save(path, axioms)
          case _: NQuadsDocumentFormat => NQuadsWriter.save(path, axioms)
          case _: NTriplesDocumentFormat => NTriplesWriter.save(path, axioms)
          case _: ManchesterSyntaxDocumentFormat => ManchesterOWLSyntaxWriter.save(path, axioms)
          case _: RDFXMLDocumentFormat => RDFXMLWriter.save(path, axioms)
          case _: TurtleDocumentFormat => TurtleWriter.save(path, axioms)
          case _: N3DocumentFormat => N3Writer.save(path, axioms)
          case _: TrixDocumentFormat => TrixWriter.save(path, axioms)
          case _: OWLXMLDocumentFormat => OWLXMLWriter.save(path, axioms)
          case _ =>
            throw new UnknownOWLFormatException(
              s"OWL serialization format ${format.toString} unknown or not supported")
        }
      }
    }
  }
}

package object owlAxioms {
  /**
   * Returns only those axioms from the input RDD whose axiom type equals `T`.
   *
   * @param axioms the axioms to filter
   * @param T the axiom type to keep
   * @return an RDD containing the matching axioms
   */
  def extractAxioms(axioms: RDD[OWLAxiom], T: AxiomType[_]): RDD[OWLAxiom] =
    axioms.filter(axiom => axiom.getAxiomType.equals(T))
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy