All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sansa_stack.rdf.spark.partition.semantic.SemanticRdfPartitionUtilsSpark.scala Maven / Gradle / Ivy

package net.sansa_stack.rdf.spark.partition.semantic

import net.sansa_stack.rdf.common.partition.utils.Symbols
import org.apache.jena.graph.Triple
import org.apache.spark.rdd._

/**
 * Semantic partition of and RDF graph
 */
object SemanticRdfPartitionUtilsSpark extends Serializable {

  /**
   * Apply semantic partitioning for a given RDF graph
   * @param triples an RDD of triples.
   * @return semantic partition data.
   */
  def partitionGraph(triples: RDD[Triple]): RDD[String] = {
    val symbol = Symbols.symbol
    // partition the data
    val partitionedData = triples
      .distinct
      .filter(_.getSubject.getURI.nonEmpty) // ignore SUBJECT with empty URI
      .map(triple => {
        var filteredPredicate: Any = triple.getPredicate
        var filteredObject: Any = triple.getObject

        // set: PREDICATE
        if (triple.getPredicate.isURI && triple.getPredicate.getURI.contains(symbol("hash"))) {
          filteredPredicate = triple.getPredicate.getURI.split(symbol("hash"))(1)

          // set: OBJECT where PREDICATE is a "type"
          if (filteredPredicate.equals("type") && triple.getObject.isURI && triple.getObject.getURI.contains(symbol("hash"))) {
            filteredObject = symbol("colon") + triple.getObject.getURI.split(symbol("hash"))(1)
          } else if (triple.getObject.isURI) {
            filteredObject = symbol("less-than") + triple.getObject + symbol("greater-than")
          } else {
            filteredObject = triple.getObject
          }

          // add colon at the start
          filteredPredicate = symbol("colon") + filteredPredicate
        } else {
          // PREDICATE
          if (triple.getPredicate.isURI) {
            filteredPredicate = symbol("less-than") + triple.getPredicate + symbol("greater-than")
          }

          // OBJECT
          if (triple.getObject.isURI) {
            filteredObject = symbol("less-than") + triple.getObject + symbol("greater-than")
          }
        }

        // (K,V) pair
        (
          symbol("less-than") + triple.getSubject + symbol("greater-than"),
          filteredPredicate + symbol("space") + filteredObject + symbol("space"))
      })
      .reduceByKey(_ + _) // group based on key
      // .sortBy(x => x._1) // sort by key
      // .map(_.swap).sortByKey(false)
      .map(x => x._1 + symbol("space") + x._2) // output format

    partitionedData
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy