All downloads are free. The search and download functionality uses the official Maven repository.

org.dbpedia.extraction.destinations.PublisherDiffDestination.scala Maven / Gradle / Ivy

The newest version!
package org.dbpedia.extraction.destinations

import org.dbpedia.extraction.live.core.LiveOptions

import scala.collection.mutable
import org.dbpedia.extraction.live.main.Main
import org.dbpedia.extraction.live.publisher.DiffData

import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer

/**
 * Live destination that buffers the quads handed to `write` and, on `close`,
 * puts them on `Main.publishingDataQueue` as a single [[DiffData]] for `pageID`.
 *
 * The added and deleted quads are always forwarded. When `cleanUpdate` is true,
 * `close` additionally forwards:
 *    - the unmodified quads as a separate "re-insert" set
 *    - one marker quad per distinct subject seen in any buffer (predicate
 *      "http://dbpedia.org/delete", value " ?p ?o "), skipping subjects that are
 *      not http URIs or that contain "dbpedia.org/property"
 *
 * When `cleanUpdate` is false both of these extra sets are empty.
 *
 * NOTE(review): `subjects` is not read anywhere inside this class; presumably the
 * caller keeps using the set it passed in -- confirm.
 *
 * @param pageID      id forwarded as the first argument of the published DiffData
 * @param cleanUpdate whether the re-insert set and the per-subject delete markers are produced
 * @param subjects    not used by this class (see note above)
 */
class PublisherDiffDestination(val pageID: Long, val cleanUpdate: Boolean, val subjects: java.util.Set[String]) extends LiveDestination {

  // Buffers filled by write() and consumed by close().
  var added: ArrayBuffer[Quad] = ArrayBuffer.empty[Quad]
  var deleted: ArrayBuffer[Quad] = ArrayBuffer.empty[Quad]
  var unmodified: ArrayBuffer[Quad] = ArrayBuffer.empty[Quad]

  /** Nothing to prepare. */
  def open(): Unit = {}

  /**
   * Appends the three quad groups to the matching buffers.
   * `extractor` and `hash` are not used by this destination.
   */
  def write(extractor: String, hash: String, graphAdd: Seq[Quad], graphRemove: Seq[Quad], graphUnmodified: Seq[Quad]): Unit = {
    added ++= graphAdd
    deleted ++= graphRemove
    unmodified ++= graphUnmodified
  }

  /**
   * Copies the buffers into Java hash sets (dropping duplicate quads) and
   * puts the resulting DiffData on the publishing queue.
   */
  def close(): Unit = {
    val additions = new java.util.HashSet[Quad](added)
    val removals = new java.util.HashSet[Quad](deleted)
    val reinsertions =
      if (cleanUpdate) new java.util.HashSet[Quad](unmodified)
      else new java.util.HashSet[Quad]()

    // One "<subject> ?p ?o" marker per distinct subject; stays empty unless cleanUpdate is set.
    val clearMarkers = new java.util.HashSet[Quad]()
    if (cleanUpdate) {
      val touchedSubjects = mutable.HashSet.empty[String]
      Seq(added, deleted, unmodified).foreach { buffer =>
        buffer.foreach(quad => touchedSubjects += quad.subject)
      }

      touchedSubjects
        .filter(isClearable)
        .foreach(uri => clearMarkers.add(deleteMarkerFor(uri)))
    }

    Main.publishingDataQueue.put(new DiffData(pageID, additions, removals, reinsertions, clearMarkers))
  }

  /** True for http URIs that are not global property definitions. */
  private def isClearable(uri: String): Boolean =
    !uri.contains("dbpedia.org/property") && uri.startsWith("http")

  /** Builds the marker quad that stands for the pattern "<uri> ?p ?o". */
  private def deleteMarkerFor(uri: String): Quad =
    new Quad(LiveOptions.language, "", uri, "http://dbpedia.org/delete", " ?p ?o ", "", "http://www.w3.org/2001/XMLSchema#string")

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy