// com.smartlogic.rdfdiff.RDFDifferenceBuilder Maven / Gradle / Ivy
package com.smartlogic.rdfdiff;
import org.apache.jena.ext.com.google.common.base.Preconditions;
import org.apache.jena.ext.com.google.common.collect.*;
import org.apache.jena.rdf.model.*;
import org.apache.jena.vocabulary.SKOSXL;
import java.util.*;
/**
* Builds a difference between two models and returns a structure with differences.
*/
public class RDFDifferenceBuilder {
/**
* Build and return an RDFDifference between two models. All statements with bnodes are ignored.
* @param modelLeft
* @param modelRight
* @return
*/
public static RDFDifference buildDifference(Model modelLeft, Model modelRight) {
Model inLeftOnlyModel = ModelFactory.createDefaultModel();
Model inRightOnlyModel = ModelFactory.createDefaultModel();
{
StmtIterator it = modelLeft.listStatements();
while (it.hasNext()) {
Statement stmt = it.nextStatement();
if (!stmt.getSubject().isAnon() && !stmt.getObject().isAnon() && !modelRight.contains(stmt)) {
inLeftOnlyModel.add(stmt);
}
}
}
{
StmtIterator it = modelRight.listStatements();
while (it.hasNext()) {
Statement stmt = it.nextStatement();
if (!stmt.getSubject().isAnon() && !stmt.getObject().isAnon() && !modelLeft.contains(stmt)) {
inRightOnlyModel.add(stmt);
}
}
}
/*
modelLeft.listStatements().toList()
.stream()
.filter(stmt -> !stmt.getObject().isAnon() && !stmt.getSubject().isAnon() && !modelRight.contains(stmt))
.forEach(stmt -> inLeftOnlyModel.add(stmt));
modelRight.listStatements().toList()
.stream()
.filter(stmt -> !stmt.getObject().isAnon() && !stmt.getSubject().isAnon() && !modelLeft.contains(stmt))
.forEach(stmt -> inRightOnlyModel.add(stmt));
*/
return new RDFDifference(modelLeft, modelRight, inLeftOnlyModel, inRightOnlyModel);
}
/**
* Organizes the differences by subject URI and bundles them together into batches.
* Optionally "chase" secondary subjects for given Properties and add them to the primary subject batch.
* (to group prefLabel and altLabel changes for concepts into the same batch)
* @param diff - the diff to batch by subject
* @param chaseIncludeSubjectProperties - properties to traverse to include additional secondary subjects (i.e. prefLabels)
*/
public static Collection buildSubjectBatches(RDFDifference diff, List chaseIncludeSubjectProperties) {
Preconditions.checkArgument(diff != null);
if (null == chaseIncludeSubjectProperties) {
chaseIncludeSubjectProperties = Lists.newArrayList();
}
Map resourceToModel = Maps.newHashMap();
Set subjectResources = Sets.newHashSet();
Multimap extraIncludeResources = HashMultimap.create();
/*
* Build the set of unique Resource objects in this diff.
*/
{
ResIterator it = diff.inLeftOnly.listSubjects();
while (it.hasNext()) {
Resource res = it.nextResource();
if (!subjectResources.contains(res)) {
subjectResources.add(res);
}
}
}
{
ResIterator it = diff.inRightOnly.listSubjects();
while (it.hasNext()) {
Resource res = it.nextResource();
if (!subjectResources.contains(res)) {
subjectResources.add(res);
}
}
}
/*
* Iterate the set of subjects, and sort the diff into the right subject-based diff.
*/
Iterator it = subjectResources.iterator();
while (it.hasNext()) {
Resource r = it.next();
if (!resourceToModel.containsKey(r)) {
RDFSubjectDifference sd = new RDFSubjectDifference(r, diff.leftModel, diff.rightModel);
resourceToModel.put(r, sd);
}
RDFSubjectDifference sd = resourceToModel.get(r);
StmtIterator leftStmtIt = diff.inLeftOnly.listStatements(r, (Property) null, (RDFNode) null);
while (leftStmtIt.hasNext()) {
Statement stmt = leftStmtIt.nextStatement();
sd.inLeftOnly.add(stmt);
if (chaseIncludeSubjectProperties.contains(stmt.getPredicate()) &&
!extraIncludeResources.containsEntry(r, stmt.getObject())) {
extraIncludeResources.put(r, (Resource)stmt.getObject());
}
}
StmtIterator rightStmtIt = diff.inRightOnly.listStatements(r, (Property) null, (RDFNode) null);
while (rightStmtIt.hasNext()) {
Statement stmt = rightStmtIt.nextStatement();
sd.inRightOnly.add(stmt);
if (chaseIncludeSubjectProperties.contains(stmt.getPredicate()) &&
!extraIncludeResources.containsEntry(r, stmt.getObject())) {
extraIncludeResources.put(r, (Resource)stmt.getObject());
}
}
}
/*
* If we have extra subjects to include, sort and add those to the corresponding batch
*/
if (extraIncludeResources.size() > 0) {
/* Iterate the Multimap that has original subject mapped to secondary subjects set.
* We then iterate those secondary subjects and add to primary batch.
*/
for (Resource primarySubject : extraIncludeResources.keySet()) {
RDFSubjectDifference sd = resourceToModel.get(primarySubject);
for (Resource secondarySubject : extraIncludeResources.get(primarySubject)) {
StmtIterator leftStmtIt = diff.inLeftOnly.listStatements(secondarySubject, (Property) null, (RDFNode) null);
while (leftStmtIt.hasNext()) {
Statement stmt = leftStmtIt.nextStatement();
sd.inLeftOnly.add(stmt);
}
StmtIterator rightStmtIt = diff.inRightOnly.listStatements(secondarySubject, (Property) null, (RDFNode) null);
while (rightStmtIt.hasNext()) {
Statement stmt = rightStmtIt.nextStatement();
sd.inRightOnly.add(stmt);
}
/* remove the batch entry for the secondary subject. now promoted to primary subject */
resourceToModel.remove(secondarySubject);
}
}
}
return resourceToModel.values();
}
/**
* Returns the default list of properties to chase and include subjects
* when batching by primary subject. (altLabel, prefLabel)
* @return
*/
public static List getDefaultChaseIncludeProps() {
List props = Lists.newArrayList();
props.add(SKOSXL.prefLabel);
props.add(SKOSXL.altLabel);
return props;
}
}