// com.bigdata.rdf.inf.Justification (artifact listing residue: Maven / Gradle / Ivy)
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.rdf.inf;
import java.util.Arrays;
import java.util.UUID;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.IVariableOrConstant;
import com.bigdata.btree.BTree;
import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.raba.codec.EmptyRabaValueCoder;
import com.bigdata.journal.TemporaryRawStore;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVUtility;
import com.bigdata.rdf.model.StatementEnum;
import com.bigdata.rdf.rules.InferenceEngine;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.JustificationTupleSerializer;
import com.bigdata.rdf.spo.SPO;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.rdf.spo.SPOTupleSerializer;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.IRawTripleStore;
import com.bigdata.rdf.store.TempTripleStore;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.eval.ISolution;
import com.bigdata.striterator.IChunkedOrderedIterator;
/**
*
* A justification for a {@link StatementEnum#Inferred} statement. The head is
* the entailed statement. The tail of the justification is one or more triple
* pattern(s). Consider rdf1
*
*
*
* (?u ?a ?y) -> (?a rdf:type rdf:Property)
*
*
*
* Then the triple pattern for the tail is:
*
*
*
* (0 ?a 0)
*
*
*
* where 0 reprents a {@link IRawTripleStore#NULL} term identifier.
*
*
* So a justification chain for rdf1 would be:
*
*
*
* head := [?a rdf:type rdf:Property]
*
* tail := [0 ?a 0]
*
*
*
* In fact, the total bindings for the rule are represented as a long[] with the
* head occupying the 1st N positions in that array and the bindings for the
* tail appearing thereafter in the declared order of the predicates in the
* tail.
*
*
*
* When a {@link StatementEnum#Explicit} statement is to be retracted from the
* database we need to determined whether or not there exists a grounded
* justification for that statement (same head). For each justification for that
* statement we consider the tail. If there exists either an explicit statement
* that satisifies the triple pattern for the tail or if there exists an
* inference that satisifies the triple pattern for the tail and the inference
* can be proven to be grounded by recursive examination of its justifications,
* then the head is still valid and is converted from an explicit statement into
* an inference.
*
*
* This looks more or less like: Find all statements matching the
* pattern. If any are explicit, then that part of the tail is grounded. If none
* are explicit, then chase the justification recursively. Only retract a
* justification when it can no longer be grounded.
*
*
*
* The concept of grounded vs ungrounded justifications is described in Inferencing and
* Truth Maintenance in RDF Schema : Exploring a naive practical approach
* by Jeen Broekstra and Arjohn Kampman.
*
*
* @todo the tails could be represented more efficiently if we only stored the
* variable bindings and not all values in each tail. however, we might
* then need the rule on hand in order to decode the tail(s) and
* substitute in the missing constants.
*
* @author Bryan Thompson
* @version $Id$
*/
public class Justification implements Comparable {
protected static transient final Logger log = Logger.getLogger(Justification.class);
/**
* True iff the {@link #log} level is INFO or less.
*/
final static public boolean INFO = log.getEffectiveLevel().toInt() <= Level.INFO
.toInt();
/**
* True iff the {@link #log} level is DEBUG or less.
*/
final static public boolean DEBUG = log.getEffectiveLevel().toInt() <= Level.DEBUG
.toInt();
/**
* The #of term identifiers in a statement.
*/
// private static final transient int N = IRawTripleStore.N;
private final transient int N;
/**
* From the ctor, but not persisted.
*/
public final transient IRule rule;
/**
* Term identifiers for the head and bindings.
*
* Divide the length by the #of terms in a statement #N and subtract one to
* find the #of bindings for the tail. The first N entries are the head. The
* rest are the tail.
*
* Note: A term identifier MAY be {@link IRawTripleStore#NULL} to indicate a
* wildcard.
*/
final IV[] ivs;
// /**
// * Construct an entailment for an {@link StatementEnum#Inferred} statement.
// *
// * @param rule
// * The rule that licensed the entailment (this is only used for
// * debugging).
// * @param head
// * The entailment licensed by the rule and the bindings.
// * @param bindings
// * The bindings for that rule that licensed the entailment.
// *
// * @todo no longer used?
// */
// public Justification(Rule rule, SPO head, SPO[] bindings) {
//
//// assert rule != null;
// assert head != null;
// assert bindings != null;
//
// this.rule = rule;
//
// ids = new long[ (1 + bindings.length ) * N];
//
// int i = 0;
//
// ids[i++] = head.s;
// ids[i++] = head.p;
// ids[i++] = head.o;
//
// for( SPO spo : bindings ) {
//
// ids[i++] = spo.s;
// ids[i++] = spo.p;
// ids[i++] = spo.o;
//
// }
//
// }
/**
* Returns the head as an {@link SPO}.
*
* Note: The {@link StatementEnum} associated with the head is actually
* unknown, but it is marked as {@link StatementEnum#Inferred} in the
* returned object. In order to discover the {@link StatementEnum} for the
* head you MUST either already know it (this is not uncommon) or you MUST
* read one of the statement indices.
*
* @return
*/
public SPO getHead() {
return new SPO(ivs[0], ivs[1], ivs[2], StatementEnum.Inferred);
}
/**
* Returns the tail as an {@link SPO}[].
*
* Note: The {@link StatementEnum} associated triple patterns in the tail is
* actually unknown, but it is marked as {@link StatementEnum#Inferred} in
* the returned object. In fact, since the tail consists of triple patterns
* and not necessarily fully bound triples, the concept of a
* {@link StatementEnum} is not even defined.
*
* @return
*/
public SPO[] getTail() {
// #of triple patterns in the tail.
final int m = (ivs.length / N) - 1;
SPO[] tail = new SPO[m];
// for each triple pattern in the tail.
int j = N;
for(int i=0; i
* Modify the new rule execution layer to assign variables an integer index
* in [0:nvars] for each rule and then we can do positional decoding of the
* binding set and loose the requirement for the rule when generating
* justifications.
*
* This might not be that important for scale-out since the solutions are, I
* believe, processed solely in a local buffer for Insert and Delete and
* only serialized for Query.
*/
public Justification(ISolution solution) {
// the rule that licensed the entailment.
final IRule rule = solution.getRule();
this.N = rule.getHead().arity();
// the entailed statement.
final SPO head = (SPO) solution.get();
// the binding set for the solution.
final IBindingSet bindingSet = solution.getBindingSet();
// final long[] bindings = new long[(rule.getTailCount() + 1) * N];
assert rule != null;
assert head != null;
assert head.isFullyBound();
assert bindingSet != null;
// // verify enough bindings for one or more triple patterns.
// assert bindings.length % N == 0 : "bindings.length=" + bindings.length;
// assert bindings.length / N >= 1 : "bindings.length=" + bindings.length;
this.rule = rule; // Note: transient field.
// #of triple patterns in the tail.
final int tailCount = rule.getTailCount();
// allocate enough for the head and the tail.
ivs = new IV[(1 + tailCount) * N];
int j = 0;
ivs[j++] = head.s;
ivs[j++] = head.p;
ivs[j++] = head.o;
/*
* Note: Some of variables in the tail(s) are left unbound by some of
* the rules, e.g., rdfs1. This is because any binding for those
* variables is valid. The justifications index treats these unbound
* variables as wildcards and represents them with 0L in the key. (The
* computed entailment is ALWAYS fully bound, it is just that some of
* the variables in the tails might be unbound).
*/
for (int tailIndex = 0; tailIndex < tailCount; tailIndex++) {
final IPredicate predicate = rule.getTail(tailIndex);
for(int i=0; i t = predicate.get(i);
final IV id;
if (t.isVar()) {
final IConstant c = bindingSet.get((IVariable)t);
if(c == null) {
id = null;
} else {
id = c.get();
}
} else {
id = t.get();
}
ivs[j++] = id;
}
}
}
// /**
// * Deserialize a justification from an index entry.
// *
// * @param itr
// * The iterator visiting the index entries.
// *
// * @todo use {@link ITupleSerializer} to deserialize
// */
// public Justification(ITupleIterator itr) {
//
// final ITuple tuple = itr.next();
//
// final ByteArrayBuffer kbuf = tuple.getKeyBuffer();
//
// final int keyLen = kbuf.limit();
//
// final byte[] data = kbuf.array();
//
// this.rule = null; // Not persisted.
//
// // verify key is even multiple of (N*sizeof(long)).
// assert keyLen % (N * Bytes.SIZEOF_LONG) == 0;
//
// // #of term identifiers in the key.
// final int m = keyLen / Bytes.SIZEOF_LONG;
//
// // A justification must include at least a head and one tuple in the tail.
// assert m >= N * 2 : "keyLen="+keyLen+", N="+N+", m="+m;
//
// ids = new long[m];
//
// for (int i = 0; i < m; i++) {
//
// ids[i] = KeyBuilder.decodeLong(data, i * Bytes.SIZEOF_LONG);
//
// }
//
// }
/**
* Used by the {@link JustificationTupleSerializer} to materialize
* justifications.
*
* @param ids
* The bindings on the head and tail(s).
*/
public Justification(final int N, final IV[] ids) {
this.rule = null; // not serialized.
this.N = N;
this.ivs = ids;
}
/**
* Serialize a justification as an index key. The key length is a function
* of the #of bindings in the justification.
*
* @param keyBuilder
* A key builder.
*
* @return The key.
*/
static public byte[] getKey(IKeyBuilder keyBuilder, Justification jst) {
if (keyBuilder == null)
throw new IllegalArgumentException();
if (jst == null)
throw new IllegalArgumentException();
keyBuilder.reset();
@SuppressWarnings("rawtypes")
final IV[] ivs = jst.ivs;
for (int i = 0; i < ivs.length; i++) {
IVUtility.encode(keyBuilder, ivs[i]);
}
return keyBuilder.getKey();
}
public boolean equals(final Justification o) {
// Note: ignores transient [rule].
if (this == o)
return true;
return Arrays.equals(ivs, o.ivs);
}
/**
* Places the justifications into an ordering that clusters them based on
* the entailment is being justified.
*/
public int compareTo(final Justification o) {
// the length of the longer ids[].
final int len = ivs.length > o.ivs.length ? ivs.length : o.ivs.length;
// compare both arrays until a difference emerges or one is exhausted.
for (int i = 0; i < len; i++) {
if (i >= ivs.length) {
// shorter with common prefix is ordered first.
return -1;
} else if (i >= o.ivs.length) {
// shorter with common prefix is ordered first.
return 1;
}
/*
* Both arrays have data for this index.
*
* Note: logic avoids possible overflow of [long] by not computing the
* difference between two longs.
*/
int ret = IVUtility.compare(ivs[i], o.ivs[i]);
if (ret != 0)
return ret;
}
// identical values and identical lengths.
assert ivs.length == o.ivs.length;
return 0;
}
public String toString() {
return toString(null);
}
public String toString(final AbstractTripleStore db) {
StringBuilder sb = new StringBuilder();
if (rule != null) {
sb.append(rule.getName());
sb.append("\n");
}
// tail
{
// #of triple patterns in the tail.
final int m = (ivs.length / N) - 1;
for (int i = 0; i < m; i++) {
sb.append("\t(");
for (int j = 0; j < N; j++) {
final IV,?> id = ivs[i * N + N + j];
sb.append((db == null ? "" + id : db.toString(id)));
if (j + 1 < N)
sb.append(", ");
}
sb.append(")");
if (i + 1 < m) {
sb.append(", \n");
}
}
sb.append("\n\t-> ");
}
// head
{
sb.append("(");
// Note: test on i id = ivs[i];
sb.append((db == null ? "" + id : db.toString(id)));
if (i + 1 < N)
sb.append(", ");
}
sb.append(")");
}
return sb.toString();
}
/**
* Return true iff a grounded justification chain exists for the statement.
*
* @param focusStore
* The focusStore contains the set of statements that are being
* retracted from the database. When looking for grounded
* justifications we do NOT consider any statement that is found
* in this store. This prevents statements that are being
* retracted from providing either their own justification or the
* justiciation of any other statement that is being retracted at
* the same time.
* @param db
* The database from which the statements are to be retracted and
* in which we will search for grounded justifications.
* @param head
* A triple pattern. When invoked on a statement during truth
* maintenance this will be fully bound. However, during
* recursive processing triple patterns may be encountered in the
* tail of {@link Justification}s that are not fully bound. In
* such cases we test for any statement matching the triple
* pattern that can be proven to be grounded.
* @param testHead
* When true
the head will be tested
* against the database on entry before seeking a grounded
* justification chain. When false
head will not
* be tested directly but we will still seek a grounded
* justification chain.
* @param testFocusStore
*
* @param visited
* A set of head (whether fully bound or query patterns) that
* have already been considered. This parameter MUST be newly
* allocated on each top-level call. It is used in order to avoid
* infinite loops by rejecting for further consideration any head
* which has already been visited.
*
* @return True iff the statement is entailed by a grounded justification
* chain in the database.
*
* @todo this is depth 1st. would breadth 1st be faster?
*/
public static boolean isGrounded(
InferenceEngine inf,
TempTripleStore focusStore,
AbstractTripleStore db,
SPO head,
boolean testHead,
boolean testFocusStore
) {
final VisitedSPOSet visited = new VisitedSPOSet(focusStore.getIndexManager());
try {
boolean ret = isGrounded(inf, focusStore, db, head, testHead, testFocusStore, visited);
if (log.isInfoEnabled())
log.info("head=" + head + " is " + (ret ? "" : "NOT ")
+ "grounded : testHead=" + testHead
+ ", testFocusStore=" + testFocusStore + ", #visited="
+ visited.size());
/*
* FIXME we could also memoize goals that have been proven false at
* this level since we know the outcome for a specific head (fully
* bound or a query pattern). experiment with this and see if it
* reduces the costs of TM. it certainly should if we are running
* the same query a lot!
*/
return ret;
} finally {
/*
* Note: This "closes" the visited set (dicards the BTree), but the
* visited set is backed by the [focusStore] and that MUST NOT be
* closed since it is still in use by the caller!
*/
visited.close();
}
}
public static boolean isGrounded(
InferenceEngine inf,
TempTripleStore focusStore,
AbstractTripleStore db,
ISPO head,
boolean testHead,
boolean testFocusStore,
VisitedSPOSet visited
) {
assert focusStore != null;
if(DEBUG) {
log.debug("head=" + head.toString(db) + ", testHead=" + testHead
+ ", testFocusStore=" + testFocusStore + ", #visited="
+ visited.size());
}
if(testHead) {
if(head.getStatementType()!=StatementEnum.Inferred) return true;
if(db.isAxiom(head.s(), head.p(), head.o())) return true;
if(!visited.add(head)) {
/*
* Note: add() returns true if the element was added and false
* if it was pre-existing. The presence of a pre-existing query
* or fully bound SPO in this set means that we have already
* consider it. In this case we return false without further
* consideration in order to avoid entering into an infinite
* loop among the justification chains.
*/
if(DEBUG) log.debug("Already visited: "+head.toString(db));
return false;
}
/*
* Scan the statement indices for the head. This covers both the
* case when it is fully bound (since we need to know whether or not
* it is explicit) and the case when it has unbound positions (where
* we need to scan them and see if any matching statements in the
* database are explicit).
*
* @todo could be optimized for a point test when fully bound?
*/
final IChunkedOrderedIterator itr = db.getAccessPath(head.s(),
head.p(), head.o()).iterator();
try {
while(itr.hasNext()) {
final ISPO spo = itr.next();
if(DEBUG) log.debug("considering: "+spo.toString(db));
if(db.isAxiom(spo.s(), spo.p(), spo.o())) return true;
if (spo.getStatementType() == StatementEnum.Explicit) {
/*
* If we do not have to test the focusStore then we are
* done.
*/
if (!testFocusStore) return true;
/*
* Before we can accept this spo as providing support
* for a grounded justification we have to test the
* focusStore and make sure that this is NOT one of the
* statements that is being retracted.
*/
if (!focusStore.hasStatement(spo.s(), spo.p(), spo.o())) {
/*
* This spo provides grounded support for a
* justification.
*/
return true;
}
// fall through.
}
/*
* depth-first recursion to see if the statement is grounded.
*
* Note: testHead is [false] now since we just tested the head.
*/
if (isGrounded(inf,focusStore, db, spo, false, testFocusStore, visited)) {
// recursively grounded somewhere.
return true;
}
// otherwise consider the next spo.
}
} finally {
itr.close();
}
}
if(head.isFullyBound()) {
/*
* Examine all justifications for the statement. If any of them are
* grounded then the statement is still entailed by the database.
*
* FIXME add the 'head' parameter to the JustificationIterator and
* then use it here so that we do not have to fully buffer the
* justifications for a given statement (how bad this is really
* depends on how many justifications we find for a given statement
* since the asynchronous read-ahead iterator will buffer a chunk
* anyway for efficiency and if that chunk spans all justifications
* for a given head then it is all the same thing).
*/
final IJustificationIterator itr =
new FullyBufferedJustificationIterator(db,head);
while(itr.hasNext()) {
/*
* For each justification we consider the bindings. The first N are
* just the statement that was proven. The remaining bindings are
* M-1 triple patterns of N elements each.
*/
final Justification jst = itr.next();
if (DEBUG)
log.debug("Considering:\n" + jst.toString(db));
final SPO[] tail = jst.getTail();
/*
* if all in tail are explicit in the statement indices, then done.
*
* since tail is triple patterns, we have to scan those patterns for
* the first explicit statement matched.
*
* if none in tail are explicit, then we can recurse. we could also
* scan the rest of the justifications for something that was easily
* proven to be explicit. it is a depth vs breadth 1st issue.
*
* this is definately going to be expensive in a distributed store
* since it is all random RPCs.
*/
boolean ok = true;
for( SPO t : tail ) {
if (!isGrounded(inf,focusStore, db, t, true/* testHead */,testFocusStore, visited)) {
ok = false;
if(DEBUG)
log.debug("Not grounded: tail="+t.toString(db));
break;
}
}
if(ok) {
if(DEBUG)
log.debug("Grounded:\n"+jst.toString(db));
return true;
}
} // next justification.
} // head.isFullyBound()
if(DEBUG) log.debug("Not grounded: "+head.toString(db));
return false;
}
/**
* A collection of {@link SPO} objects (either fully bound or query
* patterns) that have already been visited.
*
* Note: This is a very specialized {@link SPO} set implementation. How it
* is created and destroyed is tightly integrated with how
* {@link TruthMaintenance} works.
*
* @author Bryan
* Thompson
* @version $Id$
*
* @todo this class is public only because of TestJustifications. it should
* be private.
*/
public static class VisitedSPOSet {
private BTree btree;
private final SPOTupleSerializer tupleSer;
/**
* Create an {@link SPO} set backed by a {@link BTree} on the temporary
* store associated with the [focusStore] on which truth maintenance is
* being performed. The data written on this set will not last longer
* than
* {@link Justification#isGrounded(InferenceEngine, TempTripleStore, AbstractTripleStore, SPO, boolean, boolean)}
* . When that method exists it {@link #close()}s this
* {@link VisitedSPOSet} which causes the {@link BTree} to be discarded
* but DOES NOT close the backing store since it is still in use by
* {@link TruthMaintenance}.
*
* @param tempStore
* The backing store on which the set will be maintained.
* This is the [focusStore] for {@link TruthMaintenance}.
*/
public VisitedSPOSet(final TemporaryRawStore tempStore) {
final IndexMetadata metadata = new IndexMetadata(UUID.randomUUID());
// metadata.setBranchingFactor(32); // Use default. Could be store specific.
// FIXME quads : use different tupleSerializer IFF cross graph TM is supported.
// assert arity == 3;
// Note: keys are SPOs; no values stored for the tuples.
tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO, false/* sids */,
DefaultTupleSerializer.getDefaultLeafKeysCoder(),
EmptyRabaValueCoder.INSTANCE);
metadata.setTupleSerializer(tupleSer);
btree = BTree.create(tempStore, metadata);
}
/**
*
* @param spo
*
* @return true
iff the set did not already contain the
* element (i.e., if the element was added to the set).
*/
public boolean add(final ISPO spo) {
if (DEBUG)
log.debug(spo.toString());
final byte[] key = tupleSer.serializeKey(spo);
if (!btree.contains(key)) {
btree.insert(key, null);
return true;
}
return false;
}
public long size() {
return btree.getEntryCount();
}
/**
* Discards anything written on the btree. If nothing has been written
* on the backing store yet then nothing ever will be.
*/
public void close() {
if (btree != null) {
// discards the data in the btree, creating a new root.
btree.removeAll();
// discard the hard reference.
btree = null;
/*
* Note: !!!! DO NOT close the backing store here !!!!
*
* Note: The visited set is backed by the [focusStore] and that
* MUST NOT be closed since it is still in use by the caller!
* See isGrounded() which is where this gets used.
*/
// btree.getStore().close();
}
}
protected void finalized() throws Exception {
close();
}
}
}