
/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
package com.bigdata.rdf.inf;


import java.util.Arrays;
import java.util.UUID;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.IVariableOrConstant;
import com.bigdata.btree.BTree;
import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.raba.codec.EmptyRabaValueCoder;
import com.bigdata.journal.TemporaryRawStore;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVUtility;
import com.bigdata.rdf.model.StatementEnum;
import com.bigdata.rdf.rules.InferenceEngine;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.JustificationTupleSerializer;
import com.bigdata.rdf.spo.SPO;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.rdf.spo.SPOTupleSerializer;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.IRawTripleStore;
import com.bigdata.rdf.store.TempTripleStore;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.eval.ISolution;
import com.bigdata.striterator.IChunkedOrderedIterator;

/**
 * A justification for a {@link StatementEnum#Inferred} statement. The head
 * is the entailed statement. The tail of the justification is one or more
 * triple pattern(s). Consider <code>rdf1</code>:
 * 
 * <pre>
 *    (?u ?a ?y) -&gt; (?a rdf:type rdf:Property)
 * </pre>
 * 
 * Then the triple pattern for the tail is:
 * 
 * <pre>
 *    (0 ?a 0)
 * </pre>
 * 
 * where 0 represents a {@link IRawTripleStore#NULL} term identifier.
 * <p>
 * So a justification chain for <code>rdf1</code> would be:
 * 
 * <pre>
 *    head := [?a rdf:type rdf:Property]
 * 
 *    tail := [0 ?a 0]
 * </pre>
 * 
 * In fact, the total bindings for the rule are represented as an
 * {@link IV}[] with the head occupying the first N positions in that array
 * and the bindings for the tail appearing thereafter in the declared order
 * of the predicates in the tail.
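 * <p>
 * For illustration, with N := 3 and the single tail pattern shown above,
 * the array is laid out as follows (the slot labels below are descriptive,
 * not field names):
 * 
 * <pre>
 *    [ head.s,    head.p,    head.o,       // positions 0 .. N-1
 *      tail[0].s, tail[0].p, tail[0].o ]   // positions N .. 2N-1
 * </pre>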
 * <p>
 * When a {@link StatementEnum#Explicit} statement is to be retracted from
 * the database we need to determine whether or not there exists a grounded
 * justification for that statement (same head). For each justification for
 * that statement we consider the tail. If there exists either an explicit
 * statement that satisfies the triple pattern for the tail, or an inference
 * that satisfies the triple pattern for the tail and that can be proven to
 * be grounded by recursive examination of its justifications, then the head
 * is still valid and is converted from an explicit statement into an
 * inference.
 * <p>
 * This looks more or less like: find all statements matching the pattern.
 * If any are explicit, then that part of the tail is grounded. If none are
 * explicit, then chase the justifications recursively. Only retract a
 * justification when it can no longer be grounded.
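 * <p>
 * Informally, the recursion performed by
 * {@link #isGrounded(InferenceEngine, TempTripleStore, AbstractTripleStore, SPO, boolean, boolean)}
 * looks like this (a pseudocode sketch, not the exact control flow):
 * 
 * <pre>
 *    grounded(pattern) :=
 *        for each statement matching pattern:
 *            if the statement is explicit (and is not being retracted):
 *                return true;
 *            for each justification of the statement:
 *                if grounded(t) for every triple pattern t in its tail:
 *                    return true;
 *        return false;
 * </pre>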
 * <p>
 * The concept of grounded vs ungrounded justifications is described in
 * <em>Inferencing and Truth Maintenance in RDF Schema: Exploring a naive
 * practical approach</em> by Jeen Broekstra and Arjohn Kampman.
 * 
 * @todo The tails could be represented more efficiently if we only stored
 *       the variable bindings and not all values in each tail. However, we
 *       might then need the rule on hand in order to decode the tail(s) and
 *       substitute in the missing constants.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
public class Justification implements Comparable<Justification> {

    protected static transient final Logger log = Logger
            .getLogger(Justification.class);

    /**
     * True iff the {@link #log} level is INFO or less.
     */
    final static public boolean INFO = log.getEffectiveLevel().toInt() <= Level.INFO
            .toInt();

    /**
     * True iff the {@link #log} level is DEBUG or less.
     */
    final static public boolean DEBUG = log.getEffectiveLevel().toInt() <= Level.DEBUG
            .toInt();

    /**
     * The #of term identifiers in a statement.
     */
//    private static final transient int N = IRawTripleStore.N;
    private final transient int N;

    /**
     * From the ctor, but not persisted.
     */
    public final transient IRule rule;

    /**
     * Term identifiers for the head and bindings.
     * <p>
     * Divide the length by the #of terms in a statement (N) and subtract one
     * to find the #of bindings for the tail. The first N entries are the
     * head. The rest are the tail.
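     * <p>
     * For example (the numbers are illustrative): with N == 3 and
     * <code>ivs.length == 9</code>, there are <code>9/3 - 1 == 2</code>
     * triple patterns in the tail.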
     * <p>
     * Note: A term identifier MAY be {@link IRawTripleStore#NULL} to
     * indicate a wildcard.
     */
    final IV[] ivs;

//    /**
//     * Construct an entailment for an {@link StatementEnum#Inferred}
//     * statement.
//     * 
//     * @param rule
//     *            The rule that licensed the entailment (this is only used
//     *            for debugging).
//     * @param head
//     *            The entailment licensed by the rule and the bindings.
//     * @param bindings
//     *            The bindings for that rule that licensed the entailment.
//     * 
//     * @todo no longer used?
//     */
//    public Justification(Rule rule, SPO head, SPO[] bindings) {
//
////        assert rule != null;
//        assert head != null;
//        assert bindings != null;
//
//        this.rule = rule;
//
//        ids = new long[(1 + bindings.length) * N];
//
//        int i = 0;
//
//        ids[i++] = head.s;
//        ids[i++] = head.p;
//        ids[i++] = head.o;
//
//        for (SPO spo : bindings) {
//
//            ids[i++] = spo.s;
//            ids[i++] = spo.p;
//            ids[i++] = spo.o;
//
//        }
//
//    }

    /**
     * Returns the head as an {@link SPO}.
     * <p>
     * Note: The {@link StatementEnum} associated with the head is actually
     * unknown, but it is marked as {@link StatementEnum#Inferred} in the
     * returned object. In order to discover the {@link StatementEnum} for
     * the head you MUST either already know it (this is not uncommon) or you
     * MUST read one of the statement indices.
     * 
     * @return The head.
     */
    public SPO getHead() {

        return new SPO(ivs[0], ivs[1], ivs[2], StatementEnum.Inferred);

    }

    /**
     * Returns the tail as an {@link SPO}[].
     * <p>
     * Note: The {@link StatementEnum} associated with the triple patterns in
     * the tail is actually unknown, but it is marked as
     * {@link StatementEnum#Inferred} in the returned objects. In fact, since
     * the tail consists of triple patterns and not necessarily fully bound
     * triples, the concept of a {@link StatementEnum} is not even defined.
     * 
     * @return The tail.
     */
    public SPO[] getTail() {

        // #of triple patterns in the tail.
        final int m = (ivs.length / N) - 1;

        final SPO[] tail = new SPO[m];

        // for each triple pattern in the tail.
        int j = N;

        for (int i = 0; i < m; i++) {

            tail[i] = new SPO(ivs[j++], ivs[j++], ivs[j++],
                    StatementEnum.Inferred);

        }

        return tail;

    }

    /**
     * Construct a justification for the entailment of an {@link ISolution}.
     * 
     * @param solution
     *            The solution.
     * 
     * @todo Modify the new rule execution layer to assign variables an
     *       integer index in [0:nvars] for each rule and then we can do
     *       positional decoding of the binding set and lose the requirement
     *       for the rule when generating justifications.
     *       <p>
     *       This might not be that important for scale-out since the
     *       solutions are, I believe, processed solely in a local buffer for
     *       Insert and Delete and only serialized for Query.
     */
    public Justification(ISolution solution) {

        // the rule that licensed the entailment.
        final IRule rule = solution.getRule();

        this.N = rule.getHead().arity();

        // the entailed statement.
        final SPO head = (SPO) solution.get();

        // the binding set for the solution.
        final IBindingSet bindingSet = solution.getBindingSet();

//        final long[] bindings = new long[(rule.getTailCount() + 1) * N];

        assert rule != null;
        assert head != null;
        assert head.isFullyBound();
        assert bindingSet != null;

//        // verify enough bindings for one or more triple patterns.
//        assert bindings.length % N == 0 : "bindings.length=" + bindings.length;
//        assert bindings.length / N >= 1 : "bindings.length=" + bindings.length;

        this.rule = rule; // Note: transient field.

        // #of triple patterns in the tail.
        final int tailCount = rule.getTailCount();

        // allocate enough for the head and the tail.
        ivs = new IV[(1 + tailCount) * N];

        int j = 0;

        ivs[j++] = head.s;
        ivs[j++] = head.p;
        ivs[j++] = head.o;

        /*
         * Note: Some of the variables in the tail(s) are left unbound by
         * some of the rules, e.g., rdfs1. This is because any binding for
         * those variables is valid. The justifications index treats these
         * unbound variables as wildcards and represents them with 0L in the
         * key. (The computed entailment is ALWAYS fully bound, it is just
         * that some of the variables in the tails might be unbound.)
         */
        for (int tailIndex = 0; tailIndex < tailCount; tailIndex++) {

            final IPredicate predicate = rule.getTail(tailIndex);

            for (int i = 0; i < N; i++) {

                final IVariableOrConstant<IV> t = predicate.get(i);

                final IV id;

                if (t.isVar()) {

                    final IConstant<IV> c = bindingSet.get((IVariable) t);

                    if (c == null) {

                        id = null;

                    } else {

                        id = c.get();

                    }

                } else {

                    id = t.get();

                }

                ivs[j++] = id;

            }

        }

    }

//    /**
//     * Deserialize a justification from an index entry.
//     * 
//     * @param itr
//     *            The iterator visiting the index entries.
//     * 
//     * @todo use {@link ITupleSerializer} to deserialize
//     */
//    public Justification(ITupleIterator itr) {
//
//        final ITuple tuple = itr.next();
//
//        final ByteArrayBuffer kbuf = tuple.getKeyBuffer();
//
//        final int keyLen = kbuf.limit();
//
//        final byte[] data = kbuf.array();
//
//        this.rule = null; // Not persisted.
//
//        // verify key is even multiple of (N*sizeof(long)).
//        assert keyLen % (N * Bytes.SIZEOF_LONG) == 0;
//
//        // #of term identifiers in the key.
//        final int m = keyLen / Bytes.SIZEOF_LONG;
//
//        // A justification must include at least a head and one tuple in
//        // the tail.
//        assert m >= N * 2 : "keyLen=" + keyLen + ", N=" + N + ", m=" + m;
//
//        ids = new long[m];
//
//        for (int i = 0; i < m; i++) {
//
//            ids[i] = KeyBuilder.decodeLong(data, i * Bytes.SIZEOF_LONG);
//
//        }
//
//    }

    /**
     * Used by the {@link JustificationTupleSerializer} to materialize
     * justifications.
     * 
     * @param ids
     *            The bindings on the head and tail(s).
     */
    public Justification(final int N, final IV[] ids) {

        this.rule = null; // not serialized.

        this.N = N;

        this.ivs = ids;

    }

    /**
     * Serialize a justification as an index key. The key length is a
     * function of the #of bindings in the justification.
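     * <p>
     * A minimal usage sketch (it assumes an {@link IKeyBuilder} obtained
     * from the <code>KeyBuilder.newInstance()</code> factory in
     * <code>com.bigdata.btree.keys</code> and a materialized justification
     * <code>jst</code>; both names are illustrative):
     * 
     * <pre>
     * IKeyBuilder keyBuilder = KeyBuilder.newInstance();
     * 
     * byte[] key = Justification.getKey(keyBuilder, jst);
     * </pre>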
     * 
     * @param keyBuilder
     *            A key builder.
     * @param jst
     *            The justification.
     * 
     * @return The key.
     */
    static public byte[] getKey(IKeyBuilder keyBuilder, Justification jst) {

        if (keyBuilder == null)
            throw new IllegalArgumentException();

        if (jst == null)
            throw new IllegalArgumentException();

        keyBuilder.reset();

        @SuppressWarnings("rawtypes")
        final IV[] ivs = jst.ivs;

        for (int i = 0; i < ivs.length; i++) {

            IVUtility.encode(keyBuilder, ivs[i]);

        }

        return keyBuilder.getKey();

    }

    public boolean equals(final Justification o) {

        // Note: ignores transient [rule].

        if (this == o)
            return true;

        return Arrays.equals(ivs, o.ivs);

    }

    /**
     * Places the justifications into an ordering that clusters them based on
     * the entailment that is being justified.
     */
    public int compareTo(final Justification o) {

        // the length of the longer ivs[].
        final int len = ivs.length > o.ivs.length ? ivs.length : o.ivs.length;

        // compare both arrays until a difference emerges or one is
        // exhausted.
        for (int i = 0; i < len; i++) {

            if (i >= ivs.length) {

                // shorter with common prefix is ordered first.
                return -1;

            } else if (i >= o.ivs.length) {

                // shorter with common prefix is ordered first.
                return 1;

            }

            /*
             * Both arrays have data for this index.
             * 
             * Note: logic avoids possible overflow of [long] by not
             * computing the difference between two longs.
             */
            int ret = IVUtility.compare(ivs[i], o.ivs[i]);

            if (ret != 0)
                return ret;

        }

        // identical values and identical lengths.
        assert ivs.length == o.ivs.length;

        return 0;

    }

    public String toString() {

        return toString(null);

    }

    public String toString(final AbstractTripleStore db) {

        StringBuilder sb = new StringBuilder();

        if (rule != null) {

            sb.append(rule.getName());

            sb.append("\n");

        }

        // tail
        {

            // #of triple patterns in the tail.
            final int m = (ivs.length / N) - 1;

            for (int i = 0; i < m; i++) {

                sb.append("\t(");

                for (int j = 0; j < N; j++) {

                    final IV id = ivs[i * N + N + j];

                    sb.append((db == null ? "" + id : db.toString(id)));

                    if (j + 1 < N)
                        sb.append(", ");

                }

                sb.append(")");

                if (i + 1 < m) {

                    sb.append(", \n");

                }

            }

            sb.append("\n\t-> ");

        }

        // head
        {

            sb.append("(");

            // Note: test on i < N since the head occupies the first N
            // positions.
            for (int i = 0; i < N; i++) {

                final IV id = ivs[i];

                sb.append((db == null ? "" + id : db.toString(id)));

                if (i + 1 < N)
                    sb.append(", ");

            }

            sb.append(")");

        }

        return sb.toString();

    }

    /**
     * Return true iff a grounded justification chain exists for the
     * statement.
     * 
     * @param focusStore
     *            The focusStore contains the set of statements that are
     *            being retracted from the database. When looking for
     *            grounded justifications we do NOT consider any statement
     *            that is found in this store. This prevents statements that
     *            are being retracted from providing either their own
     *            justification or the justification of any other statement
     *            that is being retracted at the same time.
     * @param db
     *            The database from which the statements are to be retracted
     *            and in which we will search for grounded justifications.
     * @param head
     *            A triple pattern. When invoked on a statement during truth
     *            maintenance this will be fully bound. However, during
     *            recursive processing triple patterns may be encountered in
     *            the tail of {@link Justification}s that are not fully
     *            bound. In such cases we test for any statement matching the
     *            triple pattern that can be proven to be grounded.
     * @param testHead
     *            When true the head will be tested against the database on
     *            entry before seeking a grounded justification chain. When
     *            false the head will not be tested directly but we will
     *            still seek a grounded justification chain.
     * @param testFocusStore
     *            When true, statements found in the focusStore do not count
     *            as grounded support (they are themselves being retracted).
     * @param visited
     *            A set of heads (whether fully bound or query patterns) that
     *            have already been considered.
     *            This parameter MUST be newly allocated on each top-level
     *            call. It is used in order to avoid infinite loops by
     *            rejecting for further consideration any head which has
     *            already been visited.
     * 
     * @return True iff the statement is entailed by a grounded justification
     *         chain in the database.
     * 
     * @todo this is depth 1st. would breadth 1st be faster?
     */
    public static boolean isGrounded(InferenceEngine inf,
            TempTripleStore focusStore, AbstractTripleStore db, SPO head,
            boolean testHead, boolean testFocusStore) {

        final VisitedSPOSet visited = new VisitedSPOSet(focusStore
                .getIndexManager());

        try {

            boolean ret = isGrounded(inf, focusStore, db, head, testHead,
                    testFocusStore, visited);

            if (log.isInfoEnabled())
                log.info("head=" + head + " is " + (ret ? "" : "NOT ")
                        + "grounded : testHead=" + testHead
                        + ", testFocusStore=" + testFocusStore
                        + ", #visited=" + visited.size());

            /*
             * FIXME We could also memoize goals that have been proven false
             * at this level since we know the outcome for a specific head
             * (fully bound or a query pattern). Experiment with this and see
             * if it reduces the costs of TM. It certainly should if we are
             * running the same query a lot!
             */
            return ret;

        } finally {

            /*
             * Note: This "closes" the visited set (discards the BTree), but
             * the visited set is backed by the [focusStore] and that MUST
             * NOT be closed since it is still in use by the caller!
             */
            visited.close();

        }

    }

    public static boolean isGrounded(InferenceEngine inf,
            TempTripleStore focusStore, AbstractTripleStore db, ISPO head,
            boolean testHead, boolean testFocusStore, VisitedSPOSet visited) {

        assert focusStore != null;

        if (DEBUG) {

            log.debug("head=" + head.toString(db) + ", testHead=" + testHead
                    + ", testFocusStore=" + testFocusStore + ", #visited="
                    + visited.size());

        }

        if (testHead) {

            if (head.getStatementType() != StatementEnum.Inferred)
                return true;

            if (db.isAxiom(head.s(), head.p(), head.o()))
                return true;

            if (!visited.add(head)) {

                /*
                 * Note: add() returns true if the element was added and
                 * false if it was pre-existing. The presence of a
                 * pre-existing query or fully bound SPO in this set means
                 * that we have already considered it. In this case we return
                 * false without further consideration in order to avoid
                 * entering into an infinite loop among the justification
                 * chains.
                 */
                if (DEBUG)
                    log.debug("Already visited: " + head.toString(db));

                return false;

            }

            /*
             * Scan the statement indices for the head. This covers both the
             * case when it is fully bound (since we need to know whether or
             * not it is explicit) and the case when it has unbound positions
             * (where we need to scan them and see if any matching statements
             * in the database are explicit).
             * 
             * @todo could be optimized for a point test when fully bound?
             */
            final IChunkedOrderedIterator<ISPO> itr = db.getAccessPath(
                    head.s(), head.p(), head.o()).iterator();

            try {

                while (itr.hasNext()) {

                    final ISPO spo = itr.next();

                    if (DEBUG)
                        log.debug("considering: " + spo.toString(db));

                    if (db.isAxiom(spo.s(), spo.p(), spo.o()))
                        return true;

                    if (spo.getStatementType() == StatementEnum.Explicit) {

                        /*
                         * If we do not have to test the focusStore then we
                         * are done.
                         */
                        if (!testFocusStore)
                            return true;

                        /*
                         * Before we can accept this spo as providing support
                         * for a grounded justification we have to test the
                         * focusStore and make sure that this is NOT one of
                         * the statements that is being retracted.
                         */
                        if (!focusStore.hasStatement(spo.s(), spo.p(),
                                spo.o())) {

                            /*
                             * This spo provides grounded support for a
                             * justification.
                             */
                            return true;

                        }

                        // fall through.
                    }

                    /*
                     * Depth-first recursion to see if the statement is
                     * grounded.
                     * 
                     * Note: testHead is [false] now since we just tested the
                     * head.
                     */
                    if (isGrounded(inf, focusStore, db, spo, false,
                            testFocusStore, visited)) {

                        // recursively grounded somewhere.
                        return true;

                    }

                    // otherwise consider the next spo.

                }

            } finally {

                itr.close();

            }

        }

        if (head.isFullyBound()) {

            /*
             * Examine all justifications for the statement. If any of them
             * are grounded then the statement is still entailed by the
             * database.
             * 
             * FIXME Add the 'head' parameter to the JustificationIterator
             * and then use it here so that we do not have to fully buffer
             * the justifications for a given statement (how bad this really
             * is depends on how many justifications we find for a given
             * statement, since the asynchronous read-ahead iterator will
             * buffer a chunk anyway for efficiency, and if that chunk spans
             * all justifications for a given head then it is all the same
             * thing).
             */
            final IJustificationIterator itr = new FullyBufferedJustificationIterator(
                    db, head);

            while (itr.hasNext()) {

                /*
                 * For each justification we consider the bindings. The first
                 * N are just the statement that was proven. The remaining
                 * bindings are M-1 triple patterns of N elements each.
                 */
                final Justification jst = itr.next();

                if (DEBUG)
                    log.debug("Considering:\n" + jst.toString(db));

                final SPO[] tail = jst.getTail();

                /*
                 * If all in the tail are explicit in the statement indices,
                 * then we are done.
                 * 
                 * Since the tail consists of triple patterns, we have to
                 * scan those patterns for the first explicit statement
                 * matched.
                 * 
                 * If none in the tail are explicit, then we can recurse. We
                 * could also scan the rest of the justifications for
                 * something that was easily proven to be explicit. It is a
                 * depth vs breadth 1st issue.
                 * 
                 * This is definitely going to be expensive in a distributed
                 * store since it is all random RPCs.
                 */
                boolean ok = true;

                for (SPO t : tail) {

                    if (!isGrounded(inf, focusStore, db, t, true/* testHead */,
                            testFocusStore, visited)) {

                        ok = false;

                        if (DEBUG)
                            log.debug("Not grounded: tail=" + t.toString(db));

                        break;

                    }

                }

                if (ok) {

                    if (DEBUG)
                        log.debug("Grounded:\n" + jst.toString(db));

                    return true;

                }

            } // next justification.

        } // head.isFullyBound()

        if (DEBUG)
            log.debug("Not grounded: " + head.toString(db));

        return false;

    }

    /**
     * A collection of {@link SPO} objects (either fully bound or query
     * patterns) that have already been visited.
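     * <p>
     * A minimal usage sketch, mirroring how
     * {@link Justification#isGrounded(InferenceEngine, TempTripleStore, AbstractTripleStore, SPO, boolean, boolean)}
     * uses this class (the <code>focusStore</code> and <code>spo</code>
     * variables are assumed to be in scope):
     * 
     * <pre>
     * VisitedSPOSet visited = new VisitedSPOSet(focusStore.getIndexManager());
     * try {
     *     if (visited.add(spo)) {
     *         // first visit for this statement or pattern.
     *     }
     * } finally {
     *     visited.close(); // discards the BTree, NOT the backing store.
     * }
     * </pre>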
     * <p>
     * Note: This is a very specialized {@link SPO} set implementation. How
     * it is created and destroyed is tightly integrated with how
     * {@link TruthMaintenance} works.
     * 
     * @author Bryan Thompson
     * @version $Id$
     * 
     * @todo This class is public only because of TestJustifications. It
     *       should be private.
     */
    public static class VisitedSPOSet {

        private BTree btree;

        private final SPOTupleSerializer tupleSer;

        /**
         * Create an {@link SPO} set backed by a {@link BTree} on the
         * temporary store associated with the [focusStore] on which truth
         * maintenance is being performed. The data written on this set will
         * not last longer than
         * {@link Justification#isGrounded(InferenceEngine, TempTripleStore, AbstractTripleStore, SPO, boolean, boolean)}.
         * When that method exits it {@link #close()}s this
         * {@link VisitedSPOSet}, which causes the {@link BTree} to be
         * discarded but DOES NOT close the backing store since it is still
         * in use by {@link TruthMaintenance}.
         * 
         * @param tempStore
         *            The backing store on which the set will be maintained.
         *            This is the [focusStore] for {@link TruthMaintenance}.
         */
        public VisitedSPOSet(final TemporaryRawStore tempStore) {

            final IndexMetadata metadata = new IndexMetadata(UUID.randomUUID());

//            metadata.setBranchingFactor(32); // Use default. Could be store specific.

            // FIXME quads : use different tupleSerializer IFF cross graph TM
            // is supported.
//            assert arity == 3;

            // Note: keys are SPOs; no values are stored for the tuples.
            tupleSer = new SPOTupleSerializer(SPOKeyOrder.SPO,
                    false/* sids */,
                    DefaultTupleSerializer.getDefaultLeafKeysCoder(),
                    EmptyRabaValueCoder.INSTANCE);

            metadata.setTupleSerializer(tupleSer);

            btree = BTree.create(tempStore, metadata);

        }

        /**
         * Add a statement or triple pattern to the set.
         * 
         * @param spo
         *            The statement or triple pattern.
         * 
         * @return true iff the set did not already contain the element
         *         (i.e., if the element was added to the set).
         */
        public boolean add(final ISPO spo) {

            if (DEBUG)
                log.debug(spo.toString());

            final byte[] key = tupleSer.serializeKey(spo);

            if (!btree.contains(key)) {

                btree.insert(key, null);

                return true;

            }

            return false;

        }

        public long size() {

            return btree.getEntryCount();

        }

        /**
         * Discards anything written on the btree. If nothing has been
         * written on the backing store yet then nothing ever will be.
         */
        public void close() {

            if (btree != null) {

                // discards the data in the btree, creating a new root.
                btree.removeAll();

                // discard the hard reference.
                btree = null;

                /*
                 * Note: !!!! DO NOT close the backing store here !!!!
                 * 
                 * Note: The visited set is backed by the [focusStore] and
                 * that MUST NOT be closed since it is still in use by the
                 * caller! See isGrounded(), which is where this gets used.
                 */
//                btree.getStore().close();

            }

        }

        /**
         * Ensures that {@link #close()} is eventually invoked.
         */
        protected void finalize() throws Throwable {

            close();

            super.finalize();

        }

    }

}




