com.bigdata.rdf.load.VerifyStatementBuffer Maven / Gradle / Ivy
package com.bigdata.rdf.load;
import java.beans.Statement;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.log4j.Logger;
import org.openrdf.model.Literal;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.StatementEnum;
import com.bigdata.rdf.rio.StatementBuffer;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPO;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.striterator.IChunkedOrderedIterator;
/**
* Statements inserted into the buffer are verified against the database. No
* new {@link Value}s or {@link Statement}s will be written on the
* database by this class. The #of {@link URI}, {@link Literal}, and told
* triples not found in the database are reported by various counters.
*
* @author Bryan Thompson
* @version $Id$
*
* @todo The counters are being updated on each incremental write rather
* than tracked on a per-task basis and then updated iff the task as a whole
* succeeds. This causes double-counting of both found and not found totals
* when a task errors and then retries. The counters need to be attached to
* the task and the task logic extended to capture them rather than to the
* statement buffer (a bit of a mess).
*/
public class VerifyStatementBuffer extends StatementBuffer {
final private static Logger log = Logger.getLogger(VerifyStatementBuffer.class);
final AtomicLong nterms, ntermsNotFound, ntriples, ntriplesNotFound;
/**
* @param database
* @param capacity
*/
public VerifyStatementBuffer(AbstractTripleStore database,
int capacity, AtomicLong nterms, AtomicLong ntermsNotFound,
AtomicLong ntriples, AtomicLong ntriplesNotFound) {
super(database, capacity);
this.nterms = nterms;
this.ntermsNotFound = ntermsNotFound;
this.ntriples = ntriples;
this.ntriplesNotFound = ntriplesNotFound;
}
/**
* Overridden to batch verify the terms and statements in the buffer.
*
* FIXME Verify that {@link StatementBuffer#flush()} is doing the right
* thing for this case (esp, how it handles bnodes when appearing as
* {s,p,o} or when appearing as the statement identifier).
*/
@Override
protected void incrementalWrite() {
if (log.isInfoEnabled()) {
log.info("numValues=" + numValues + ", numStmts=" + numStmts);
}
// Verify terms (batch operation).
if (numValues > 0) {
database.getLexiconRelation()
.addTerms(values, numValues, true/* readOnly */);
}
for( int i=0; i 0) {
final SPO[] a = new SPO[numStmts];
final BigdataStatement[] b = new BigdataStatement[numStmts];
// #of SPOs generated for testing.
int n = 0;
for (int i = 0; i < numStmts; i++) {
final BigdataStatement stmt = stmts[i];
ntriples.incrementAndGet();
if (!stmt.isFullyBound()) {
log
.warn("Unknown statement (one or more unknown terms) "
+ stmt);
ntriplesNotFound.incrementAndGet();
continue;
}
a[n] = new SPO(stmt);
b[n] = stmt;
n++;
}
final IChunkedOrderedIterator itr = database
.bulkCompleteStatements(a, n);
try {
while (itr.hasNext()) {
itr.next();
}
} finally {
itr.close();
}
for (int i = 0; i < n; i++) {
final ISPO spo = a[i];
if (!spo.hasStatementType()) {
ntriplesNotFound.incrementAndGet();
log.warn("Statement not in database: " + b[i]+" ("+spo+")");
continue;
}
if (spo.getStatementType() != StatementEnum.Explicit) {
ntriplesNotFound.incrementAndGet();
log.warn("Statement not explicit database: "+b[i]+" is marked as "+spo.getStatementType());
continue;
}
}
}
// Reset the state of the buffer (but not the bnodes nor deferred stmts).
_clear();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy