// com.bigdata.rdf.load.VerifyStatementBuffer (Maven / Gradle / Ivy)
//
// Go to download
package com.bigdata.rdf.load;

import java.beans.Statement;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.log4j.Logger;
import org.openrdf.model.Literal;
import org.openrdf.model.URI;
import org.openrdf.model.Value;

import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.StatementEnum;
import com.bigdata.rdf.rio.StatementBuffer;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPO;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.striterator.IChunkedOrderedIterator;

/**
 * Statements inserted into the buffer are verified against the database. No
 * new {@link Value}s or {@link Statement}s will be written on the
 * database by this class. The #of {@link URI}, {@link Literal}, and told
 * triples not found in the database are reported by various counters.
 * 
 * @author Bryan Thompson
 * @version $Id$
 * 
 * @todo The counters are being updated on each incremental write rather
 * than tracked on a per-task basis and then updated iff the task as a whole
 * succeeds. This causes double-counting of both found and not found totals
 * when a task errors and then retries. The counters need to be attached to
 * the task and the task logic extended to capture them rather than to the
 * statement buffer (a bit of a mess).
 */
public class VerifyStatementBuffer extends StatementBuffer {

    final private static Logger log = Logger.getLogger(VerifyStatementBuffer.class);
    
    final AtomicLong nterms, ntermsNotFound, ntriples, ntriplesNotFound;
    
    /**
     * @param database
     * @param capacity
     */
    public VerifyStatementBuffer(AbstractTripleStore database,
            int capacity, AtomicLong nterms, AtomicLong ntermsNotFound,
            AtomicLong ntriples, AtomicLong ntriplesNotFound) {
        
        super(database, capacity);

        this.nterms = nterms;
        
        this.ntermsNotFound = ntermsNotFound;
        
        this.ntriples = ntriples;
        
        this.ntriplesNotFound = ntriplesNotFound;
        
    }
    
    /**
     * Overridden to batch verify the terms and statements in the buffer.
     * 
     * FIXME Verify that {@link StatementBuffer#flush()} is doing the right
     * thing for this case (esp, how it handles bnodes when appearing as
     * {s,p,o} or when appearing as the statement identifier).
     */
    @Override
    protected void incrementalWrite() {

        if (log.isInfoEnabled()) {
            log.info("numValues=" + numValues + ", numStmts=" + numStmts);
        }

        // Verify terms (batch operation).
        if (numValues > 0) {

            database.getLexiconRelation()
                    .addTerms(values, numValues, true/* readOnly */);

        }

        for( int i=0; i 0) {

            final SPO[] a = new SPO[numStmts];
            final BigdataStatement[] b = new BigdataStatement[numStmts];
            
            // #of SPOs generated for testing.
            int n = 0;
            
            for (int i = 0; i < numStmts; i++) {

                final BigdataStatement stmt = stmts[i];

                ntriples.incrementAndGet();

                if (!stmt.isFullyBound()) {

                        log
                                .warn("Unknown statement (one or more unknown terms) "
                                        + stmt);

                    ntriplesNotFound.incrementAndGet();

                    continue;

                }

                a[n] = new SPO(stmt);
                
                b[n] = stmt;
                
                n++;
                
            }
            
            final IChunkedOrderedIterator itr = database
                    .bulkCompleteStatements(a, n);

            try {

                while (itr.hasNext()) {

                    itr.next();

                }

            } finally {

                itr.close();

            }
            
            for (int i = 0; i < n; i++) {

                final ISPO spo = a[i];

                if (!spo.hasStatementType()) {

                    ntriplesNotFound.incrementAndGet();

                    log.warn("Statement not in database: " + b[i]+" ("+spo+")");

                    continue;

                }

                if (spo.getStatementType() != StatementEnum.Explicit) {
                    
                    ntriplesNotFound.incrementAndGet();

                    log.warn("Statement not explicit database: "+b[i]+" is marked as "+spo.getStatementType());
                    
                    continue;
                    
                }
                
            }
            
        }
        
        // Reset the state of the buffer (but not the bnodes nor deferred stmts).
        _clear();

    }
    
}