// com.bigdata.rdf.lexicon.ReverseIndexWriterTask
package com.bigdata.rdf.lexicon;

import java.util.concurrent.Callable;

import org.openrdf.model.BNode;
import org.openrdf.model.Value;

import com.bigdata.btree.IIndex;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KVO;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.io.ByteArrayBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.rdf.lexicon.Id2TermWriteProc.Id2TermWriteProcConstructor;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.rdf.model.BigdataValueSerializer;
import com.bigdata.rdf.spo.ISPO;

/**
 * Add terms to the reverse index, which is the index that we use to lookup the
 * RDF value by its term identifier so that we can externalize {@link ISPO}s as
 * RDF/XML or the like.
 * <p>
 * Note: Every term asserted against the forward mapping [terms] MUST be
 * asserted against the reverse mapping [ids] EVERY time. This is required in
 * order to guarantee that the reverse index remains complete and consistent.
 * Otherwise a client that writes on the terms index and fails before writing on
 * the ids index would cause those terms to remain undefined in the reverse
 * index.
 * <p>
 * The task returns the elapsed wall-clock time (milliseconds) spent building
 * the keys/values and submitting them to the index.
 */
public class ReverseIndexWriterTask implements Callable<Long> {

    /** The index on which the (term identifier, serialized value) pairs are written. */
    private final IIndex idTermIndex;

    /** Serializer used to turn each {@link BigdataValue} into the index value. */
    private final BigdataValueSerializer<BigdataValue> ser;

    /** The terms to be written; only the first {@link #ndistinct} are examined. */
    private final KVO<BigdataValue>[] a;

    /** The #of leading elements of {@link #a} that are processed. */
    private final int ndistinct;

    /** When <code>false</code>, {@link BNode}s are skipped rather than written. */
    private final boolean storeBlankNodes;

    /**
     * 
     * @param idTermIndex
     *            The index on which to write the data.
     * @param valueFactory
     *            This determines how the {@link Value} objects are serialized
     *            on the index.
     * @param a
     *            The terms (in sorted order by their term identifiers).
     * @param ndistinct
     *            The #of elements in <i>a</i> to be processed (the elements in
     *            <code>[0, ndistinct)</code>).
     * @param storeBlankNodes
     *            When <code>true</code> blank nodes are written on the reverse
     *            index like any other term. When <code>false</code> they are
     *            skipped.
     * 
     * @throws IllegalArgumentException
     *             if <i>idTermIndex</i>, <i>valueFactory</i> or <i>a</i> is
     *             <code>null</code>, or if <i>ndistinct</i> is negative or
     *             greater than <code>a.length</code>.
     */
    public ReverseIndexWriterTask(final IIndex idTermIndex,
            final BigdataValueFactory valueFactory,
            final KVO<BigdataValue>[] a, final int ndistinct,
            final boolean storeBlankNodes) {

        if (idTermIndex == null)
            throw new IllegalArgumentException("idTermIndex is null");

        if (valueFactory == null)
            throw new IllegalArgumentException("valueFactory is null");

        if (a == null)
            throw new IllegalArgumentException("a is null");

        if (ndistinct < 0 || ndistinct > a.length)
            throw new IllegalArgumentException("ndistinct=" + ndistinct
                    + ", a.length=" + a.length);

        this.idTermIndex = idTermIndex;

        this.ser = valueFactory.getValueSerializer();

        this.a = a;

        this.ndistinct = ndistinct;

        this.storeBlankNodes = storeBlankNodes;

    }

    /**
     * Builds the key (encoded term identifier) and value (serialized term) for
     * each term to be written and submits them to the index in a single
     * procedure call. Nothing is submitted when there are no terms to write.
     * 
     * @return the elapsed time for this task (milliseconds).
     */
    @Override
    public Long call() throws Exception {

        final long _begin = System.currentTimeMillis();

        /*
         * Create a key buffer to hold the keys generated from the term
         * identifiers and then generate those keys.
         * 
         * Note: BNodes are written on the reverse index only when
         * [storeBlankNodes] is true; otherwise they are skipped. The arrays
         * are sized for the worst case (no term skipped) and only the first
         * [nwritten] slots are populated and submitted.
         */
        final byte[][] keys = new byte[ndistinct][];
        final byte[][] vals = new byte[ndistinct][];
        int nwritten = 0; // #of terms actually placed into keys[]/vals[].
        {

            // key builder obtained for this invocation only (not shared).
            final IKeyBuilder keyBuilder = KeyBuilder.newInstance();

            final int initialCapacity = 128;

            // buffer is reused for each serialized term.
            final DataOutputBuffer out = new DataOutputBuffer(initialCapacity);

            // buffer is reused for each serialized term.
            final ByteArrayBuffer tmp = new ByteArrayBuffer(initialCapacity);

            for (int i = 0; i < ndistinct; i++) {

                final BigdataValue x = a[i].obj;

                if (!storeBlankNodes && x instanceof BNode) {

                    // Blank nodes are not entered into the reverse index.
                    continue;

                }

                // The key is the encoded term identifier.
                keys[nwritten] = x.getIV().encode(keyBuilder.reset())
                        .getKey();

                // The value is the serialized term.
                vals[nwritten] = ser.serialize(x, out.reset(), tmp);

                nwritten++;

            }

        }

        // run the procedure on the index (skipped when nothing to write).
        if (nwritten > 0) {

            idTermIndex.submit(0/* fromIndex */, nwritten/* toIndex */,
                    keys, vals, Id2TermWriteProcConstructor.INSTANCE, null/* resultHandler */
            );

        }

        return System.currentTimeMillis() - _begin;

    }

}