/**

 Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

 Contact:
 SYSTAP, LLC DBA Blazegraph
 2501 Calvert ST NW #106
 Washington, DC 20008
 licenses@blazegraph.com

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; version 2 of the License.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
/*
 * Created on Jul 4, 2008
 */

package com.bigdata.rdf.lexicon;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.TimeZone;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.log4j.Logger;
import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;

import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariableOrConstant;
import com.bigdata.bop.ap.Predicate;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleSerializer;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.IndexTypeEnum;
import com.bigdata.btree.filter.PrefixFilter;
import com.bigdata.btree.filter.TupleFilter;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KVO;
import com.bigdata.cache.ConcurrentWeakValueCacheWithBatchedUpdates;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.IJournal;
import com.bigdata.journal.IResourceLock;
import com.bigdata.journal.ITx;
import com.bigdata.journal.Journal;
import com.bigdata.journal.NoSuchIndexException;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.rdf.internal.IDatatypeURIResolver;
import com.bigdata.rdf.internal.IExtensionFactory;
import com.bigdata.rdf.internal.IInlineURIFactory;
import com.bigdata.rdf.internal.ILexiconConfiguration;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVUtility;
import com.bigdata.rdf.internal.LexiconConfiguration;
import com.bigdata.rdf.internal.NoExtensionFactory;
import com.bigdata.rdf.internal.NoInlineURIFactory;
import com.bigdata.rdf.internal.NoSuchVocabularyItem;
import com.bigdata.rdf.internal.VTE;
import com.bigdata.rdf.internal.XSD;
import com.bigdata.rdf.internal.impl.BlobIV;
import com.bigdata.rdf.internal.impl.TermId;
import com.bigdata.rdf.internal.impl.bnode.SidIV;
import com.bigdata.rdf.internal.impl.extensions.XSDStringExtension;
import com.bigdata.rdf.model.BigdataBNode;
import com.bigdata.rdf.model.BigdataLiteral;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.rdf.model.BigdataValueFactoryImpl;
import com.bigdata.rdf.model.BigdataValueSerializer;
import com.bigdata.rdf.rio.StatementBuffer;
import com.bigdata.rdf.sail.BigdataSailHelper;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.AbstractTripleStore.Options;
import com.bigdata.rdf.vocab.NoVocabulary;
import com.bigdata.rdf.vocab.Vocabulary;
import com.bigdata.relation.AbstractRelation;
import com.bigdata.relation.RelationSchema;
import com.bigdata.relation.accesspath.AccessPath;
import com.bigdata.relation.accesspath.ArrayAccessPath;
import com.bigdata.relation.accesspath.EmptyAccessPath;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.locator.ILocatableResource;
import com.bigdata.relation.locator.IResourceLocator;
import com.bigdata.search.FullTextIndex;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.service.geospatial.GeoSpatialConfig;
import com.bigdata.sparse.SparseRowStore;
import com.bigdata.striterator.ChunkedArrayIterator;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.striterator.IKeyOrder;
import com.bigdata.util.Bytes;
import com.bigdata.util.NT;
import com.bigdata.util.concurrent.CanonicalFactory;

import cutthecrap.utils.striterators.Resolver;
import cutthecrap.utils.striterators.Striterator;

/**
 * The {@link LexiconRelation} handles all things related to the indices mapping
 * external RDF {@link Value}s onto {@link IV}s (internal values) and provides
 * methods for efficient materialization of external RDF {@link Value}s from
 * {@link IV}s.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
public class LexiconRelation extends AbstractRelation 
        implements IDatatypeURIResolver {

    private final static Logger log = Logger.getLogger(LexiconRelation.class);

    private final Set<String> indexNames;

    private final List<IKeyOrder<BigdataValue>> keyOrders;
    
    private final AtomicReference<IValueCentricTextIndexer<?>> viewRef = new AtomicReference<IValueCentricTextIndexer<?>>();

    /**
     * A new one for the subject-centric full text index.
     */
    private final AtomicReference<ISubjectCentricTextIndexer> viewRef2 = new AtomicReference<ISubjectCentricTextIndexer>();

    /**
     * Note: This is a stateless class.
     */
    private final BlobsIndexHelper h = new BlobsIndexHelper();

	@SuppressWarnings("unchecked")
    protected Class<BigdataValueFactory> determineValueFactoryClass() {

        final String className = getProperty(
                AbstractTripleStore.Options.VALUE_FACTORY_CLASS,
                AbstractTripleStore.Options.DEFAULT_VALUE_FACTORY_CLASS);
        final Class<?> cls;
        try {
            cls = Class.forName(className);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Bad option: "
                    + AbstractTripleStore.Options.VALUE_FACTORY_CLASS, e);
        }

        if (!BigdataValueFactory.class.isAssignableFrom(cls)) {
            throw new RuntimeException(
                    AbstractTripleStore.Options.VALUE_FACTORY_CLASS
                            + ": Must implement: "
                            + BigdataValueFactory.class.getName());
        }

        return (Class<BigdataValueFactory>) cls;

	}
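
    /*
     * Illustrative configuration (a sketch, not part of this class): the
     * value factory is selected via the VALUE_FACTORY_CLASS option. Whatever
     * class is named must expose a static getInstance(String namespace)
     * method, since the constructor below resolves it reflectively.
     *
     *   final Properties p = new Properties();
     *   p.setProperty(AbstractTripleStore.Options.VALUE_FACTORY_CLASS,
     *           BigdataValueFactoryImpl.class.getName());
     */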

    @SuppressWarnings({ "unchecked", "rawtypes" })
    protected Class<IValueCentricTextIndexer> determineTextIndexerClass() {

        final String className = getProperty(
                AbstractTripleStore.Options.TEXT_INDEXER_CLASS,
                AbstractTripleStore.Options.DEFAULT_TEXT_INDEXER_CLASS);
        
        final Class<?> cls;
        try {
            cls = Class.forName(className);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Bad option: "
                    + AbstractTripleStore.Options.TEXT_INDEXER_CLASS, e);
        }

        if (!IValueCentricTextIndexer.class.isAssignableFrom(cls)) {
            throw new RuntimeException(
                    AbstractTripleStore.Options.TEXT_INDEXER_CLASS
                            + ": Must implement: "
                            + IValueCentricTextIndexer.class.getName());
        }

        return (Class<IValueCentricTextIndexer>) cls;

    }
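
    /*
     * Illustrative configuration (a sketch): a custom full text indexer may
     * be named via the TEXT_INDEXER_CLASS option. The class name below is
     * hypothetical; any implementation must implement
     * IValueCentricTextIndexer and expose a static
     * getInstance(IIndexManager, String, Long, Properties) method, which
     * getSearchEngine() invokes reflectively.
     *
     *   p.setProperty(AbstractTripleStore.Options.TEXT_INDEXER_CLASS,
     *           "com.example.MyTextIndexer"); // hypothetical class
     */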
    
    @SuppressWarnings({ "unchecked", "rawtypes" })
    protected Class<ISubjectCentricTextIndexer> determineSubjectCentricTextIndexerClass() {

        final String className = getProperty(
                AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEXER_CLASS,
                AbstractTripleStore.Options.DEFAULT_SUBJECT_CENTRIC_TEXT_INDEXER_CLASS);
        
        final Class<?> cls;
        try {
            cls = Class.forName(className);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Bad option: "
                    + AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEXER_CLASS, e);
        }

        if (!ISubjectCentricTextIndexer.class.isAssignableFrom(cls)) {
            throw new RuntimeException(
                    AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEXER_CLASS
                            + ": Must implement: "
                            + ISubjectCentricTextIndexer.class.getName());
        }

        return (Class<ISubjectCentricTextIndexer>) cls;

    }
    
    @SuppressWarnings("unchecked")
    protected Class<IExtensionFactory> determineExtensionFactoryClass() {

        final String defaultClassName;
        if (vocab == null || vocab.getClass() == NoVocabulary.class) {
            /*
             * If there is no vocabulary then you can not use the default
             * extension class (or probably any extension class for that matter
             * since the vocabulary is required in order to be able to resolve
             * the URIs for the extension).
             * 
             * @see https://sourceforge.net/apps/trac/bigdata/ticket/456
             */
            defaultClassName = NoExtensionFactory.class.getName();
        } else {
            defaultClassName = AbstractTripleStore.Options.DEFAULT_EXTENSION_FACTORY_CLASS;
        }

        final String className = getProperty(
                AbstractTripleStore.Options.EXTENSION_FACTORY_CLASS,
                defaultClassName);
        
        final Class<?> cls;
        try {
            cls = Class.forName(className);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Bad option: "
                    + AbstractTripleStore.Options.EXTENSION_FACTORY_CLASS, e);
        }

        if (!IExtensionFactory.class.isAssignableFrom(cls)) {
            throw new RuntimeException(
                    AbstractTripleStore.Options.EXTENSION_FACTORY_CLASS
                            + ": Must implement: "
                            + IExtensionFactory.class.getName());
        }

        return (Class<IExtensionFactory>) cls;

    }

    @SuppressWarnings("unchecked")
    protected Class<IInlineURIFactory> determineInlineURIFactoryClass() {

        final String defaultClassName;
        if (vocab == null || vocab.get(XSD.IPV4) == null) {
            /*
             * If there is no vocabulary then you can not use an inline URI
             * factory because the namespaces must be in the vocabulary. If the
             * XSD.IPV4 uri is not present in the vocabulary then either you are
             * using NoVocabulary.class or an older version of the vocabulary
             * that does not have that URI in it. Newer journals should be using
             * DefaultBigdataVocabulary.
             */
            defaultClassName = NoInlineURIFactory.class.getName();
        } else {
            defaultClassName = AbstractTripleStore.Options.DEFAULT_INLINE_URI_FACTORY_CLASS;
        }

        final String className = getProperty(
                AbstractTripleStore.Options.INLINE_URI_FACTORY_CLASS,
                defaultClassName);
        
        final Class<?> cls;
        try {
            cls = Class.forName(className);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Bad option: "
                    + AbstractTripleStore.Options.INLINE_URI_FACTORY_CLASS, e);
        }

        if (!IInlineURIFactory.class.isAssignableFrom(cls)) {
            throw new RuntimeException(
                    AbstractTripleStore.Options.INLINE_URI_FACTORY_CLASS
                            + ": Must implement: "
                            + IInlineURIFactory.class.getName());
        }

        return (Class<IInlineURIFactory>) cls;

    }
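
    /*
     * Illustrative configuration (a sketch): inline URI handling can be
     * disabled explicitly by naming NoInlineURIFactory, e.g. when the
     * vocabulary does not declare the namespaces required for inlining (see
     * the defaulting logic above).
     *
     *   p.setProperty(AbstractTripleStore.Options.INLINE_URI_FACTORY_CLASS,
     *           NoInlineURIFactory.class.getName());
     */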

    /**
     * Note: The term:id and id:term indices MUST use unisolated write operations
     * to ensure consistency without write-write conflicts. The only exception
     * would be a read-historical view.
     * 
     * @param indexManager
     * @param namespace
     * @param timestamp
     * @param properties
     * 
     */
    public LexiconRelation(final IIndexManager indexManager,
            final String namespace, final Long timestamp,
            final Properties properties) {

        this(null/* container */, indexManager, namespace, timestamp,
                properties);

    }

    public LexiconRelation(final AbstractTripleStore container,
            final IIndexManager indexManager, final String namespace,
            final Long timestamp, final Properties properties) {

        super(container, indexManager, namespace, timestamp, properties);

        {
            
            this.textIndex = Boolean.parseBoolean(getProperty(
                    AbstractTripleStore.Options.TEXT_INDEX,
                    AbstractTripleStore.Options.DEFAULT_TEXT_INDEX));
         
            if (textIndex) {
                /*
                 * Explicitly disable overwrite for the full text index associated
                 * with the lexicon. By default, the full text index will replace
                 * the existing tuple for a key. We turn this property off because
                 * the RDF values are immutable as is the mapping from an RDF value
                 * to a term identifier. Hence if we observe the same key there is
                 * no need to update the index entry - it will only cause the
                 * journal size to grow but will not add any information to the
                 * index.
                 */
                properties
                        .setProperty(FullTextIndex.Options.OVERWRITE, "false");
//                /*
//                 * Explicitly set the class which knows how to handle IVs in the
//                 * keys of the full text index.
//                 */
//                properties.setProperty(
//                        FullTextIndex.Options.DOCID_FACTORY_CLASS,
//                        IVDocIdExtension.class.getName());

            }
            
            // just for now while I am testing, don't feel like rebuilding
            // the entire journal
            this.subjectCentricTextIndex = textIndex;
//            this.subjectCentricTextIndex = Boolean.parseBoolean(getProperty(
//                    AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEX,
//                    AbstractTripleStore.Options.DEFAULT_SUBJECT_CENTRIC_TEXT_INDEXER_CLASS));
         
        }
        
        this.storeBlankNodes = Boolean.parseBoolean(getProperty(
                AbstractTripleStore.Options.STORE_BLANK_NODES,
                AbstractTripleStore.Options.DEFAULT_STORE_BLANK_NODES));
        
        final int blobsThreshold;
        {

            blobsThreshold = Integer.parseInt(getProperty(
                    AbstractTripleStore.Options.BLOBS_THRESHOLD,
                    AbstractTripleStore.Options.DEFAULT_BLOBS_THRESHOLD));

            /**
             * Note: Integer.MAX_VALUE disables the BLOBS index.
             * 
             * @see "Disable BLOBS indexing completely for GPU"
             */
			if (blobsThreshold < 0 || blobsThreshold > 4 * Bytes.kilobyte && blobsThreshold != Integer.MAX_VALUE) {

                throw new IllegalArgumentException(
                        AbstractTripleStore.Options.BLOBS_THRESHOLD + "="
                                + blobsThreshold);

            }

        }
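
        /*
         * Illustrative configuration (a sketch): per the validation above,
         * BLOBS_THRESHOLD must lie in [0:4096], or be Integer.MAX_VALUE to
         * disable the BLOBS index entirely.
         *
         *   p.setProperty(AbstractTripleStore.Options.BLOBS_THRESHOLD,
         *           Integer.toString(Integer.MAX_VALUE)); // no BLOBS index
         */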

        {

            if (indexManager instanceof IBigdataFederation
                    && ((IBigdataFederation) indexManager).isScaleOut()) {

                final String defaultValue = AbstractTripleStore.Options.DEFAULT_TERMID_BITS_TO_REVERSE;

                termIdBitsToReverse = Integer.parseInt(getProperty(
                        AbstractTripleStore.Options.TERMID_BITS_TO_REVERSE,
                        defaultValue));

                if (termIdBitsToReverse < 0 || termIdBitsToReverse > 31) {

                    throw new IllegalArgumentException(
                            AbstractTripleStore.Options.TERMID_BITS_TO_REVERSE
                                    + "=" + termIdBitsToReverse);

                }

            } else {

                // Note: Not used in standalone.
                termIdBitsToReverse = 0;

            }

        }

        {

            final Set<String> set = new HashSet<String>();

            set.add(getFQN(LexiconKeyOrder.TERM2ID));
            set.add(getFQN(LexiconKeyOrder.ID2TERM));
            set.add(getFQN(LexiconKeyOrder.BLOBS));

            if(textIndex) {
                
                set.add(getNamespace() + "." + FullTextIndex.NAME_SEARCH);

            }
            
            // @todo add names as registered to base class? but then how to
            // discover?  could be in the global row store.
            this.indexNames = Collections.unmodifiableSet(set);

            this.keyOrders = Arrays
                    .asList((IKeyOrder<BigdataValue>[]) new IKeyOrder[] { //
                            LexiconKeyOrder.TERM2ID,//
                            LexiconKeyOrder.ID2TERM,//
                            LexiconKeyOrder.BLOBS //
                            });

        }

        /*
         * Note: I am deferring resolution of the indices to minimize the
         * latency and overhead required to "locate" the relation. In scale out,
         * resolving the index will cause a ClientIndexView to spring into
         * existence for the appropriate timestamp, and we often do not need
         * that view for each index of the relation during query.
         */
        
//        /*
//         * cache hard references to the indices.
//         */
//
//        terms = super.getIndex(LexiconKeyOrder.TERM2ID);
//
//        if(textIndex) {
//            
//            getSearchEngine();
//            
//        }

        /*
         * Lookup/create value factory for the lexicon's namespace.
         * 
         * Note: The same instance is used for read-only tx, read-write tx,
         * read-committed, and unisolated views of the lexicon for a given
         * triple store.
         */
//        valueFactory = BigdataValueFactoryImpl.getInstance(namespace);
        try {
			final Class<BigdataValueFactory> vfc = determineValueFactoryClass();
			final Method gi = vfc.getMethod("getInstance", String.class);
			this.valueFactory = (BigdataValueFactory) gi.invoke(null, namespace);
		} catch (NoSuchMethodException e) {
			throw new IllegalArgumentException(
					AbstractTripleStore.Options.VALUE_FACTORY_CLASS, e);
		} catch (InvocationTargetException e) {
			throw new IllegalArgumentException(
					AbstractTripleStore.Options.VALUE_FACTORY_CLASS, e);
		} catch (IllegalAccessException e) {
			throw new IllegalArgumentException(
					AbstractTripleStore.Options.VALUE_FACTORY_CLASS, e);
		}

        /*
         * @todo This should be a high concurrency LIRS or similar cache in
         * order to prevent the cache being flushed by the materialization of
         * low frequency terms.
         */
        {
            
            final int termCacheCapacity = Integer.parseInt(getProperty(
                    AbstractTripleStore.Options.TERM_CACHE_CAPACITY,
                    AbstractTripleStore.Options.DEFAULT_TERM_CACHE_CAPACITY));

            final Long commitTime = getCommitTime();
            
            if (commitTime != null && TimestampUtility.isReadOnly(timestamp)) {

                /*
                 * Shared for read-only views from sample commit time. Sharing
                 * allows us to reuse the same instances of the term cache for
                 * queries reading from the same commit point. The cache size is
                 * automatically increased to take advantage of the fact that it
                 * is a shared resource.
                 * 
                 * Note: Sharing is limited to the same commit time to prevent
                 * life cycle issues across drop/create sequences for the triple
                 * store.
                 */
                termCache = termCacheFactory.getInstance(new NT(namespace,
                        commitTime.longValue()), termCacheCapacity * 2);

            } else {

                /*
                 * Unshared for any other view of the triple store.
                 */
                termCache = new TermCache<IV<?, ?>, BigdataValue>(//
                        new ConcurrentWeakValueCacheWithBatchedUpdates<IV<?, ?>, BigdataValue>(//
                        termCacheCapacity, // queueCapacity
                        .75f, // loadFactor (.75 is the default)
                        16 // concurrency level (16 is the default)
                ));

            }
            
        }
        
        {
            
            inlineLiterals = Boolean.parseBoolean(getProperty(
                    AbstractTripleStore.Options.INLINE_XSD_DATATYPE_LITERALS,
                    AbstractTripleStore.Options.DEFAULT_INLINE_XSD_DATATYPE_LITERALS));

            inlineTextLiterals = Boolean.parseBoolean(getProperty(
                    AbstractTripleStore.Options.INLINE_TEXT_LITERALS,
                    AbstractTripleStore.Options.DEFAULT_INLINE_TEXT_LITERALS));
            
            maxInlineTextLength = Integer.parseInt(getProperty(
                    AbstractTripleStore.Options.MAX_INLINE_TEXT_LENGTH,
                    AbstractTripleStore.Options.DEFAULT_MAX_INLINE_STRING_LENGTH));
            
            inlineBNodes = storeBlankNodes && Boolean.parseBoolean(getProperty(
                    AbstractTripleStore.Options.INLINE_BNODES,
                    AbstractTripleStore.Options.DEFAULT_INLINE_BNODES));
            
            inlineDateTimes = Boolean.parseBoolean(getProperty(
                    AbstractTripleStore.Options.INLINE_DATE_TIMES,
                    AbstractTripleStore.Options.DEFAULT_INLINE_DATE_TIMES));
            
            inlineDateTimesTimeZone = TimeZone.getTimeZone(getProperty(
                    AbstractTripleStore.Options.INLINE_DATE_TIMES_TIMEZONE,
                    AbstractTripleStore.Options.DEFAULT_INLINE_DATE_TIMES_TIMEZONE));
            
            rejectInvalidXSDValues = Boolean.parseBoolean(getProperty(
                    AbstractTripleStore.Options.REJECT_INVALID_XSD_VALUES,
                    AbstractTripleStore.Options.DEFAULT_REJECT_INVALID_XSD_VALUES));
            

            // Resolve the vocabulary.
            vocab = getContainer().getVocabulary();

            
            // Resolve the geospatial configuration, if geospatial is enabled
            final Boolean geoSpatial = Boolean.parseBoolean(getProperty(
                    AbstractTripleStore.Options.GEO_SPATIAL,
                    AbstractTripleStore.Options.DEFAULT_GEO_SPATIAL));
            
            final GeoSpatialConfig geoSpatialConfig = 
                geoSpatial!=null && geoSpatial ?  getContainer().getGeoSpatialConfig() : null;


            final IExtensionFactory xFactory;
            try {
                
            	/*
            	 * Setup the extension factory.
            	 */
                final Class<IExtensionFactory> xfc = 
                    determineExtensionFactoryClass();

                xFactory = xfc.newInstance();

            } catch (InstantiationException e) {
                throw new IllegalArgumentException(
                        AbstractTripleStore.Options.EXTENSION_FACTORY_CLASS, e);
            } catch (IllegalAccessException e) {
                throw new IllegalArgumentException(
                        AbstractTripleStore.Options.EXTENSION_FACTORY_CLASS, e);
            }

            final IInlineURIFactory uriFactory;
            try {
                
                /*
                 * Setup the inline URI factory.
                 */
                final Class<IInlineURIFactory> urifc = 
                    determineInlineURIFactoryClass();

                uriFactory = urifc.newInstance();
                uriFactory.init(vocab);

            } catch (InstantiationException e) {
                throw new IllegalArgumentException(
                        AbstractTripleStore.Options.INLINE_URI_FACTORY_CLASS, e);
            } catch (IllegalAccessException e) {
                throw new IllegalArgumentException(
                        AbstractTripleStore.Options.INLINE_URI_FACTORY_CLASS, e);
            }

            /*
             * Setup the lexicon configuration.
             */
            
            lexiconConfiguration = new LexiconConfiguration<BigdataValue>(
                    blobsThreshold,
                    inlineLiterals, inlineTextLiterals,
                    maxInlineTextLength, inlineBNodes, inlineDateTimes,
                    inlineDateTimesTimeZone,
                    rejectInvalidXSDValues, xFactory, vocab, valueFactory,
                    uriFactory, geoSpatial, geoSpatialConfig);

        }
        
    }
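
    /*
     * Usage note (sketch): a LexiconRelation is normally not constructed
     * directly but obtained from its container, e.g.
     *
     *   final LexiconRelation lex = tripleStore.getLexiconRelation();
     *
     * where tripleStore is an AbstractTripleStore (see
     * AbstractTripleStore#getLexiconRelation(), referenced in comments on
     * the index getters below).
     */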
    
    /**
     * The canonical {@link BigdataValueFactoryImpl} reference (JVM wide) for the
     * lexicon namespace.
     */
	public BigdataValueFactory getValueFactory() {
        
        return valueFactory;
        
    }

	final private BigdataValueFactory valueFactory;
    
    /**
     * Strengthens the return type.
     */
    @Override
    public AbstractTripleStore getContainer() {
        
        return (AbstractTripleStore) super.getContainer();
        
    }
    
    public boolean exists() {

        for(String name : getIndexNames()) {
            
            if (getIndex(name) == null)
                return false;
            
        }
        
        return true;

    }

    @Override
    public LexiconRelation init() {

    	super.init();
    	
		/*
		 * Allow the extensions to resolve their datatype URIs into term
		 * identifiers.
		 */
    	lexiconConfiguration.initExtensions(this);
    	
    	return this;
        
    }
    
    @Override
    public void create() {
        
        final IResourceLock resourceLock = acquireExclusiveLock();
        
        try {

            super.create();

			if (textIndex && inlineTextLiterals
					&& maxInlineTextLength > (4 * Bytes.kilobyte32)) {
				/*
				 * Log message if full text index is enabled and we are inlining
				 * textual literals and MAX_INLINE_TEXT_LENGTH is GT some
				 * threshold value (e.g., 4096). This combination represents an
				 * unreasonable configuration due to the data duplication in the
				 * full text index. (The large literals will be replicated
				 * within the full text index for each token extracted from the
				 * literal by the text analyzer.)
				 */
				log
						.error("Configuration will duplicate large literals within the full text index"
								+ //
								": "
								+ AbstractTripleStore.Options.TEXT_INDEX
								+ "="
								+ textIndex
								+ //
								", "
								+ AbstractTripleStore.Options.INLINE_TEXT_LITERALS
								+ "="
								+ inlineTextLiterals
								+ //
								", "
								+ AbstractTripleStore.Options.MAX_INLINE_TEXT_LENGTH
								+ "=" + maxInlineTextLength//
						);

			}

            final IIndexManager indexManager = getIndexManager();

            // register the indices.

            indexManager
                    .registerIndex(getTerm2IdIndexMetadata(getFQN(LexiconKeyOrder.TERM2ID)));

            indexManager
                    .registerIndex(getId2TermIndexMetadata(getFQN(LexiconKeyOrder.ID2TERM)));

			if (getLexiconConfiguration().getBlobsThreshold() != Integer.MAX_VALUE) {

				// Do not create the BLOBS index if BLOBS support has been disabled.
				indexManager.registerIndex(getBlobsIndexMetadata(getFQN(LexiconKeyOrder.BLOBS)));
				
			}

            if (textIndex) {

                // Create the full text index

                final IValueCentricTextIndexer<?> tmp = getSearchEngine();

                tmp.create();

            }

            /*
             * Note: defer resolution of the newly created index objects. This
             * is mostly about efficiency since the scale-out API does not
             * return the IIndex object when we register the index. 
             */
            
//            terms = super.getIndex(LexiconKeyOrder.TERMS);
//            assert terms != null;

            /*
    		 * Allow the extensions to resolve their datatype URIs into term
    		 * identifiers.
    		 */
        	lexiconConfiguration.initExtensions(this);
            
        } finally {

            unlock(resourceLock);

        }

    }

    @Override
    public void destroy() {

        final IResourceLock resourceLock = acquireExclusiveLock();

        try {

            final IIndexManager indexManager = getIndexManager();

            indexManager.dropIndex(getFQN(LexiconKeyOrder.TERM2ID));
            indexManager.dropIndex(getFQN(LexiconKeyOrder.ID2TERM));
			if (getLexiconConfiguration().getBlobsThreshold() != Integer.MAX_VALUE) {
				// Destroy BLOBS index IFF it exists.
				indexManager.dropIndex(getFQN(LexiconKeyOrder.BLOBS));
			}

            term2id = null;
            id2term = null;
            blobs = null;

            if (textIndex) {

                getSearchEngine().destroy();

                viewRef.set(null);

            }

            // discard the value factory for the lexicon's namespace.
            valueFactory.remove(/*getNamespace()*/);

            termCache.clear();
            
            super.destroy();

        } finally {

            unlock(resourceLock);

        }

    }
    
    /** The reference to the TERM2ID index. */
    volatile private IIndex term2id;

    /** The reference to the ID2TERM index. */
    volatile private IIndex id2term;

    /** The reference to the TERMS index. */
    volatile private IIndex blobs;
    
    /**
     * When true a full text index is maintained.
     * 
     * @see AbstractTripleStore.Options#TEXT_INDEX
     */
    private boolean textIndex;
    
    /**
	 * When true a secondary subject-centric full text index is
	 * maintained.
	 * 
	 * @see AbstractTripleStore.Options#SUBJECT_CENTRIC_TEXT_INDEX
	 * @deprecated Feature was never completed due to scalability issues. See
	 *             BLZG-1548, BLZG-563.
	 */
    @Deprecated
    private final boolean subjectCentricTextIndex;
    
    /**
     * When true the kb is using told blank nodes semantics.
     * 
     * @see AbstractTripleStore.Options#STORE_BLANK_NODES
     */
    private final boolean storeBlankNodes;
    
//    /**
//     * The maximum character length of an RDF {@link Value} before it will be
//     * inserted into the {@link LexiconKeyOrder#BLOBS} index rather than the
//     * {@link LexiconKeyOrder#TERM2ID} and {@link LexiconKeyOrder#ID2TERM}
//     * indices.
//     * 
//     * @see AbstractTripleStore.Options#BLOBS_THRESHOLD
//     */
//    private final int blobsThreshold;

    /**
     * @see AbstractTripleStore.Options#TERMID_BITS_TO_REVERSE
     */
    private final int termIdBitsToReverse;
    
    /**
     * Are primitive and numeric xsd datatype literals being inlined into the statement indices.
     * 
     * {@link AbstractTripleStore.Options#INLINE_XSD_DATATYPE_LITERALS}
     */
    final private boolean inlineLiterals;
    
    /**
     * Are textual literals being inlined into the statement indices.
     * 
     * {@link AbstractTripleStore.Options#INLINE_TEXT_LITERALS}
     */
    final private boolean inlineTextLiterals;

    /**
     * The maximum length of xsd:string literals which will be
     * inlined into the statement indices. The {@link XSDStringExtension} is
     * registered when GT ZERO.
     */
    final private int maxInlineTextLength;
    
    /**
     * Are bnodes being inlined into the statement indices.
     * 
     * {@link AbstractTripleStore.Options#INLINE_BNODES}
     */
    final private boolean inlineBNodes;
    
    /**
     * Are xsd:dateTime literals being inlined into the statement indices.
     * 
     * {@link AbstractTripleStore.Options#INLINE_DATE_TIMES}
     */
    final private boolean inlineDateTimes;

    /**
     * When true, XSD datatype literals which do not validate
     * against their datatype will be rejected rather than inlined.
     * 
     * {@link AbstractTripleStore.Options#REJECT_INVALID_XSD_VALUES}
     */
    final private boolean rejectInvalidXSDValues;
    
	/**
	 * The default time zone to be used for decoding inline xsd:datetime
	 * literals from the statement indices. Will use the current time zone
	 * unless otherwise specified using
	 * {@link AbstractTripleStore.Options#DEFAULT_INLINE_DATE_TIMES_TIMEZONE}.
	 */
    final private TimeZone inlineDateTimesTimeZone;

    /**
     * Return true if datatype literals are being inlined into
     * the statement indices.
     */
    final public boolean isInlineLiterals() {
        
        return inlineLiterals;
        
    }

    /**
     * Return the maximum length a string value which may be inlined into the
     * statement indices.
     */
    final public int getMaxInlineStringLength() {
        
        return maxInlineTextLength;
        
    }

    /**
     * Return true if xsd:datetime literals are being inlined into
     * the statement indices.
     */
    final public boolean isInlineDateTimes() {
        
        return inlineDateTimes;
        
    }

    /**
     * Return the default time zone to be used for inlining.
     */
    final public TimeZone getInlineDateTimesTimeZone() {
        
        return inlineDateTimesTimeZone;
        
    }
    
    /**
     * The #of low bits from the term identifier that are reversed and
     * rotated into the high bits when it is assigned.
     * 
     * @see AbstractTripleStore.Options#TERMID_BITS_TO_REVERSE
     */
    final public int getTermIdBitsToReverse() {
        
        return termIdBitsToReverse;
        
    }
    
    /**
     * true iff blank nodes are being stored in the lexicon's
     * forward index.
     * 
     * @see AbstractTripleStore.Options#STORE_BLANK_NODES
     */
    final public boolean isStoreBlankNodes() {
        
        return storeBlankNodes;
        
    }
    
    /**
     * true iff the (value centric) full text index is enabled.
     * 
     * @see AbstractTripleStore.Options#TEXT_INDEX
     */
    final public boolean isTextIndex() {
        
        return textIndex;
        
    }

    /**
	 * true iff the subject-centric full text index is enabled.
	 * 
	 * @see AbstractTripleStore.Options#SUBJECT_CENTRIC_TEXT_INDEX
	 * 
	 * @deprecated Feature was never completed due to scalability issues. See
	 *             BLZG-1548, BLZG-563.
	 */
    @Deprecated
    final public boolean isSubjectCentricTextIndex() {
        
        return subjectCentricTextIndex;
        
    }

	/**
	 * Overridden to use local cache of the index reference.
	 */
    @Override
    public IIndex getIndex(final IKeyOrder<?> keyOrder) {

        if (keyOrder == LexiconKeyOrder.ID2TERM) {

            return getId2TermIndex();

        } else if (keyOrder == LexiconKeyOrder.TERM2ID) {

            return getTerm2IdIndex();

        } else if (keyOrder == LexiconKeyOrder.BLOBS) {

            return getBlobsIndex();

        } else {

            throw new AssertionError("keyOrder=" + keyOrder);

        }

    }

    final public IIndex getTerm2IdIndex() {

        if (term2id == null) {

            synchronized (this) {

                if (term2id == null) {

                    final long timestamp = getTimestamp();
                    
                    if (TimestampUtility.isReadWriteTx(timestamp)) {
                        /*
                         * We always use the unisolated view of the lexicon
                         * indices for mutation and the lexicon indices do NOT
                         * set the [isolatable] flag even if the kb supports
                         * full tx isolation. This is because we use an
                         * eventually consistent strategy to write on the
                         * lexicon indices.
                         * 
                         * Note: It appears that we have already ensured that
                         * we will be using the unisolated view of the lexicon
                         * relation in AbstractTripleStore#getLexiconRelation()
                         * so this code path should not be evaluated.
                         */
                        term2id = AbstractRelation
                                .getIndex(getIndexManager(),
                                        getFQN(LexiconKeyOrder.TERM2ID),
                                        ITx.UNISOLATED);
                    } else {
                        term2id = super.getIndex(LexiconKeyOrder.TERM2ID);
                    }

                    if (term2id == null)
                        throw new IllegalStateException();

                }

            }
            
        }

        return term2id;

    }

    final public IIndex getId2TermIndex() {

        if (id2term == null) {

            synchronized (this) {
                
                if (id2term == null) {

                    final long timestamp = getTimestamp();
                    
                    if (TimestampUtility.isReadWriteTx(timestamp)) {
                        /*
                         * We always use the unisolated view of the lexicon
                         * indices for mutation and the lexicon indices do NOT
                         * set the [isolatable] flag even if the kb supports
                         * full tx isolation. This is because we use an
                         * eventually consistent strategy to write on the
                         * lexicon indices.
                         * 
                         * Note: It appears that we have already ensured that
                         * we will be using the unisolated view of the lexicon
                         * relation in AbstractTripleStore#getLexiconRelation()
                         * so this code path should not be evaluated.
                         */
                        id2term = AbstractRelation
                                .getIndex(getIndexManager(),
                                        getFQN(LexiconKeyOrder.ID2TERM),
                                        ITx.UNISOLATED);
                    } else {
                        id2term = super.getIndex(LexiconKeyOrder.ID2TERM);
                    }
                
                    if (id2term == null)
                        throw new IllegalStateException();

                }

            }

        }

        return id2term;

    }

    final public IIndex getBlobsIndex() {

        if (blobs == null) {

            synchronized (this) {

                if (blobs == null) {

                    final long timestamp = getTimestamp();
                    
                    if (TimestampUtility.isReadWriteTx(timestamp)) {
                        /*
                         * We always use the unisolated view of the lexicon
                         * indices for mutation and the lexicon indices do NOT
                         * set the [isolatable] flag even if the kb supports
                         * full tx isolation. This is because we use an
                         * eventually consistent strategy to write on the
                         * lexicon indices.
                         * 
                         * Note: It appears that we have already ensured that
                         * we will be using the unisolated view of the lexicon
                         * relation in AbstractTripleStore#getLexiconRelation()
                         * so this code path should not be evaluated.
                         */
                        blobs = AbstractRelation
                                .getIndex(getIndexManager(),
                                        getFQN(LexiconKeyOrder.BLOBS),
                                        ITx.UNISOLATED);
                    } else {
                        blobs = super.getIndex(LexiconKeyOrder.BLOBS);
                    }

                    if (blobs == null)
                        throw new IllegalStateException();

                }

            }
            
        }

        return blobs;

    }
    
    /**
     * A factory returning the softly held singleton for the
     * {@link FullTextIndex}.
     * 
     * @see AbstractTripleStore.Options#TEXT_INDEX
     * 
     * @todo replace with the use of the {@link IResourceLocator} since it
     *       already imposes a canonicalizing mapping for the index name
     *       and timestamp inside of a JVM.
     */
    public IValueCentricTextIndexer<?> getSearchEngine() {

        if (!textIndex)
            return null;

        /*
         * Note: Double-checked locking pattern requires [volatile] variable or
         * AtomicReference. This uses the AtomicReference since that gives us a
         * lock object which is specific to this request.
         */
        if (viewRef.get() == null) {

            synchronized (viewRef) {// NB: Ignore find bugs complaint per above.

                if (viewRef.get() == null) {

                    final IValueCentricTextIndexer<?> tmp;
                    try {
                        final Class<IValueCentricTextIndexer> vfc = determineTextIndexerClass();
                        final Method gi = vfc.getMethod("getInstance",
                                IIndexManager.class, String.class, Long.class,
                                Properties.class);
                        tmp = (IValueCentricTextIndexer<?>) gi.invoke(null/* object */,
                                getIndexManager(), getNamespace(),
                                getTimestamp(), getProperties());
                        if(tmp instanceof ILocatableResource) {
                        	((ILocatableResource)tmp).init();
                        }
                        viewRef.set(tmp);
                    } catch (Throwable e) {
                        throw new IllegalArgumentException(
                                AbstractTripleStore.Options.TEXT_INDEXER_CLASS,
                                e);
                    }

                }

            }

        }

        return viewRef.get();

    }

    /**
	 * A factory returning the softly held singleton for the
	 * {@link FullTextIndex} representing the subject-centric full text index.
	 * 
	 * @see AbstractTripleStore.Options#TEXT_INDEX
	 * @deprecated Feature was never completed due to scalability issues. See
	 *             BLZG-1548, BLZG-563.
	 */
    @Deprecated
    public ISubjectCentricTextIndexer getSubjectCentricSearchEngine() {

    	if (!subjectCentricTextIndex)
            return null;

        /*
         * Note: Double-checked locking pattern requires [volatile] variable or
         * AtomicReference. This uses the AtomicReference since that gives us a
         * lock object which is specific to this request.
         */
        if (viewRef2.get() == null) {

            synchronized (viewRef2) {// NB: Ignore find bugs complaint per above.

                if (viewRef2.get() == null) {

                    final ISubjectCentricTextIndexer tmp;
                    try {
                        final Class<ISubjectCentricTextIndexer> vfc = determineSubjectCentricTextIndexerClass();
                        final Method gi = vfc.getMethod("getInstance",
                                IIndexManager.class, String.class, Long.class,
                                Properties.class);
                        tmp = (ISubjectCentricTextIndexer) gi.invoke(null/* object */,
                                getIndexManager(), getNamespace(),
                                getTimestamp(), getProperties());
                        if(tmp instanceof ILocatableResource) {
                        	((ILocatableResource)tmp).init();
                        }
                        viewRef2.set(tmp);
                    } catch (Throwable e) {
                        throw new IllegalArgumentException(
                                AbstractTripleStore.Options.SUBJECT_CENTRIC_TEXT_INDEXER_CLASS,
                                e);
                    }

                }

            }

        }

        return viewRef2.get();

    }

    /**
     * Return the {@link IndexMetadata} for the TERM2ID index.
     * 
     * @param name
     *            The name of the index.
     *            
     * @return The {@link IndexMetadata}.
     */
    protected IndexMetadata getTerm2IdIndexMetadata(final String name) {

        final IndexMetadata metadata = newIndexMetadata(name);

        metadata.setTupleSerializer(new Term2IdTupleSerializer(getProperties()));
        
        return metadata;

    }

    /**
     * Return the {@link IndexMetadata} for the ID2TERM index.
     * 
     * @param name
     *            The name of the index.
     * 
     * @return The {@link IndexMetadata}.
     * 
     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/506">
     *      Load, closure and query performance in 1.1.x versus 1.0.x </a>
     */
    protected IndexMetadata getId2TermIndexMetadata(final String name) {

        final IndexMetadata metadata = newIndexMetadata(name);

        metadata.setTupleSerializer(new Id2TermTupleSerializer(
                getNamespace(), getValueFactory()));

        /*
         * @see https://sourceforge.net/apps/trac/bigdata/ticket/506 (Load,
         * closure and query performance in 1.1.x versus 1.0.x)
         */
        if(true) {

            // enable raw record support.
            metadata.setRawRecords(true);

            /*
             * Very small RDF values can be inlined into the index, but after
             * that threshold we want to have the values out of line on the
             * backing store.
             * 
             * Note: I have tried it at 16 and 24 on LUBM U50. Raising it to 24
             * increases the data on the disk and might have a small negative
             * effect on the load and query rates. No difference on closure
             * rates was observed. It might all be in the noise, but it does
             * seem that less data on the disk is better.
             * 
             * @see https://sourceforge.net/apps/trac/bigdata/ticket/506 (Load,
             * closure and query performance in 1.1.x versus 1.0.x)
             */
            metadata.setMaxRecLen(16);
        
        }
        
        return metadata;

    }

	/**
	 * Return the {@link IndexMetadata} for the TERMS index.
	 * 
	 * @param name
	 *            The name of the index.
	 *            
	 * @return The {@link IndexMetadata}.
	 */
    protected IndexMetadata getBlobsIndexMetadata(final String name) {

		final IndexMetadata metadata = new IndexMetadata(getIndexManager(),
				getProperties(), name, UUID.randomUUID(), IndexTypeEnum.BTree);

        metadata.setTupleSerializer(new BlobsTupleSerializer(getNamespace(),
                valueFactory));

		// enable raw record support.
		metadata.setRawRecords(true);

        /*
         * The presumption is that we are storing large literals (blobs) in this
         * index so we always want to write them on raw records rather than have
         * them be inline in the leaves of the index.
         */
        metadata.setMaxRecLen(0);

        if ((getIndexManager() instanceof IBigdataFederation)
                && ((IBigdataFederation) getIndexManager()).isScaleOut()) {

            /*
             * Apply a constraint such that all entries within the same
             * collision bucket lie in the same shard.
             */
            
            metadata.setSplitHandler(new BlobsIndexSplitHandler());

        }

        return metadata;

    }

    public Set<String> getIndexNames() {

        return indexNames;

    }

    public Iterator<IKeyOrder<BigdataValue>> getKeyOrders() {
        
        return keyOrders.iterator();
        
    }

    public LexiconKeyOrder getPrimaryKeyOrder() {
        
    	return LexiconKeyOrder.BLOBS;
        
    }

    /**
     * Note: this method is part of the mutation API. It is primarily (at this
     * point, only) invoked by the rule execution layer and, at present, no
     * rules can entail terms into the lexicon.
     * 
     * @throws UnsupportedOperationException
     */
    public BigdataValue newElement(List<BOp> a, IBindingSet bindingSet) {

        throw new UnsupportedOperationException();
        
    }
    
    public Class<BigdataValue> getElementClass() {

        return BigdataValue.class;

    }

    /**
     * Note: this method is part of the mutation API. It is primarily (at this
     * point, only) invoked by the rule execution layer and, at present, no
     * rules can entail terms into the lexicon.
     * 
     * @throws UnsupportedOperationException
     */
    public long delete(IChunkedOrderedIterator<BigdataValue> itr) {

        throw new UnsupportedOperationException();

    }

    /**
     * Note: this method is part of the mutation API. It is primarily (at this
     * point, only) invoked by the rule execution layer and, at present, no
     * rules can entail terms into the lexicon.
     * 
     * @throws UnsupportedOperationException
     */
    public long insert(IChunkedOrderedIterator<BigdataValue> itr) {

        throw new UnsupportedOperationException();
        
    }

    /**
     * A scan of all literals having the given literal as a prefix.
     * 
     * @param lit
     *            A literal.
     * 
     * @return An iterator visiting the term identifiers for the matching
     *         {@link Literal}s.
     * 
     *         TODO Prefix scan only visits the TERM2ID index (blobs and inline
     *         literals will not be observed). This should be mapped onto a free
     *         text index query instead. In order to have the same semantics we
     *         must also verify that (a) the prefix match is at the start of the
     *         literal; and (b) the match is contiguous.
     */
    @SuppressWarnings("rawtypes")
    public Iterator<IV> prefixScan(final Literal lit) {

        if (lit == null)
            throw new IllegalArgumentException();
        
        return prefixScan(new Literal[] { lit });
        
    }

    /**
     * A scan of all literals having any of the given literals as a prefix.
     * 
     * @param lits
     *            An array of literals.
     * 
     * @return An iterator visiting the term identifiers for the matching
     *         {@link Literal}s.
     * 
     *         TODO Prefix scan only visits the TERM2ID index (blobs and inline
     *         literals will not be observed). This should be mapped onto a free
     *         text index query instead. In order to have the same semantics we
     *         must also verify that (a) the prefix match is at the start of the
     *         literal; and (b) the match is contiguous.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public Iterator<IV> prefixScan(final Literal[] lits) {

        if (lits == null || lits.length == 0)
            throw new IllegalArgumentException();

        if (log.isInfoEnabled()) {

            log.info("#lits=" + lits.length);

        }

        /**
         * The KeyBuilder used to form the prefix keys.
         * 
         * Note: The prefix keys are formed with PRIMARY strength. This is
         * necessary in order to match all keys in the index since it causes the
         * secondary characteristics to NOT be included in the prefix key even
         * if they are present in the keys in the index.
         * 
         * @see "Name2Addr.indexNameScan(prefix) uses scan + filter"
         */
        final LexiconKeyBuilder keyBuilder = ((Term2IdTupleSerializer) getTerm2IdIndex()
                .getIndexMetadata().getTupleSerializer())
                .getLexiconPrimaryKeyBuilder();
//        {
//
//            final Properties properties = new Properties();
//
//            properties.setProperty(KeyBuilder.Options.STRENGTH,
//                    StrengthEnum.Primary.toString());
//
//            keyBuilder = new Term2IdTupleSerializer(
//                    new DefaultKeyBuilderFactory(properties)).getLexiconKeyBuilder();
//
//        }

        /*
         * Formulate the keys[].
         * 
         * Note: Each key is encoded with the appropriate bytes to indicate the
         * kind of literal (plain, languageCode, or datatype literal).
         * 
         * Note: The key builder was chosen to only encode the PRIMARY
         * characteristics so that we obtain a prefix[] suitable for the
         * completion scan.
         */

        final byte[][] keys = new byte[lits.length][];

        for (int i = 0; i < lits.length; i++) {

            final Literal lit = lits[i];

            if (lit == null)
                throw new IllegalArgumentException();

            keys[i] = keyBuilder.value2Key(lit);

        }

        final IIndex ndx = getTerm2IdIndex();

        final Iterator<IV> termIdIterator = new Striterator(
                ndx
                        .rangeIterator(
                                null/* fromKey */,
                                null/* toKey */,
                                0/* capacity */,
                                IRangeQuery.DEFAULT | IRangeQuery.CURSOR,
                                // prefix filter.
                                new PrefixFilter<BigdataValue>(keys)))
                .addFilter(new Resolver() {

                    private static final long serialVersionUID = 1L;

                    /**
                     * Decode the value, which is the term identifier.
                     */
                    @Override
                    protected Object resolve(final Object arg0) {

                        final byte[] bytes = ((ITuple) arg0).getValue(); 
                        
                        return IVUtility.decode(bytes);

                    }
                });

        return termIdIterator;
            
    }
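
    /*
     * Usage sketch for the prefix scan: visit the IVs of all literals
     * starting with "abc" (LiteralImpl is the openrdf Literal
     * implementation; variable names are illustrative only).
     *
     *   final Iterator<IV> itr = lexiconRelation.prefixScan(
     *           new LiteralImpl("abc"));
     *   while (itr.hasNext()) {
     *       final IV iv = itr.next();
     *       // ... resolve or join against the statement indices ...
     *   }
     */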

    /**
     * {@inheritDoc}
     * 
     * @see IDatatypeURIResolver
     */
    public BigdataURI resolve(final URI uri) {

        if(uri == null)
            throw new IllegalArgumentException();

        // Turn the caller's argument into a BigdataURI.
        final BigdataURI value = valueFactory.asValue(uri);

        // Lookup against the Vocabulary.
        final IV iv = vocab.get(value);

        if (iv == null) {

            /*
             * The request URI is not part of the pre-declared vocabulary.
             */
            
            throw new NoSuchVocabularyItem("uri=" + uri + ", vocab=" + vocab);

        }
        
        // Cache the IV on the BigdataValue.
        value.setIV(iv);

        return value;
        
//        final BigdataURI buri = valueFactory.asValue(uri);
//        
//        if (buri.getIV() == null) {
//            
//            // Will set IV as a side effect
//            final IV iv = getTermId(buri);
//
//            if (iv == null) {
//
//                // Will set IV as a side effect
//                addTerms(new BigdataValue[] { buri }, 1, false);
//
//            }
//
//        }
//        
//        return buri.getIV() != null ? buri : null;
        
    }
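
    /*
     * Usage sketch for resolve(): look up a pre-declared vocabulary item,
     * caching its IV on the returned value as a side effect (RDF.TYPE here
     * stands in for any URI declared by the Vocabulary).
     *
     *   final BigdataURI rdfType = lexiconRelation.resolve(RDF.TYPE);
     *   assert rdfType.getIV() != null;
     *
     * A NoSuchVocabularyItem exception indicates that the URI was not
     * declared.
     */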

    /**
     * Return true iff this {@link Value} would be stored in the
     * {@link LexiconKeyOrder#BLOBS} index.
     * 
     * @param v
     *            The value.
     * 
     * @return true if it is a "large value" according to the
     *         configuration of the lexicon.
     * 
     * @see AbstractTripleStore.Options#BLOBS_THRESHOLD
     */
    public boolean isBlob(final Value v) {

        final int blobsThreshold = lexiconConfiguration.getBlobsThreshold();

        if (blobsThreshold == 0)
            return true;
        
        final long strlen = BigdataValueSerializer.getStringLength(v);
        
		if (strlen >= blobsThreshold) {

			if (lexiconConfiguration.isBlobsDisabled()) {

				throw new IllegalArgumentException("Large literal but BLOBS index is disabled: strlen=" + strlen);

			}

        	return true;
        }
        
        return false;

    }
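
    /*
     * Worked example (sketch, assuming BLOBS_THRESHOLD=256): a Value whose
     * serialized string length is 300 satisfies strlen >= threshold and is
     * routed to the BLOBS index, while a 10-character literal goes to the
     * TERM2ID/ID2TERM indices. A threshold of ZERO routes every non-inline
     * Value to BLOBS.
     */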

//    /**
//     * Return the threshold at which a literal would be stored in the
//     * {@link LexiconKeyOrder#BLOBS} index.
//     * 
//     * @see AbstractTripleStore.Options#BLOBS_THRESHOLD
//     */
//    public int getBlobsThreshold() {
//        
//        return blobsThreshold;
//        
//    }
    
    /**
     * Batch insert of terms into the database.
     * <p>
     * Note: Duplicate {@link BigdataValue} references and {@link BigdataValue}s
     * that already have an assigned term identifier are ignored by this
     * operation.
     * <p>
     * Note: This implementation is designed to use unisolated batch writes on
     * the terms and ids index that guarantee consistency.
     * <p>
     * If the full text index is enabled, then the terms will also be inserted
     * into the full text index.
     * 
     * @param values
     *            An array whose elements [0:numTerms-1] will be inserted.
     * @param numTerms
     *            The #of terms to insert.
     * @param readOnly
     *            When true, unknown terms will not be inserted
     *            into the database. Otherwise unknown terms are inserted into
     *            the database.
     * @return The #of distinct terms lacking a pre-assigned term identifier.
     *         If writes were permitted, then this is also the #of terms
     *         written onto the index.
     * 
     *         TODO If we refactor the search index shortly to use a
     *         [token,S,P,O,(C)] key then search will become co-threaded with
     *         the assertion and retraction of statements (writes on the
     *         statement indices) rather than with ID2TERM writes.
     */
    public long addTerms(final BigdataValue[] values, final int numTerms,
            final boolean readOnly) {

        if (log.isDebugEnabled())
            log.debug("numTerms=" + numTerms + ", readOnly=" + readOnly);

        /*
         * Ensure that BigdataValue objects belong to the correct ValueFactory
         * for this LexiconRelation.
         * 
         * @see BLZG-1593 (LexiconRelation.addTerms() does not reject
         * BigdataValue objects from another namespace nor call asValue() on
         * them to put them in the correct namespace)
         */
        {
            final BigdataValueFactory vf = getValueFactory();
            for (int i = 0; i < numTerms; i++) {
                final BigdataValue tmp = vf.asValue(values[i]);
                if (tmp != values[i]) {
                    /*
                     * Note: When the BigdataValue does not belong to this
                     * namespace the IV can not be set on the BigdataValue as a
                     * side-effect.
                     */
                    throw new RuntimeException(
                            "Value does not belong to this namespace: value="
                                    + values[i]);
                }
                values[i] = tmp;
            }
        }

        /*
         * Filter out inline terms from the supplied terms array and create a
         * collection of Values which will be resolved against the
         * TERM2ID/ID2TERM index and a collection of Values which will be
         * resolved against the BLOBS index. Duplicates are filtered out and
         * post-processed once the distinct BigdataValues have been resolved.
         */

        // Will be resolved against TERM2ID/ID2TERM.
        final LinkedHashMap<BigdataValue, BigdataValue> terms = new LinkedHashMap<BigdataValue, BigdataValue>(
                numTerms);

        // Will be resolved against BLOBS.
        final LinkedHashMap<BigdataValue, BigdataValue> blobs = new LinkedHashMap<BigdataValue, BigdataValue>(/* default */);

        // Either same reference -or- distinct reference but equals().
        final List<BigdataValue> dups = new LinkedList<BigdataValue>();

        // Inline literals that should still make it into the text index.
        final LinkedHashSet<BigdataValue> textIndex = new LinkedHashSet<BigdataValue>(/* default */);

        int nunknown = 0, nblobs = 0, nterms = 0;

        for (int i = 0; i < numTerms; i++) {

            final BigdataValue v = values[i];

            /*
             * Try to get an inline IV for the BigdataValue (sets the IV as a
             * side effect if not null).
             */
            if (getInlineIV(v) == null) {

                /*
                 * Value can not be inlined. We need to figure out which index
                 * we need to use for this Value.
                 * 
                 * Note: This also identifies duplicates (whether they are the
                 * same reference or distinct references which are equals()).
                 * Duplicates are put onto a List. That List is scanned after
                 * we have resolved Values against the indices so we can set
                 * the IVs for the duplicates as well.
                 */

                if (isBlob(v)) {

                    if (blobs.get(v) != null)
                        dups.add(v);
                    else {
                        if (blobs.put(v, v) != null)
                            throw new AssertionError();
                        nblobs++;
                    }

                } else {

                    if (terms.get(v) != null)
                        dups.add(v);
                    else {
                        if (terms.put(v, v) != null)
                            throw new AssertionError();
                        nterms++;
                    }

                }

                nunknown++;

            } else if (!readOnly && this.textIndex
                    && v instanceof BigdataLiteral) {

                /*
                 * Some inline IVs will be text indexed per the
                 * LexiconConfiguration.
                 */
                final URI dt = ((BigdataLiteral) v).getDatatype();
                if (dt == null || dt.equals(XSD.STRING)) {
                    // always text index strings, even inline ones
                    textIndex.add(v);
                } else if (lexiconConfiguration.isInlineDatatypeToTextIndex(dt)) {
                    textIndex.add(v);
                }

            }

        }

        /*
         * Because we sometimes text index inline literals, we cannot assume
         * we're done just because nunknown == 0.
         */
        if (nunknown == 0 && textIndex.isEmpty()) {

            return 0;

        }

        /*
         * Batch insert/lookup of Values against the indices. No duplicates.
         * No inline values.
         * 
         * FIXME Co-thread the writes on the BLOBS and TERM2ID indices.
         */

        final WriteTaskStats stats = new WriteTaskStats();

        if (nblobs > 0) {

            final BigdataValue[] a = blobs.keySet().toArray(
                    new BigdataValue[nblobs]);

            addBlobs(a, a.length, readOnly, stats);

        }

        if (nterms > 0) {

            final BigdataValue[] a = terms.keySet().toArray(
                    new BigdataValue[nterms]);

            addTerms(a, a.length, readOnly, stats);

        }

        if (this.textIndex && textIndex.size() > 0) {

            /*
             * There were some inline literals that need to make it into the
             * text index. That is handled here.
             * 
             * See BLZG-1525
             */
            try {

                stats.fullTextIndexTime.addAndGet(new FullTextIndexWriterTask(
                        getSearchEngine(), textIndex.size()/* capacity */,
                        textIndex.iterator()).call());

            } catch (Exception ex) {

                throw new RuntimeException(ex);

            }

        }

        if (!dups.isEmpty()) {

            /*
             * There was at least one BigdataValue which was a duplicate
             * (either the same reference or a distinct reference which is
             * equals()). Now that we have resolved the IVs against the
             * indices, we run through the List of duplicates and resolve the
             * IVs against the TermIV and BlobIV maps. A duplicate will wind
             * up with a resolved IV if it was found/written on the
             * appropriate index.
             */
            for (BigdataValue dup : dups) {

                BigdataValue resolved = blobs.get(dup);

                if (resolved == null)
                    resolved = terms.get(dup);

                if (resolved != null) {

                    final IV iv = resolved.getIV();

                    if (iv != null) {

                        dup.setIV(iv);

                    }

                }

            }

        }

        if (log.isInfoEnabled() && readOnly && stats.nunknown.get() > 0) {

            log.info("There are " + stats.nunknown + " unknown terms out of "
                    + numTerms + " given");

        }

        return stats.ndistinct.get();

    }

    // BLOBS+SEARCH
    private void addBlobs(final BigdataValue[] terms, final int numTerms,
            final boolean readOnly, final WriteTaskStats stats) {

        final KVO<BigdataValue>[] a;
        try {

            // write on the BLOBS index (sync sharded RPC in scale-out)
            a = new BlobsWriteTask(getBlobsIndex(), valueFactory, readOnly,
                    storeBlankNodes, numTerms, terms, stats).call();

        } catch (Exception ex) {

            throw new RuntimeException(ex);

        }

        /*
         * Note: [a] is dense and its elements are distinct. It will be in
         * TERMS index order.
         */
        final int ndistinct = a.length;

        if (ndistinct == 0) {

            // Nothing left to do.
            return;

        }

        if (!readOnly && textIndex) {

            /*
             * Write on the full text index.
             */
            final long _begin = System.currentTimeMillis();

            try {

                /*
                 * Note: a[] is in BLOBS index order at this point and can
                 * contain both duplicates and terms that already have term
                 * identifiers and therefore are already in the index.
                 * 
                 * [TODO: Is it true that it can have duplicates? This is in
                 * direct conflict with the comments above. Figure out what is
                 * what and update as appropriate.]
                 * 
                 * Therefore, instead of a[], we use an iterator that resolves
                 * the distinct terms in a[] (it is dense) to do the indexing.
*/ final URI dt = ((BigdataLiteral) v).getDatatype(); if (dt == null || dt.equals(XSD.STRING)) { // always text index strings, even inline ones textIndex.add(v); } else if (lexiconConfiguration.isInlineDatatypeToTextIndex(dt)) { textIndex.add(v); } } } /* * Because we sometimes text index inline literals, we cannot assume * we're done just because nunknown == 0. */ if (nunknown == 0 && textIndex.isEmpty()) { return 0; } /* * Batch insert/lookup of Values against the indices. No duplicates. No * inline values. * * FIXME Co-thread the writes on the BLOBS and TERM2ID indices. */ final WriteTaskStats stats = new WriteTaskStats(); if (nblobs > 0) { final BigdataValue[] a = blobs.keySet().toArray( new BigdataValue[nblobs]); addBlobs(a, a.length, readOnly, stats); } if (nterms > 0) { final BigdataValue[] a = terms.keySet().toArray( new BigdataValue[nterms]); addTerms(a, a.length, readOnly, stats); } if (this.textIndex && textIndex.size() > 0) { /* * There were some inline literals that need to make it into the * text index. That is handled here. * * See BLZG-1525 */ try { stats.fullTextIndexTime .addAndGet(new FullTextIndexWriterTask( getSearchEngine(), textIndex.size()/* capacity */, textIndex.iterator()) .call()); } catch (Exception ex) { throw new RuntimeException(ex); } } if(!dups.isEmpty()) { /* * There was at least one BigdataValue which was a duplicate (either * the same reference or a distinct reference which is equals()). * Now that we have resolved the IVs against the indices, we run * through the List of duplicates and resolve the IVs against the * TermIV and BlobIV maps. A duplicate will wind up with a resolved * IV if it was found/written on the appropriate index. */ for(BigdataValue dup : dups) { BigdataValue resolved = blobs.get(dup); if (resolved == null) resolved = terms.get(dup); if (resolved != null) { final IV iv = resolved.getIV(); if (iv != null) { dup.setIV(iv); } } } } if (log.isInfoEnabled() && readOnly && stats.nunknown.get() > 0) { log.info("There are " + stats.nunknown + " unknown terms out of " + numTerms + " given"); } return stats.ndistinct.get(); } // BLOBS+SEARCH private void addBlobs(final BigdataValue[] terms, final int numTerms, final boolean readOnly, final WriteTaskStats stats) { final KVO[] a; try { // write on the BLOBS index (rync sharded RPC in scale-out) a = new BlobsWriteTask(getBlobsIndex(), valueFactory, readOnly, storeBlankNodes, numTerms, terms, stats).call(); } catch (Exception ex) { throw new RuntimeException(ex); } /* * Note: [a] is dense and its elements are distinct. It will be in TERMS * index order. */ final int ndistinct = a.length; if (ndistinct == 0) { // Nothing left to do. return; } if (!readOnly && textIndex) { /* * Write on the full text index. */ final long _begin = System.currentTimeMillis(); try { /* * Note: a[] is in BLOBS index order at this point and can * contain both duplicates and terms that already have term * identifiers and therefore are already in the index. * * [TODO: Is it true that it can have duplicates? This is in * direct conflict with the comments above. Figure out what is * what and update as appropriate.] * * Therefore, instead of a[], we use an iterator that resolves * the distinct terms in a[] (it is dense) to do the indexing. 
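    /*
     * Usage sketch (illustrative, not part of the original source): batch
     * resolution/insert of terms, with the IVs set on the supplied values as
     * a side-effect (assumes [lex] is an unisolated view of this relation;
     * the URI and literal below are hypothetical):
     */
//    final BigdataValueFactory vf = lex.getValueFactory();
//    final BigdataValue[] batch = new BigdataValue[] {
//            vf.createURI("http://example.org/a"),
//            vf.createLiteral("hello") };
//    lex.addTerms(batch, batch.length, false/* readOnly */);
//    // batch[i].getIV() is now non-null for each resolved/written term.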
    // BLOBS+SEARCH
    private void addBlobs(final BigdataValue[] terms, final int numTerms,
            final boolean readOnly, final WriteTaskStats stats) {

        final KVO<BigdataValue>[] a;
        try {
            // write on the BLOBS index (sync sharded RPC in scale-out)
            a = new BlobsWriteTask(getBlobsIndex(), valueFactory, readOnly,
                    storeBlankNodes, numTerms, terms, stats).call();
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }

        /*
         * Note: [a] is dense and its elements are distinct. It will be in
         * TERMS index order.
         */
        final int ndistinct = a.length;

        if (ndistinct == 0) {

            // Nothing left to do.
            return;

        }

        if (!readOnly && textIndex) {

            /*
             * Write on the full text index.
             */
            final long _begin = System.currentTimeMillis();

            try {

                /*
                 * Note: a[] is in BLOBS index order at this point and can
                 * contain both duplicates and terms that already have term
                 * identifiers and therefore are already in the index.
                 * 
                 * [TODO: Is it true that it can have duplicates? This is in
                 * direct conflict with the comments above. Figure out what is
                 * what and update as appropriate.]
                 * 
                 * Therefore, instead of a[], we use an iterator that resolves
                 * the distinct terms in a[] (it is dense) to do the indexing.
                 */
                @SuppressWarnings({ "unchecked", "rawtypes" })
                final Iterator itr = new Striterator(
                        new ChunkedArrayIterator(ndistinct, a, null/* keyOrder */))
                        .addFilter(new Resolver() {

                            private static final long serialVersionUID = 1L;

                            @Override
                            protected Object resolve(final Object obj) {

                                return ((KVO) obj).obj;

                            }
                        });

                stats.fullTextIndexTime.addAndGet(new FullTextIndexWriterTask(
                        getSearchEngine(), ndistinct/* capacity */, itr)
                        .call());

            } catch (Exception e) {

                throw new RuntimeException(e);

            }

            stats.indexTime.addAndGet(System.currentTimeMillis() - _begin);

        }

    }

    // TERM2ID/ID2TERM+SEARCH
    private void addTerms(final BigdataValue[] terms, final int numTerms,
            final boolean readOnly, final WriteTaskStats stats) {

        final KVO<BigdataValue>[] a;
        try {
            // write on the forward index (sync RPC)
            a = new Term2IdWriteTask(getTerm2IdIndex(), readOnly,
                    storeBlankNodes, termIdBitsToReverse, numTerms, terms,
                    stats).call();
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }

        /*
         * Note: [a] is dense and its elements are distinct. It will be in sort
         * key order for the Values.
         */
        final int ndistinct = a.length;

        if (ndistinct == 0) {

            // Nothing left to do.
            return;

        }

        if (!readOnly) {

            {

                /*
                 * Sort terms based on their assigned termId (when interpreted
                 * as unsigned long integers).
                 * 
                 * Note: We sort before the index writes since we will
                 * co-thread the reverse index write and the full text index
                 * write. Sorting first lets us read from the same array.
                 */

                final long _begin = System.currentTimeMillis();

                Arrays.sort(a, 0, ndistinct, KVOTermIdComparator.INSTANCE);

                stats.keySortTime.add(System.currentTimeMillis() - _begin);

            }

            /*
             * Write on the reverse index and the full text index.
             */
            {

                final long _begin = System.currentTimeMillis();

                final List<Callable<Long>> tasks = new LinkedList<Callable<Long>>();

                tasks.add(new ReverseIndexWriterTask(getId2TermIndex(),
                        valueFactory, a, ndistinct, storeBlankNodes));

                if (textIndex) {

                    /*
                     * Note: terms[] is in termId order at this point and can
                     * contain both duplicates and terms that already have term
                     * identifiers and therefore are already in the index.
                     * 
                     * Therefore, instead of terms[], we use an iterator that
                     * resolves the distinct terms in a[] (it is dense) to do
                     * the indexing.
                     */
                    @SuppressWarnings({ "unchecked", "rawtypes" })
                    final Iterator itr = new Striterator(
                            new ChunkedArrayIterator(ndistinct, a, null/* keyOrder */))
                            .addFilter(new Resolver() {

                                private static final long serialVersionUID = 1L;

                                @Override
                                protected Object resolve(final Object obj) {

                                    return ((KVO) obj).obj;

                                }
                            });

                    tasks.add(new FullTextIndexWriterTask(getSearchEngine(),
                            ndistinct/* capacity */, itr));

                }

                /*
                 * Co-thread the reverse index writes and the search index
                 * writes.
                 */
                try {

                    final List<Future<Long>> futures = getExecutorService()
                            .invokeAll(tasks);

                    stats.reverseIndexTime = futures.get(0).get();

                    if (textIndex)
                        stats.fullTextIndexTime.addAndGet(futures.get(1).get());
//                    else
//                        stats.fullTextIndexTime = 0L;

                } catch (Throwable t) {

                    throw new RuntimeException(t);

                }

                stats.indexTime.addAndGet(System.currentTimeMillis() - _begin);

            }

        }

    }
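    /*
     * Pattern sketch (illustrative, not part of the original source): the
     * reverse-index and full-text writes above are co-threaded with the plain
     * java.util.concurrent idiom below (task names are hypothetical):
     */
//    final List<Callable<Long>> work = new LinkedList<Callable<Long>>();
//    work.add(reverseIndexWriteTask); // hypothetical Callable<Long>
//    work.add(fullTextIndexWriteTask); // hypothetical Callable<Long>
//    for (Future<Long> f : getExecutorService().invokeAll(work))
//        f.get(); // check for errors; rethrow the first one found.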
    /**
     * Utility method to (re-)build the full text index. This is a high
     * latency operation for a database of any significant size. You must be
     * using the unisolated view of the {@link AbstractTripleStore} for this
     * operation. {@link AbstractTripleStore.Options#TEXT_INDEX} must be
     * enabled. This operation is only supported when the
     * {@link IValueCentricTextIndexer} uses the {@link FullTextIndex} class.
     * 
     * @param forceCreate
     *            When true a new text index will be created for a namespace
     *            that had none before.
     */
    @SuppressWarnings("unchecked")
    public void rebuildTextIndex(final boolean forceCreate) {

        if (getTimestamp() != ITx.UNISOLATED)
            throw new UnsupportedOperationException(
                    "Unisolated connection required to rebuild full text index");

        final IValueCentricTextIndexer textIndexer;

        if (textIndex) {

            IValueCentricTextIndexer oldTextIndexer = getSearchEngine();

            // destroy the existing text index.
            oldTextIndexer.destroy();

            // clear reference to the old FTS
            viewRef.set(null);

            // get a new instance of FTS
            textIndexer = getSearchEngine();

        } else if (forceCreate) {

            textIndex = true;

            textIndexer = getSearchEngine();

            SparseRowStore global = indexManager.getGlobalRowStore();

            // Update "namespace" properties
            updateTextIndexConfiguration(global, getContainerNamespace());

            // Update "namespace.lex" properties
            updateTextIndexConfiguration(global, getNamespace());

            // Warning: only container and lexicon properties are updated
            // with new text index configuration, other indexes may require
            // update as well

        } else {

            throw new UnsupportedOperationException(
                    "Could not rebuild full text index, because it is not enabled");

        }

        // create a new index.
        textIndexer.create();

        if (indexManager instanceof IJournal) {

            // make the changes restart safe (not required for federation).
            ((IJournal) indexManager).commit();

        }

        // TermIVs
        {
            // The index to scan for the RDF Literals.
            final IIndex terms = getId2TermIndex();

            // used to decode the tuples.
            @SuppressWarnings("rawtypes")
            final ITupleSerializer tupSer = terms.getIndexMetadata()
                    .getTupleSerializer();

            /*
             * Visit all plain, language code, and datatype literals in the
             * lexicon.
             * 
             * Note: This uses a filter on the ITupleIterator in order to
             * filter out non-literal terms before they are shipped from a
             * remote index shard.
             */
            final Iterator itr = new Striterator(
                    terms.rangeIterator(null/* fromKey */, null/* toKey */,
                            0/* capacity */, IRangeQuery.DEFAULT,
                            new TupleFilter() {

                                private static final long serialVersionUID = 1L;

                                protected boolean isValid(final ITuple obj) {

                                    @SuppressWarnings("rawtypes")
                                    final IV iv = (IV) tupSer
                                            .deserializeKey(obj);

                                    if (iv != null && iv.isLiteral()) {
                                        return true;
                                    }

                                    return false;

                                }
                            })).addFilter(new Resolver() {

                private static final long serialVersionUID = 1L;

                protected Object resolve(final Object obj) {

                    final BigdataLiteral lit = (BigdataLiteral) tupSer
                            .deserialize((ITuple) obj);

//                    System.err.println("lit: " + lit);

                    return lit;

                }
            });

            final int capacity = 10000;

            textIndexer.index(capacity, itr);

        }

        // BlobIVs
        {
            // the index to scan for the RDF Literals.
            final IIndex terms = getBlobsIndex();

            // used to decode the tuples.
            @SuppressWarnings("rawtypes")
            final ITupleSerializer tupSer = terms.getIndexMetadata()
                    .getTupleSerializer();

            /*
             * Visit all plain, language code, and datatype literals in the
             * lexicon.
             * 
             * Note: This uses a filter on the ITupleIterator in order to
             * filter out non-literal terms before they are shipped from a
             * remote index shard.
             */
            final Iterator itr = new Striterator(
                    terms.rangeIterator(null/* fromKey */, null/* toKey */,
                            0/* capacity */, IRangeQuery.DEFAULT,
                            new TupleFilter() {

                                private static final long serialVersionUID = 1L;

                                protected boolean isValid(final ITuple obj) {

                                    @SuppressWarnings("rawtypes")
                                    final IV iv = (IV) tupSer
                                            .deserializeKey(obj);

                                    if (iv != null && iv.isLiteral()) {
                                        return true;
                                    }

                                    return false;

                                }
                            })).addFilter(new Resolver() {

                private static final long serialVersionUID = 1L;

                protected Object resolve(final Object obj) {

                    final BigdataLiteral lit = (BigdataLiteral) tupSer
                            .deserialize((ITuple) obj);

//                    System.err.println("lit: " + lit);

                    return lit;

                }
            });

            final int capacity = 10000;

            while (itr.hasNext()) {

                textIndexer.index(capacity, itr);

            }

        }

    }

    private void updateTextIndexConfiguration(final SparseRowStore global,
            final String namespace) {

        Map<String, Object> map = global.read(RelationSchema.INSTANCE,
                namespace);

        map.put(AbstractTripleStore.Options.TEXT_INDEX, "true");

        map.put(FullTextIndex.Options.FIELDS_ENABLED, "false");

        if (getNamespace().equals(namespace)) {
            map.put(FullTextIndex.Options.OVERWRITE, "false");
        }

        global.write(RelationSchema.INSTANCE, map);

    }
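    /*
     * Usage sketch (illustrative, not part of the original source): rebuilding
     * the full text index, which requires the ITx.UNISOLATED view (assumes
     * [lex] references that unisolated view):
     */
//    lex.rebuildTextIndex(false/* forceCreate */);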

    /**
     * Batch resolution of internal values to {@link BigdataValue}s.
     * 
     * @param ivs
     *            A collection of internal values.
     * 
     * @return A map from internal value to the {@link BigdataValue}. If an
     *         internal value was not resolved then the map will not contain
     *         an entry for that internal value.
     */
    final public Map<IV<?, ?>, BigdataValue> getTerms(
            final Collection<IV<?, ?>> ivs) {

        /*
         * TODO The values below represent constants which were historically
         * hard coded into BatchResolveTermIVs and BatchResolveBlobIVs. This
         * value was not terribly sensitive when it was originally set,
         * probably because nearly all chunks are smaller than 4000 so
         * basically all RDF Value resolution was happening on the "one-chunk"
         * code path.
         * 
         * See ASTEvalHelper which can override these values.
         */
        return getTerms(ivs, 4000/* termsChunkSize */, 4000/* blobsChunkSize */);

    }

    /**
     * Utility method to (re-)build the subject-based full text index. This is
     * a high latency operation for a database of any significant size. You
     * must be using the unisolated view of the {@link AbstractTripleStore}
     * for this operation. {@link AbstractTripleStore.Options#TEXT_INDEX} must
     * be enabled. This operation is only supported when the
     * {@link ITextIndexer} uses the {@link FullTextIndex} class.
     * <p>
     * The subject-based full text index is one that rolls up the normal
     * object-based full text index into a similarly structured index that
     * captures relevancy across subjects. Instead of
     * <p>
     * (t,s) => s.len, termWeight
     * <p>
     * Where s is the subject's IV. The term weight has the same
     * interpretation, but it is across all literals which are linked to that
     * subject and which contain the given token. This index basically
     * pre-computes the (?s ?p ?o) join that sometimes follows the
     * (?o bd:search "xyz") request.
     * <p>
     * Truth Maintenance
     * <p>
     * We will need to perform truth maintenance on the subject-centric text
     * index, that is - the index will need to be updated as statements are
     * added and removed (to the extent that those statements involve a
     * literal in the object position). Adding a statement is the easier case
     * because we will never need to remove entries from the index, we can
     * simply write over them with new relevance values. All that is involved
     * with truth maintenance for adding a statement is taking a post-commit
     * snapshot of the subject in the statement and running it through the
     * indexer (a "subject-refresh").
     * <p>
     * The same "subject-refresh" will be necessary for truth maintenance for
     * removal, but an additional step will be necessary beforehand - the
     * index entries associated with the deleted subject/object
     * (tokens+subject) will need to be removed in case the token appears only
     * in the removed literal. After this pruning step the subject can be
     * refreshed in the index exactly the same as for truth maintenance on
     * add.
     * <p>
     * It looks like the right place to hook in truth maintenance for add is
     * {@link AbstractTripleStore#addStatements(AbstractTripleStore, boolean, IChunkedOrderedIterator, com.bigdata.relation.accesspath.IElementFilter)}
     * after the ISPOs are added to the SPORelation. Likewise, the place to
     * hook in truth maintenance for delete is
     * {@link AbstractTripleStore#removeStatements(IChunkedOrderedIterator, boolean)}
     * after the ISPOs are removed from the SPORelation.
     * 
     * @deprecated Feature was never completed due to scalability issues. See
     *             BLZG-1548, BLZG-563.
     */
    @Deprecated
    @SuppressWarnings("unchecked")
    public void buildSubjectCentricTextIndex() {

        if (getTimestamp() != ITx.UNISOLATED)
            throw new UnsupportedOperationException();

        if (!subjectCentricTextIndex)
            throw new UnsupportedOperationException();

        final ISubjectCentricTextIndexer textIndexer = getSubjectCentricSearchEngine();

        try {

            // destroy the existing text index.
            textIndexer.destroy();

        } catch (NoSuchIndexException ex) {

            if (log.isInfoEnabled())
                log.info("could not destroy subject-centric full text index, does not currently exist");

        }

        // create a new index.
        textIndexer.create();

        // TermIVs
        {
            // The index to scan for the individual subjects and their literal
            // values.
            final IIndex spoNdx = getContainer().getSPORelation()
                    .getPrimaryIndex();

            /*
             * For each S in SPO, collect up O values and pass this information
             * to the subject-centric text indexer for indexing.
             */

            // used to decode the tuples.
            @SuppressWarnings("rawtypes")
            final ITupleSerializer tupSer = spoNdx.getIndexMetadata()
                    .getTupleSerializer();

            /*
             * Visit all plain, language code, and datatype literals in the
             * object position of the primary statement index.
             * 
             * Note: This uses a filter on the ITupleIterator in order to
             * filter out non-literal terms before they are shipped from a
             * remote index shard.
             */
            final Iterator<ISPO> itr = new Striterator(
                    spoNdx.rangeIterator(null/* fromKey */, null/* toKey */,
                            0/* capacity */, IRangeQuery.DEFAULT,
                            new TupleFilter() {

                                private static final long serialVersionUID = 1L;

                                protected boolean isValid(final ITuple obj) {

                                    final ISPO spo = (ISPO) tupSer
                                            .deserializeKey(obj);

                                    if (spo.o().isLiteral()) {
                                        return true;
                                    }

                                    return false;

                                }
                            })).addFilter(new Resolver() {

                private static final long serialVersionUID = 1L;

                protected Object resolve(final Object obj) {

                    final ISPO spo = (ISPO) tupSer.deserializeKey((ITuple) obj);

                    return spo;

                }
            });

            /*
             * Keep track of the current subject being indexed.
             */
            IV s = null;

            /*
             * Keep a collection of literals to be indexed for that subject.
             */
            final Collection<IV<?, ?>> literals = new LinkedList<IV<?, ?>>();

            long subjectCount = 0;
            long statementCount = 0;

            final boolean l = log.isInfoEnabled();

            while (itr.hasNext()) {

                final ISPO spo = itr.next();

                if (!spo.s().equals(s)) {

                    // flush the old s to the text index if != null
                    if (s != null) {

                        textIndexer.index(s,
                                getTerms(literals).values().iterator());

                        subjectCount++;
                        statementCount += literals.size();

                        if (l && subjectCount % 1000 == 0) {
                            log.info("indexed " + subjectCount + " subjects, "
                                    + statementCount + " statements");
                        }

                    }

                    // set the current s and clear the literals
                    s = spo.s();
                    literals.clear();

                }

                literals.add(spo.o());

            }

            if (s != null) {

                // flush the last subject
                textIndexer.index(s, getTerms(literals).values().iterator());

                subjectCount++;
                statementCount += literals.size();

                if (log.isInfoEnabled()) {
                    log.info("indexed " + subjectCount + " subjects, "
                            + statementCount + " statements");
                }

            }

        }

    }

//    @SuppressWarnings("unchecked")
//    public void refreshSubjectCentricTextIndex(final Set<IV<?, ?>> subjects) {
//
//        if (getTimestamp() != ITx.UNISOLATED)
//            throw new UnsupportedOperationException();
//
//        if (!subjectCentricTextIndex)
//            throw new UnsupportedOperationException();
//
//        final ISubjectCentricTextIndexer textIndexer = getSubjectCentricSearchEngine();
//
//        final AbstractTripleStore db = getContainer();
//
//        /*
//         * Keep a collection of literals to be indexed for each subject.
//         */
//        final Collection<IV<?, ?>> literals = new LinkedList<IV<?, ?>>();
//
//        for (IV<?, ?> s : subjects) {
//
//            literals.clear();
//
//            /*
//             * Visit all plain, language code, and datatype literals in the
//             * object position of the primary statement index.
//             *
//             * Note: This uses a filter on the ITupleIterator in order to filter
//             * out non-literal terms before they are shipped from a remote index
//             * shard.
//             */
//            final Iterator<ISPO> itr = db.getAccessPath(s, null, null,
//                    new SPOFilter<ISPO>() {
//                        private static final long serialVersionUID = 1L;
//                        @Override
//                        public boolean isValid(Object e) {
//                            return ((ISPO) e).o().isLiteral();
//                        }
//                    }).iterator();
//
//            while (itr.hasNext()) {
//
//                final ISPO spo = itr.next();
//
//                literals.add(spo.o());
//
//            }
//
//            // flush the last subject
//            textIndexer.index(s, getTerms(literals).values().iterator());
//
//        }
//
//    }
//
//    @SuppressWarnings("unchecked")
//    public void refreshSubjectCentricTextIndex(final Set<ISPO> removed) {
//
//        if (getTimestamp() != ITx.UNISOLATED)
//            throw new UnsupportedOperationException();
//
//        if (!subjectCentricTextIndex)
//            throw new UnsupportedOperationException();
//
//        final ISubjectCentricTextIndexer textIndexer = getSubjectCentricSearchEngine();
//
//        final AbstractTripleStore db = getContainer();
//
//        /*
//         * Keep a collection of literals to be indexed for each subject.
//         */
//        final Collection<IV<?, ?>> literals = new LinkedList<IV<?, ?>>();
//
//        for (ISPO spo : removed) {
//
//            literals.clear();
//
//            /*
//             * Visit all plain, language code, and datatype literals in the
//             * object position of the primary statement index.
//             *
//             * Note: This uses a filter on the ITupleIterator in order to filter
//             * out non-literal terms before they are shipped from a remote index
//             * shard.
//             */
//            final Iterator<ISPO> itr = db.getAccessPath(s, null, null,
//                    new SPOFilter<ISPO>() {
//                        private static final long serialVersionUID = 1L;
//                        @Override
//                        public boolean isValid(Object e) {
//                            return ((ISPO) e).o().isLiteral();
//                        }
//                    }).iterator();
//
//            while (itr.hasNext()) {
//
//                final ISPO spo = itr.next();
//
//                literals.add(spo.o());
//
//            }
//
//            // flush the last subject
////            textIndexer.index(s, getTerms(literals).values().iterator());
//
//        }
//
//    }
    /**
     * Batch resolution of internal values to {@link BigdataValue}s.
     * 
     * @param ivs
     *            A collection of internal values.
     * 
     * @return A map from internal value to the {@link BigdataValue}. If an
     *         internal value was not resolved then the map will not contain
     *         an entry for that internal value.
     * 
     * @see #getTerms(Collection)
     */
    final public Map<IV<?, ?>, BigdataValue> getTerms(
            final Collection<IV<?, ?>> ivs, final int termsChunksSize,
            final int blobsChunkSize) {

        if (ivs == null)
            throw new IllegalArgumentException();

        // Maximum #of IVs (assuming all are distinct).
        final int n = ivs.size();

        if (n == 0) {

            return Collections.emptyMap();

        }

        final long begin = System.currentTimeMillis();

        /*
         * Note: A concurrent hash map is used since the request may be split
         * across shards, in which case updates on the map may be concurrent.
         * 
         * Note: The map also needs to be concurrent since the request can be
         * split across the ID2TERM and BLOBS indices.
         */
        final ConcurrentHashMap<IV<?, ?>/* iv */, BigdataValue/* term */> ret = new ConcurrentHashMap<IV<?, ?>, BigdataValue>(
                n/* initialCapacity */);

        // TermIVs which must be resolved against an index.
        final Collection<TermId<?>> termIVs = new LinkedList<TermId<?>>();

        // BlobIVs which must be resolved against an index.
        final Collection<BlobIV<?>> blobIVs = new LinkedList<BlobIV<?>>();

        final Set<IV<?, ?>> unrequestedSidTerms = new LinkedHashSet<IV<?, ?>>();

        /*
         * We need to materialize terms inside of SIDs so that the SIDs can be
         * materialized properly.
         */
        for (IV<?, ?> iv : ivs) {

            if (iv instanceof SidIV) {

                handleSid((SidIV) iv, ivs, unrequestedSidTerms);

            }

        }

        /*
         * Add the SID terms to the IVs to materialize.
         */
        for (IV<?, ?> iv : unrequestedSidTerms) {

            ivs.add(iv);

        }

        /*
         * Filter out the inline values first and those that have already been
         * materialized and cached.
         */
        int numNotFound = 0;

        final boolean isDebugEnabled = log.isDebugEnabled();

        for (IV<?, ?> iv : ivs) {

            if (iv == null)
                throw new AssertionError();

            if (iv.hasValue()) {

                if (isDebugEnabled)
                    log.debug("already materialized: " + iv.getValue());

                // already materialized
                ret.put(iv, iv.getValue());

            } else if (iv instanceof SidIV) {

                // defer until the end
                continue;

            } else if (iv.isInline()) {

                // translate it into a value directly
                ret.put(iv, iv.asValue(this));

            } else {

                final BigdataValue value = _getTermId(iv);

                if (value != null) {

                    assert value.getValueFactory() == valueFactory;

                    // resolved.
                    ret.put(iv, value);// valueFactory.asValue(value));

                    continue;

                }

                // We will need to read on an index.
                numNotFound++;

                if (iv instanceof TermId) {

                    termIVs.add((TermId) iv);

                } else if (iv instanceof BlobIV) {

                    blobIVs.add((BlobIV) iv);

                } else {

                    throw new AssertionError("class="
                            + iv.getClass().getName());

                }

            }

        }

//        if (numNotFound == 0) {
//
//            // Done.
//            return ret;
//
//        }

        if (numNotFound > 0) {

            // go to the indices

            /*
             * Setup and run task(s) to resolve IV(s).
             */

            final ExecutorService service = getExecutorService();

            final List<Callable<Void>> tasks = new LinkedList<Callable<Void>>();

            if (!termIVs.isEmpty()) {

                tasks.add(new BatchResolveTermIVsTask(service,
                        getId2TermIndex(), termIVs, ret, termCache,
                        valueFactory, termsChunksSize));

            }

            if (!blobIVs.isEmpty()) {

                tasks.add(new BatchResolveBlobIVsTask(service,
                        getBlobsIndex(), blobIVs, ret, termCache,
                        valueFactory, blobsChunkSize));

            }

            if (log.isInfoEnabled())
                log.info("nterms=" + n + ", numNotFound=" + numNotFound
                        + ", cacheSize=" + termCache.size());

            try {

                if (tasks.size() == 1) {

                    tasks.get(0).call();

                } else {

                    // Co-thread tasks.
                    final List<Future<Void>> futures = getExecutorService()
                            .invokeAll(tasks);

                    // Verify no errors.
                    for (Future<Void> f : futures)
                        f.get();

                }

            } catch (Exception ex) {

                throw new RuntimeException(ex);

            }

        }

        /*
         * SidIVs require special handling.
         */
        for (IV<?, ?> iv : ivs) {

            if (iv instanceof SidIV) {

                cacheTerms((SidIV) iv, ret);

                // translate it into a value directly
                ret.put(iv, iv.asValue(this));

            }

        }

        /*
         * Remove any IVs that were not explicitly requested in the method
         * call but that got pulled into materialization because of a SID.
         */
        for (IV<?, ?> iv : unrequestedSidTerms) {

            ivs.remove(iv);

            ret.remove(iv);

        }

        final long elapsed = System.currentTimeMillis() - begin;

        if (log.isInfoEnabled())
            log.info("resolved " + numNotFound + " terms: #TermIVs="
                    + termIVs.size() + ", #BlobIVs=" + blobIVs.size() + " in "
                    + elapsed + "ms");

        return ret;

    }

    /**
     * Add the terms inside a SID to the collection of IVs to materialize if
     * they are not already there.
     */
    @SuppressWarnings("rawtypes")
    final private void handleSid(final SidIV sid,
            final Collection<IV<?, ?>> ivs, final Set<IV<?, ?>> unrequested) {

        final ISPO spo = sid.getInlineValue();

        handleTerm(spo.s(), ivs, unrequested);

        handleTerm(spo.p(), ivs, unrequested);

        handleTerm(spo.o(), ivs, unrequested);

        if (spo.c() != null) {

            handleTerm(spo.c(), ivs, unrequested);

        }

    }

    /**
     * Add the terms inside a SID to the collection of IVs to materialize if
     * they are not already there.
     */
    @SuppressWarnings("rawtypes")
    final private void handleTerm(final IV iv,
            final Collection<IV<?, ?>> ivs, final Set<IV<?, ?>> unrequested) {

        if (iv instanceof SidIV) {

            handleSid((SidIV) iv, ivs, unrequested);

        } else {

            if (!ivs.contains(iv)) {

//                ivs.add(iv);
                unrequested.add(iv);

            }

        }

    }

    /**
     * We need to cache the BigdataValues on the IV components within the
     * SidIV so that the SidIV can materialize itself into a BigdataBNode
     * properly.
     */
    @SuppressWarnings("rawtypes")
    final private void cacheTerms(final SidIV sid,
            final Map<IV<?, ?>, BigdataValue> terms) {

        final ISPO spo = sid.getInlineValue();

        cacheTerm(spo.s(), terms);

        cacheTerm(spo.p(), terms);

        cacheTerm(spo.o(), terms);

        if (spo.c() != null) {

            cacheTerm(spo.c(), terms);

        }

    }

    /**
     * We need to cache the BigdataValues on the IV components within the
     * SidIV so that the SidIV can materialize itself into a BigdataBNode
     * properly.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    final private void cacheTerm(final IV iv,
            final Map<IV<?, ?>, BigdataValue> terms) {

        if (iv instanceof SidIV) {

            cacheTerms((SidIV) iv, terms);

        } else {

            iv.setValue(terms.get(iv));

        }

    }
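    /*
     * Usage sketch (illustrative, not part of the original source): batch
     * resolution of IVs to materialized RDF Values (assumes [lex] references
     * this relation and [ivs] is a Collection of IVs gathered elsewhere):
     */
//    final Map<IV<?, ?>, BigdataValue> resolved = lex.getTerms(ivs);
//    for (IV<?, ?> iv : ivs) {
//        final BigdataValue val = resolved.get(iv); // null iff not found.
//    }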

    /**
     * Recently resolved term identifiers are cached to improve performance
     * when externalizing statements.
     * 
     * @todo consider using this cache in the batch API as well or simply
     *       modify the {@link StatementBuffer} to use a term cache in order
     *       to minimize the #of terms that it has to resolve against the
     *       indices - this especially matters for the scale-out
     *       implementation.
     *       <p>
     *       Or perhaps this can be rolled into the {@link ValueFactory} impl
     *       along with the reverse bnodes mapping?
     */
//    final private ConcurrentWeakValueCacheWithBatchedUpdates<IV<?, ?>, BigdataValue> termCache;
    final private ITermCache<IV<?, ?>, BigdataValue> termCache;

    /**
     * Factory used for {@link #termCache} for read-only views of the lexicon.
     */
    static private CanonicalFactory<NT, ITermCache<IV<?, ?>, BigdataValue>, Integer/* state */> termCacheFactory = new CanonicalFactory<NT, ITermCache<IV<?, ?>, BigdataValue>, Integer>(
            1/* queueCapacity */) {
        @Override
        protected ITermCache<IV<?, ?>, BigdataValue> newInstance(NT key,
                Integer termCacheCapacity) {
            return new TermCache<IV<?, ?>, BigdataValue>(//
                    new ConcurrentWeakValueCacheWithBatchedUpdates<IV<?, ?>, BigdataValue>(//
                            termCacheCapacity.intValue(), // backing hard reference LRU queue capacity.
                            .75f, // loadFactor (.75 is the default)
                            16 // concurrency level (16 is the default)
                    ));
        }
    };

    /**
     * Clear all term caches for the supplied namespace.
     */
    @SuppressWarnings("rawtypes")
    static public void clearTermCacheFactory(final String namespace) {

        final Iterator it = termCacheFactory.entryIterator();

        while (it.hasNext()) {

            final NT nt = (NT) ((Entry) it.next()).getKey();

            if (nt.getName().equals(namespace)) {

                it.remove();

            }

        }

    }

    /**
     * The {@link Vocabulary} implementation class.
     */
    private final Vocabulary vocab;

    /**
     * The {@link ILexiconConfiguration} instance, which will determine how
     * terms are encoded and decoded in the key space.
     */
    private final ILexiconConfiguration<BigdataValue> lexiconConfiguration;

    /**
     * Constant for the {@link LexiconRelation} namespace component.
     * <p>
     * Note: To obtain the fully qualified name of an index in the
     * {@link LexiconRelation} you need to append a "." to the relation's
     * namespace, then this constant, then a "." and then the local name of
     * the index.
     * 
     * @see AbstractRelation#getFQN(IKeyOrder)
     */
    public static final transient String NAME_LEXICON_RELATION = "lex";

    /**
     * Handles non-inline {@link IV}s by synthesizing a {@link BigdataBNode}
     * using {@link IV#bnodeId()} (iff told bnodes support is enabled and the
     * {@link IV} represents a blank node) and testing the {@link #termCache
     * term cache} otherwise.
     * 
     * @param iv
     *            A non-inline {@link IV}.
     * 
     * @return The corresponding {@link BigdataValue} if the {@link IV}
     *         represents a blank node or is found in the {@link #termCache},
     *         and null otherwise.
     * 
     * @throws IllegalArgumentException
     *             if iv is null
     * @throws IllegalArgumentException
     *             if {@link IV#isNullIV()}
     * @throws IllegalArgumentException
     *             if the {@link IV} is {@link IV#isInline()}.
     */
    private BigdataValue _getTermId(final IV<?, ?> iv) {

        if (iv == null)
            throw new IllegalArgumentException();

        if (iv.isNullIV())
            throw new IllegalArgumentException();

        if (iv.isInline()) // only for non-inline IVs.
            throw new IllegalArgumentException();

        if (!storeBlankNodes && iv.isBNode()) {

            /*
             * Except when the "told bnodes" mode is enabled, blank nodes are
             * not stored in the reverse lexicon (or the cache).
             * 
             * Note: In a told bnodes mode, we need to store the blank nodes
             * in the lexicon and enter them into the term cache since their
             * lexical form will include the specified ID, not the term
             * identifier.
             */

            final String id = 't' + ((BNode) iv).getID();

            final BigdataBNode bnode = valueFactory.createBNode(id);

            // set the term identifier on the object.
            bnode.setIV(iv);

            return bnode;

        }

        // Test the term cache.
        return termCache.get(iv);

    }
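    /*
     * Illustration (not part of the original source): following the note
     * above, for a (hypothetical) triple store in the namespace "kb", the
     * fully qualified name of the ID2TERM index would be composed as
     *
     *   "kb" + "." + NAME_LEXICON_RELATION + "." + "ID2TERM" // "kb.lex.ID2TERM"
     */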

    /**
     * Note: {@link BNode}s are not stored in the reverse lexicon and are
     * recognized using {@link AbstractTripleStore#isBNode(long)}.
     * <p>
     * Note: Statement identifiers (when enabled) are not stored in the
     * reverse lexicon and are recognized using
     * {@link AbstractTripleStore#isStatement(IV)}. If the term identifier is
     * recognized as being, in fact, a statement identifier, then it is
     * externalized as a {@link BNode}. This fits rather well with the notion
     * in a quad store that the context position may be either a {@link URI}
     * or a {@link BNode} and the fact that you can use {@link BNode}s to
     * "stamp" statement identifiers.
     * <p>
     * Note: Handles both unisolatable and isolatable indices.
     * <p>
     * Note: Sets {@link BigdataValue#getIV()} as a side-effect.
     * <p>
     * Note: this always mints a new {@link BNode} instance when the term
     * identifier identifies a {@link BNode} or a {@link Statement}.
     * 
     * @return The {@link BigdataValue} -or- null iff there is no
     *         {@link BigdataValue} for that term identifier in the lexicon.
     */
    @SuppressWarnings("rawtypes")
    final public BigdataValue getTerm(final IV iv) {

        return getValue(iv, true);

    }

    /**
     * When readFromIndex=false, only handles inline, NULL, bnodes, SIDs, and
     * the termCache - does not attempt to read from disk.
     * 
     * @param iv
     *            The {@link IV}.
     * @param readFromIndex
     *            When true an attempt will be made to resolve the {@link IV}
     *            against the TERMS index iff none of the fast paths succeed.
     */
    @SuppressWarnings("rawtypes")
    final private BigdataValue getValue(final IV iv,
            final boolean readFromIndex) {

//        if (false) { // alternative forces the standard code path.
//            final Collection ivs = new LinkedList();
//            ivs.add(iv);
//            final Map values = getTerms(ivs);
//            return values.get(iv);
//        }

        if (iv.isInline())
            return iv.asValue(this);

        // handle bnodes, the termCache.
        BigdataValue value = _getTermId(iv);

        if (value != null || !readFromIndex)
            return value;

        if (iv instanceof BlobIV) {

            return __getBlob((BlobIV) iv);

        }

        return __getTerm((TermId) iv);

    }

    private BigdataValue __getTerm(final TermId iv) {

        final IIndex ndx = getId2TermIndex();

        final Id2TermTupleSerializer tupleSer = (Id2TermTupleSerializer) ndx
                .getIndexMetadata().getTupleSerializer();

        final byte[] key = tupleSer.id2key(iv);

        final byte[] data = ndx.lookup(key);

        if (data == null)
            return null;

        // This also sets the value factory.
        BigdataValue value = valueFactory.getValueSerializer().deserialize(
                data);

        // This sets the term identifier.
        value.setIV(iv);

        // Note: passing the IV object as the key.
        final BigdataValue tmp = termCache.putIfAbsent(iv, value);

        if (tmp != null) {

            value = tmp;

        }

//        assert value.getIV() == iv : "expecting iv=" + iv + ", but found "
//                + value.getIV();
//        value.setTermId( id );

        return value;

    }

    private BigdataValue __getBlob(final BlobIV iv) {

        final IIndex ndx = getBlobsIndex();

        final BlobsTupleSerializer tupleSer = (BlobsTupleSerializer) ndx
                .getIndexMetadata().getTupleSerializer();

        final byte[] key = tupleSer.serializeKey(iv);

        final byte[] data = ndx.lookup(key);

        if (data == null)
            return null;

        // This also sets the value factory.
        BigdataValue value = valueFactory.getValueSerializer().deserialize(
                data);

        // This sets the term identifier.
        value.setIV(iv);

        // Note: passing the IV object as the key.
        final BigdataValue tmp = termCache.putIfAbsent(iv, value);

        if (tmp != null) {

            value = tmp;

        }

        // Note: This assert could be tripped by a data race on the cache,
        // which is not an error.
//        assert value.getIV() == iv : "expecting iv=" + iv + ", but found "
//                + value.getIV();
//
//        value.setTermId( id );

        return value;

    }

    /**
     * WARNING DO NOT USE OUTSIDE OF THE UNIT TESTS: This method is extremely
     * inefficient for scale-out as it does one RMI per request!
     * <p>
     * Note: If {@link BigdataValue#getIV()} is set, then returns that value
     * immediately. Next, try to get an inline internal value for the value.
     * Otherwise looks up the termId in the index and
     * {@link BigdataValue#setIV(IV) sets the term identifier} as a
     * side-effect.
     * 
     * @deprecated Not even the unit tests should be doing this.
     * 
     * @see #getTerms(Collection) Use that method to resolve {@link Value}s to
     *      their {@link IV}s efficiently.
     */
    @SuppressWarnings("rawtypes")
    final public IV getIV(final Value value) {

        if (value == null)
            return null;

        // see if it already has a value
        if (value instanceof BigdataValue) {

            final IV iv = ((BigdataValue) value).getIV();

            if (iv != null)
                return iv;

        }

        // see if it can be assigned an inline value
        IV iv = getInlineIV(value);

        if (iv != null)
            return iv;

        // go to the index
        iv = getTermId(value);

        return iv;

    }

    /**
     * Attempt to convert the value to an inline internal value. If the caller
     * provides a {@link BigdataValue} and this method is successful, then the
     * {@link IV} will be set as a side-effect on the {@link BigdataValue}.
     * 
     * @param value
     *            The value to convert.
     * 
     * @return The inline internal value, or null if it cannot be converted.
     * 
     * @see ILexiconConfiguration#createInlineIV(Value)
     */
    @SuppressWarnings("rawtypes")
    final public IV getInlineIV(final Value value) {

        return getLexiconConfiguration().createInlineIV(value);

    }

    /**
     * This method assumes we've already exhausted all other possibilities and
     * need to go to the index for the {@link IV}. It is "optimized" for the
     * lookup of a single {@link Value}. Note, however, that single value
     * lookup is NOT efficient. {@link #getTerms(Collection)} SHOULD be used
     * for efficient batch resolution of {@link Value}s to {@link IV}s.
     * <p>
     * WARNING DO NOT USE OUTSIDE OF THE UNIT TESTS OR CAREFULLY VETTED CODE:
     * This method is extremely inefficient for scale-out as it does one RMI
     * per request!
     * 
     * @param value
     *            the value to lookup
     * 
     * @return The {@link IV} for the value
     */
    private IV getTermId(final Value value) {

        if (isBlob(value)) {

            return getBlobIV(value);

        }

        return getTermIV(value);

    }

    /**
     * WARNING DO NOT USE OUTSIDE OF THE UNIT TESTS OR CAREFULLY VETTED CODE:
     * This method is extremely inefficient for scale-out as it does one RMI
     * per request!
     * 
     * @param value
     * @return
     */
    private TermId getTermIV(final Value value) {

        final IIndex ndx = getTerm2IdIndex();

        final byte[] key;
        {

            final Term2IdTupleSerializer tupleSer = (Term2IdTupleSerializer) ndx
                    .getIndexMetadata().getTupleSerializer();

            // generate key iff not on hand.
            key = tupleSer.getLexiconKeyBuilder().value2Key(value);

        }

        // lookup in the forward index.
        final byte[] tmp = ndx.lookup(key);

        if (tmp == null)
            return null;

        final TermId iv = (TermId) IVUtility.decode(tmp);

        if (value instanceof BigdataValue) {

            final BigdataValue impl = (BigdataValue) value;

            // set as side-effect.
            impl.setIV(iv);

            /*
             * Note that we have the termId and the term, so we stick the
             * value into the term cache IFF it has the correct value factory,
             * but do not replace the entry if there is one already there.
             */
            if (impl.getValueFactory() == valueFactory) {

                if (storeBlankNodes || !iv.isBNode()) {

//                    if (termCache.get(id) == null) {
//
//                        termCache.put(id, value, false/* dirty */);
//
//                    }

                    termCache.putIfAbsent(iv, impl);

                }

            }

        }

        return iv;

    }

    /**
     * WARNING DO NOT USE OUTSIDE OF THE UNIT TESTS OR CAREFULLY VETTED CODE:
     * This method is extremely inefficient for scale-out as it does one RMI
     * per request!
     */
    private BlobIV getBlobIV(final Value value) {

        final IKeyBuilder keyBuilder = h.newKeyBuilder();

        final BigdataValue asValue = valueFactory.asValue(value);

        final byte[] baseKey = h.makePrefixKey(keyBuilder.reset(), asValue);

        final byte[] val = valueFactory.getValueSerializer().serialize(
                asValue);

        final int counter = h.resolveOrAddValue(getBlobsIndex(),
                true/* readOnly */, keyBuilder, baseKey, val, null/* tmp */,
                null/* bucketSize */);

        if (counter == BlobsIndexHelper.NOT_FOUND) {

            // Not found.
            return null;

        }

        final BlobIV iv = new BlobIV(VTE.valueOf(asValue),
                asValue.hashCode(), (short) counter);

        if (value instanceof BigdataValue) {

            final BigdataValue impl = (BigdataValue) value;

            // set as side-effect.
            impl.setIV(iv);

            /*
             * Note that we have the termId and the term, so we stick the
             * value into the term cache IFF it has the correct value factory,
             * but do not replace the entry if there is one already there.
             */
            if (impl.getValueFactory() == valueFactory) {

                if (storeBlankNodes || !iv.isBNode()) {

//                    if (termCache.get(id) == null) {
//
//                        termCache.put(id, value, false/* dirty */);
//
//                    }

                    termCache.putIfAbsent(iv, impl);

                }

            }

        }

        return iv;

    }

    /**
     * Visits all RDF {@link Value}s in the {@link LexiconKeyOrder#BLOBS}
     * index in {@link BlobIV} order (efficient index scan).
     */
    @SuppressWarnings("unchecked")
    public Iterator<Value> blobsIterator() {

        final IIndex ndx = getBlobsIndex();

        return new Striterator(ndx.rangeIterator(null, null, 0/* capacity */,
                IRangeQuery.VALS, null/* filter */)).addFilter(new Resolver() {

            private static final long serialVersionUID = 1L;

            protected Object resolve(final Object val) {

                return ((ITuple) val).getObject();

            }

        });

    }
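    /*
     * Usage sketch (illustrative, not part of the original source): point
     * lookups, which should be confined to tests per the warnings above
     * (assumes [lex] references this relation and [someValue] is an RDF
     * Value):
     */
//    final IV iv = lex.getIV(someValue);     // Value -> IV (one RMI in scale-out!)
//    final BigdataValue v = lex.getTerm(iv); // IV -> Value, termCache first.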
    /**
     * Return the {@link #lexiconConfiguration} instance. Used to determine
     * how to encode and decode terms in the key space.
     */
    public ILexiconConfiguration<BigdataValue> getLexiconConfiguration() {

        return lexiconConfiguration;

    }

    /**
     * {@inheritDoc}
     * <p>
     * This implementation examines the predicate, looking at the
     * {@link LexiconKeyOrder#SLOT_IV} and {@link LexiconKeyOrder#SLOT_TERM}
     * slots and chooses the appropriate index based on the {@link IV} and/or
     * {@link Value} which it finds bound. When both slots are bound it
     * prefers the index for the {@link IV} => {@link Value} mapping as that
     * index will be faster (ID2TERM has a shorter key and higher fan-out than
     * TERM2ID).
     */
    public IKeyOrder<BigdataValue> getKeyOrder(final IPredicate<BigdataValue> p) {

        /*
         * Examine the IV slot first. Reverse lookup (IV => Value). This is
         * always our fastest and most common access path.
         */
        {

            @SuppressWarnings("unchecked")
            final IVariableOrConstant<IV<?, ?>> t = (IVariableOrConstant<IV<?, ?>>) p
                    .get(LexiconKeyOrder.SLOT_IV);

            if (t != null) {

                final IV<?, ?> iv = t.get();

                if (iv instanceof TermId)
                    return LexiconKeyOrder.ID2TERM;

                if (iv instanceof BlobIV)
                    return LexiconKeyOrder.BLOBS;

                throw new UnsupportedOperationException(p.toString());

            }

        }

        /*
         * Examine the Value slot next. This is used for forward lookup (Value
         * => IV).
         */
        {

            @SuppressWarnings("unchecked")
            final IVariableOrConstant<BigdataValue> v = (IVariableOrConstant<BigdataValue>) p
                    .get(LexiconKeyOrder.SLOT_TERM);

            if (v != null) {

                final BigdataValue value = v.get();

                if (isBlob(value)) {

                    return LexiconKeyOrder.BLOBS;

                }

                return LexiconKeyOrder.TERM2ID;

            }

        }

        throw new UnsupportedOperationException(p.toString());

    }
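    /*
     * Illustration (not part of the original source): per the logic above,
     * getKeyOrder() maps the bindings of a lex predicate onto an index:
     *
     *   IV slot bound with a TermId  -> LexiconKeyOrder.ID2TERM
     *   IV slot bound with a BlobIV  -> LexiconKeyOrder.BLOBS
     *   Value slot bound, !isBlob(v) -> LexiconKeyOrder.TERM2ID
     *   Value slot bound, isBlob(v)  -> LexiconKeyOrder.BLOBS
     */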

    /**
     * Necessary for lexicon joins, which are injected into query plans as
     * necessary by the query planner. You can use a {@link LexPredicate} to
     * perform either a forward ({@link BigdataValue} to {@link IV}) or
     * reverse ({@link IV} to {@link BigdataValue}) lookup. Either lookup will
     * cache the {@link BigdataValue} on the {@link IV} as a side effect.
     * <p>
     * Note: If you query with an {@link IV} or {@link BigdataValue} which is
     * already cached (either on one another or in the termCache) then the
     * cached value will be returned (fast path).
     * <p>
     * Note: Blank nodes will not unify with themselves unless you are using
     * told blank node semantics.
     * <p>
     * Note: This has the side effect of caching materialized
     * {@link BigdataValue}s on {@link IV}s using
     * {@link IV#setValue(BigdataValue)} for use in downstream operators that
     * need materialized values to evaluate properly. The query planner is
     * responsible for managing when we materialize and cache values. This
     * keeps us from wiring {@link BigdataValue} onto {@link IV}s all the
     * time.
     * <p>
     * The lexicon has a single TERMS index. The keys are {@link BlobIV}s
     * formed from the {@link VTE} of the {@link BigdataValue},
     * {@link BigdataValue#hashCode()}, and a collision counter. The value is
     * the {@link BigdataValue} as serialized by the
     * {@link BigdataValueSerializer}.
     * <p>
     * There are four possible ways to query this index using the
     * {@link LexPredicate}.
     * <dl>
     * <dt>lex(-BigdataValue,+IV)</dt>
     * <dd>The {@link IV} is given and its {@link BigdataValue} will be
     * sought.</dd>
     * <dt>lex(+BigdataValue,-IV)</dt>
     * <dd>The {@link BigdataValue} is given and its {@link IV} will be
     * sought. This case requires a key-range scan with a filter. It has to
     * scan the collision bucket and filter for the specified Value. We get
     * the collision bucket by creating a prefix key for the Value (using its
     * VTE and hashCode). This will either return the IV for that Value or
     * nothing.</dd>
     * <dt>lex(+BigdataValue,+IV)</dt>
     * <dd>The predicate is fully bound. In this case we can immediately
     * verify that the Value is consistent with the IV (same VTE and hashCode)
     * and then do a point lookup on the IV.</dd>
     * <dt>lex(-BigdataValue,-IV)</dt>
     * <dd>Neither is bound. This amounts to a full index scan and is not
     * currently supported (see the NoneBound case in the method body).</dd>
     * </dl>
     * 
     * @see LexAccessPatternEnum
     * @see LexPredicate
     * @see LexiconKeyOrder
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    @Override
    public IAccessPath<BigdataValue> newAccessPath(
            final IIndexManager localIndexManager,
            final IPredicate<BigdataValue> predicate,
            final IKeyOrder<BigdataValue> keyOrder) {

        /*
         * Figure out which access pattern is being used.
         */
        final LexAccessPatternEnum accessPattern = LexAccessPatternEnum
                .valueOf(predicate);

        switch (accessPattern) {
        case FullyBound: {

            /*
             * Special case first verifies that the IV and Value are
             * consistent and then falls through to IVBound, which is a point
             * lookup against the TERMS index.
             */

            final BigdataValue val = (BigdataValue) predicate.get(
                    LexiconKeyOrder.SLOT_TERM).get();

            final IV iv = (IV) predicate.get(LexiconKeyOrder.SLOT_IV).get();

            if (VTE.valueOf(val) != iv.getVTE()) {

                /*
                 * The VTE is not consistent so the access path is provably
                 * empty.
                 */

                return new EmptyAccessPath();

            }

            if (val.hashCode() != iv.hashCode()) {

                /*
                 * The hashCode is not consistent so the access path is
                 * provably empty.
                 */

                return new EmptyAccessPath();

            }

            /*
             * Fall through.
             */

        }
        case IVBound: {

            final IV iv = (IV) predicate.get(LexiconKeyOrder.SLOT_IV).get();

//            if (log.isDebugEnabled())
//                log.debug("materializing: " + iv);

            // Attempt to resolve the IV directly to a Value (no IO).
            final BigdataValue val = getValue(iv, false/* readIndex */);

            if (val != null) {

//                if (log.isDebugEnabled())
//                    log.debug("found term in the term cache: " + val);

                // cache the IV on the value
                val.setIV(iv);

                // cache the value on the IV
                iv.setValue(val);

                return new ArrayAccessPath(new BigdataValue[] { val },
                        predicate, keyOrder);

            }

//            if (log.isDebugEnabled())
//                log.debug("did not find term in the term cache: " + iv);

            if (!storeBlankNodes && iv.isBNode()) {

                /*
                 * Blank nodes do not unify with themselves unless you are
                 * using told blank nodes semantics.
                 */

                return new EmptyAccessPath();

            }

            final CacheValueFilter filter = CacheValueFilter.newInstance();

            final IPredicate tmp = (IPredicate) predicate.setProperty(
                    Predicate.Annotations.ACCESS_PATH_FILTER, filter);

            final AccessPath ap = new AccessPath(this, localIndexManager,
                    tmp, keyOrder).init();

            return ap;

        }
        case ValueBound: {

            final BigdataValue val = (BigdataValue) predicate.get(
                    LexiconKeyOrder.SLOT_TERM).get();

            // See if it already has an IV or can be assigned an inline IV
            IV iv = val.getIV();

            if (iv == null) {

                iv = getInlineIV(val);

            }

            if (iv != null) {

                // cache the IV on the value
                val.setIV(iv);

                // cache the value on the IV
                iv.setValue(val);

                return new ArrayAccessPath(new BigdataValue[] { val },
                        predicate, keyOrder);

            }

            final CacheValueFilter filter = CacheValueFilter.newInstance();

            final IPredicate tmp = (IPredicate) predicate.setProperty(
                    Predicate.Annotations.ACCESS_PATH_FILTER, filter);

            final AccessPath ap = new AccessPath(this, localIndexManager,
                    tmp, keyOrder);

            return ap;

        }
        case NoneBound: {

            /*
             * TODO Could be supported. This is a full index scan on both the
             * ID2TERM and BLOBS indices.
             */

        }
        default:
            throw new UnsupportedOperationException("" + accessPattern);
        }

    }

}



