All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.relation.AbstractResource Maven / Gradle / Ivy

/*

 Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

 Contact:
 SYSTAP, LLC DBA Blazegraph
 2501 Calvert ST NW #106
 Washington, DC 20008
 [email protected]

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; version 2 of the License.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 */
/*
 * Created on Jul 10, 2008
 */

package com.bigdata.relation;

import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;

import org.apache.log4j.Logger;

import com.bigdata.bop.BOp;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.bop.fed.QueryEngineFactory;
import com.bigdata.config.Configuration;
import com.bigdata.config.IValidator;
import com.bigdata.config.IntegerValidator;
import com.bigdata.config.LongValidator;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.IJournal;
import com.bigdata.journal.IResourceLock;
import com.bigdata.journal.IResourceLockService;
import com.bigdata.journal.NoSuchIndexException;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.rdf.rules.FastClosure;
import com.bigdata.rdf.rules.FullClosure;
import com.bigdata.rdf.rules.RuleFastClosure5;
import com.bigdata.rdf.rules.RuleFastClosure6;
import com.bigdata.rdf.sparql.ast.cache.CacheConnectionFactory;
import com.bigdata.rdf.sparql.ast.cache.ICacheConnection;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.relation.accesspath.AccessPath;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.locator.DefaultResourceLocator;
import com.bigdata.relation.locator.ILocatableResource;
import com.bigdata.relation.rule.eval.IJoinNexus;
import com.bigdata.relation.rule.eval.IJoinNexusFactory;
import com.bigdata.relation.rule.eval.ISolution;
import com.bigdata.relation.rule.eval.ProgramTask;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.sparse.GlobalRowStoreUtil;

/**
 * Base class for locatable resources.
 * 
 * @author Bryan Thompson
 * @version $Id$
 * 
 * @param <E> the generic type of the elements of the resource view
 *            (NOTE: the type parameter was stripped by HTML extraction;
 *            confirm against the upstream source)
 */
// NOTE(review): in the upstream source this is declared as
// AbstractResource<E> implements IMutableResource<E>; the generic type
// parameters appear to have been stripped by HTML extraction -- confirm
// against the original before relying on the raw types seen here.
abstract public class AbstractResource implements IMutableResource {

    // Shared logger for all locatable resources.
    protected final transient static Logger log = Logger.getLogger(AbstractResource.class);

    // Used to locate indices, relations and containers and to execute
    // operations on those resources.
    protected final IIndexManager indexManager;
    
    // The namespace identifying this resource.
    final private String namespace;

    // Namespace of the containing resource; read from
    // RelationSchema.CONTAINER in the ctor (may be null).
    final private String containerNamespace;

    // The timestamp of this resource view.
    final private long timestamp;
    // Commit time from which a read-only view was materialized, read from
    // RelationSchema.COMMIT_TIME in the ctor (may be null).
    final private Long commitTime;
    
    // The configuration properties supplied to the ctor.
    final private Properties properties;
    
    // Values parsed from Options in the ctor; all deprecated by BOp
    // annotations but still consulted by legacy code paths.
    final private int chunkOfChunksCapacity;
    final private int chunkCapacity;
    final private long chunkTimeout;
    private final int fullyBufferedReadThreshold;
    private final boolean forceSerialExecution;
    private final int maxParallelSubqueries;
//    private final boolean nestedSubquery;

    /**
     * Capacity of the buffers which accumulate chunks from concurrent
     * producers.
     * 
     * @see Options#CHUNK_OF_CHUNKS_CAPACITY
     * 
     * @deprecated by {@link BOp} annotations.
     */
    public final int getChunkOfChunksCapacity() {
        return this.chunkOfChunksCapacity;
    }

    /**
     * The target size of a chunk.
     * 
     * @see Options#CHUNK_CAPACITY
     * 
     * @deprecated by {@link BOp} annotations.
     */
    public final int getChunkCapacity() {
        return this.chunkCapacity;
    }
    
    /**
     * Milliseconds the {@link BlockingBuffer} waits for another chunk to
     * combine with the current chunk before returning the current chunk.
     * ZERO (0) disables the chunk combiner.
     * 
     * @see Options#CHUNK_TIMEOUT
     * 
     * @deprecated by {@link BOp} annotations.
     */
    public final long getChunkTimeout() {
        return this.chunkTimeout;
    }

    /**
     * Threshold at or below which the estimated range count of an
     * {@link IAccessPath#iterator(long, long, int)} triggers a fully
     * buffered (synchronous) read; above it the read is asynchronous.
     * 
     * @see Options#FULLY_BUFFERED_READ_THRESHOLD
     * 
     * @deprecated by {@link BOp} annotations.
     */
    public int getFullyBufferedReadThreshold() {
        return this.fullyBufferedReadThreshold;
    }
    
    /**
     * When true, rule sets execute sequentially even when they
     * are not flagged as a sequential program.
     * 
     * @see Options#FORCE_SERIAL_EXECUTION
     * 
     * @deprecated by {@link BOp} annotations.
     */
    public boolean isForceSerialExecution() {
        return this.forceSerialExecution;
    }

    /**
     * Maximum #of subqueries for the first join dimension that are issued in
     * parallel. ZERO (0) avoids submitting tasks to the
     * {@link ExecutorService} entirely; ONE (1) submits a single task at a
     * time.
     * 
     * @see Options#MAX_PARALLEL_SUBQUERIES
     * 
     * @deprecated by {@link BOp} annotations.
     */
    public int getMaxParallelSubqueries() {
        return this.maxParallelSubqueries;
    }

    /**
     * Options for locatable resources.
     * 
     * @todo most of these options effect asynchronous iterators, access path
     *       behavior, and join behavior. these are general features for bigdata
     *       resources, but some of the code to support this stuff is still
     *       local to the RDF module. That can be fixed using an abstract base
     *       class for {@link IJoinNexus} and {@link IJoinNexusFactory}.
     * 
     * @todo some of these defaults need to be re-examined. see notes in the
     *       javadoc below.
     * 
     * @author Bryan Thompson
     * @version $Id$
     */
    public static interface Options {
        
        /**
         * 

* Set the maximum #of chunks from concurrent producers that can be * buffered before an {@link IBuffer} containing chunks of * {@link ISolution}s would block (default * {@link #DEFAULT_CHUNK_OF_CHUNKS_CAPACITY}). This is used to * provision a {@link BlockingQueue} for {@link BlockingBuffer}. A * value of ZERO(0) indicates that a {@link SynchronousQueue} should be * used instead. The best value may be more than the #of concurrent * producers if the producers are generating small chunks, e.g., because * there are few solutions for a join subquery. *

* @deprecated by {@link BOp} annotations. */ String CHUNK_OF_CHUNKS_CAPACITY = BlockingBuffer.class.getName() + ".chunkOfChunksCapacity"; /** * Default for {@link #CHUNK_OF_CHUNKS_CAPACITY} * @deprecated by {@link BOp} annotations. */ String DEFAULT_CHUNK_OF_CHUNKS_CAPACITY = "10"; // was 1000 /** *

* Sets the capacity of the {@link IBuffer}s used to accumulate a chunk * when evaluating rules, etc (default {@value #CHUNK_CAPACITY}). Note * that many processes use a {@link BlockingBuffer} to accumulate * "chunks of chunks". *

* * @see #CHUNK_OF_CHUNKS_CAPACITY * @deprecated by {@link BOp} annotations. */ String CHUNK_CAPACITY = IBuffer.class.getName() + ".chunkCapacity"; /** * Default for {@link #CHUNK_CAPACITY} *

* Note: This used to be 20k, but chunks of chunks works better than * just a large chunk. * * @deprecated by {@link BOp} annotations. */ String DEFAULT_CHUNK_CAPACITY = "100"; // was 100 /** * The timeout in milliseconds that the {@link BlockingBuffer} will wait * for another chunk to combine with the current chunk before returning * the current chunk (default {@link #DEFAULT_CHUNK_TIMEOUT}). This may * be ZERO (0) to disable the chunk combiner. * * @deprecated by {@link BOp} annotations. */ String CHUNK_TIMEOUT = BlockingBuffer.class.getName() + ".chunkTimeout"; /** * The default for {@link #CHUNK_TIMEOUT}. * * @todo this is probably much larger than we want. Try 10ms. * @deprecated by {@link BOp} annotations. */ String DEFAULT_CHUNK_TIMEOUT = "10"; // was 1000 /** * If the estimated rangeCount for an * {@link AccessPath#iterator()} is LTE this threshold then use * a fully buffered (synchronous) iterator. Otherwise use an * asynchronous iterator whose capacity is governed by * {@link #CHUNK_OF_CHUNKS_CAPACITY}. * * @deprecated by {@link BOp} annotations. */ String FULLY_BUFFERED_READ_THRESHOLD = AccessPath.class .getName() + ".fullyBufferedReadThreadshold"; /** * Default for {@link #FULLY_BUFFERED_READ_THRESHOLD} * * @todo figure out how good this value is. * @deprecated by {@link BOp} annotations. */ String DEFAULT_FULLY_BUFFERED_READ_THRESHOLD = "200";//""+20*Bytes.kilobyte32; /** * When true ({@value #DEFAULT_FORCE_SERIAL_EXECUTION}), * rule sets will be forced to execute sequentially even when they are * not flagged as a sequential program. * * @todo The following discussion applies to the * {@link AbstractTripleStore}. and should be relocated. *

* The {@link #CLOSURE_CLASS} option defaults to * {@link FastClosure}, which has very little possible parallelism * (it is mostly a sequential program by nature). For that reason, * {@link #FORCE_SERIAL_EXECUTION} defaults to false * since the overhead of parallel execution is more likely to * lower the observed performance with such limited possible * parallelism. However, when using {@link FullClosure} the * benefits of parallelism MAY justify its overhead. *

* The following data are for LUBM datasets. * *

         * U1  Fast Serial   : closure =  2250ms; 2765, 2499, 2530
         * U1  Fast Parallel : closure =  2579ms; 2514, 2594
         * U1  Full Serial   : closure = 10437ms.
         * U1  Full Parallel : closure = 10843ms.
         * 
         * U10 Fast Serial   : closure = 41203ms, 39171ms (38594, 35360 when running in caller's thread rather than on the executorService).
         * U10 Fast Parallel : closure = 30722ms. 
         * U10 Full Serial   : closure = 108110ms.
         * U10 Full Parallel : closure = 248550ms.
         * 
* * Note that the only rules in the fast closure program that have * potential parallelism are {@link RuleFastClosure5} and * {@link RuleFastClosure6} and these rules are not being * triggered by these datasets, so there is in fact NO potential * parallelism (in the data) for these datasets. *

* It is possible that a machine with more cores would perform * better under the "full" closure program with parallel rule * execution (these data were collected on a laptop with 2 cores) * since performance tends to be CPU bound for small data sets. * However, the benefit of the "fast" closure program is so large * that there is little reason to consider parallel rule execution * for the "full" closure program. * * @todo collect new timings for this option. The LUBM performance has * basically doubled since these data were collected. Look further * into ways in which overhead might be reduced for rule * parallelism and also for when rule parallelism is not enabled. * * @todo rename as parallel_rule_execution. * @deprecated by {@link BOp} annotations. */ String FORCE_SERIAL_EXECUTION = ProgramTask.class.getName() + ".forceSerialExecution"; /** * @deprecated by {@link BOp} annotations. */ String DEFAULT_FORCE_SERIAL_EXECUTION = "true"; /** * The maximum #of subqueries for the first join dimension that will be * issued in parallel. Use ZERO(0) to avoid submitting tasks to the * {@link ExecutorService} entirely and ONE (1) to submit a single task * at a time to the {@link ExecutorService}. * * @deprecated by {@link BOp} annotations. */ String MAX_PARALLEL_SUBQUERIES = ProgramTask.class.getName() + ".maxParallelSubqueries"; /** * @deprecated by {@link BOp} annotations. */ String DEFAULT_MAX_PARALLEL_SUBQUERIES = "5"; } protected AbstractResource(final IIndexManager indexManager, final String namespace, final Long timestamp, final Properties properties) { this(null/* container */, indexManager, namespace, timestamp, properties); } /** * Alternative version used when a resource exists within some container. * The additional container argument provides access to the container * before the container has been written to the global row store. 
*/ protected AbstractResource(final ILocatableResource container, final IIndexManager indexManager, final String namespace, final Long timestamp, final Properties properties) { // Note: [container] MAY be null. if (indexManager == null) throw new IllegalArgumentException(); if (namespace == null) throw new IllegalArgumentException(); if (timestamp == null) throw new IllegalArgumentException(); if (properties == null) throw new IllegalArgumentException(); // Note: Non-null if this resource exists in some container this.container = container; // Note: Bound before we lookup property values! this.indexManager = indexManager; // Note: Bound before we lookup property values! this.namespace = namespace; this.containerNamespace = properties.getProperty(RelationSchema.CONTAINER); this.timestamp = timestamp; this.properties = properties; properties.setProperty(RelationSchema.NAMESPACE, namespace); properties.setProperty(RelationSchema.CLASS, getClass().getName()); if (log.isInfoEnabled()) { log.info("namespace=" + namespace + ", timestamp=" + timestamp + ", container=" + containerNamespace + ", indexManager=" + indexManager); } /* * Resolve the commit time from which this view was materialized (if * known and otherwise null). 
*/ commitTime = (Long)properties.get(RelationSchema.COMMIT_TIME); forceSerialExecution = Boolean.parseBoolean(getProperty( Options.FORCE_SERIAL_EXECUTION, Options.DEFAULT_FORCE_SERIAL_EXECUTION)); maxParallelSubqueries = getProperty(Options.MAX_PARALLEL_SUBQUERIES, Options.DEFAULT_MAX_PARALLEL_SUBQUERIES, IntegerValidator.GTE_ZERO); chunkOfChunksCapacity = getProperty(Options.CHUNK_OF_CHUNKS_CAPACITY, Options.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY, IntegerValidator.GT_ZERO); chunkCapacity = getProperty(Options.CHUNK_CAPACITY, Options.DEFAULT_CHUNK_CAPACITY, IntegerValidator.GT_ZERO); chunkTimeout = getProperty(Options.CHUNK_TIMEOUT, Options.DEFAULT_CHUNK_TIMEOUT, LongValidator.GTE_ZERO); fullyBufferedReadThreshold = getProperty( Options.FULLY_BUFFERED_READ_THRESHOLD, Options.DEFAULT_FULLY_BUFFERED_READ_THRESHOLD, IntegerValidator.GT_ZERO); } public final String getNamespace() { return namespace; } public final String getContainerNamespace() { return containerNamespace; } /** * Return the container. * * @return The container -or- null if there is no container. */ public ILocatableResource getContainer() { if (container == null) { synchronized (this) { if (container == null) { if (getContainerNamespace() != null) { if (log.isInfoEnabled()) { log.info("resolving container: " + getContainerNamespace()); } container = getIndexManager() .getResourceLocator() .locate(getContainerNamespace(), getTimestamp()); } } } } return container; } private volatile ILocatableResource container; public final long getTimestamp() { return timestamp; } /** * The commit time from which a read-only view was materialized (if known) * and otherwise null. * * @see https://sourceforge.net/apps/trac/bigdata/ticket/266 */ protected Long getCommitTime() { return commitTime; } /** * Wrap and return the properties specified to the ctor. Wrapping the * {@link Properties} object prevents inadvertent side-effects. 
*/ public final Properties getProperties() { return new Properties(properties); } /** * Return the {@link Properties} object without wrapping it. This method can * be used in those cases where you need to access non-String property * values. The caller is responsible for avoiding mutation to the returned * Properties object. *

* Note: This explicitly does NOT wrap the properties. Doing so makes it * impossible to access the default properties using Hashtable#get(), which * in turn means that we can not access non-String objects which have been * materialized from the GRS in the {@link Properties}. This does introduce * some potential for side-effects between read-only instances of the same * resource view which share the same properties object. */ protected final Properties getBareProperties() { return properties; } /** * Return the object used to locate indices, relations, and relation * containers and to execute operations on those resources. *

* Note: For scale-out, this is always the federation's index * manager NOT the data service local index manager. This is an artifact of * how we resolve the metadata for the relation against the global row * store. * * @return The {@link IIndexManager}. * * @todo If we support the notion of a "relation shard" then this could * become the shard's data service local index manager in that * instance but, regardless, we would need a means to resolve the * metadata for the relation against the federation's index manager */ public IIndexManager getIndexManager() { return indexManager; } final public ExecutorService getExecutorService() { return indexManager.getExecutorService(); } /** * The class name, timestamp and namespace for the relation view. */ public String toString() { return getClass().getSimpleName() + "{timestamp=" + timestamp + ", namespace=" + namespace + ", container=" + containerNamespace + ", indexManager=" + indexManager + "}"; } /** * The default implementation only logs the event. */ public AbstractResource init() { if (log.isInfoEnabled()) log.info(toString()); return this; } /** * * @todo Lock service supporting shared locks, leases and lease renewal, * escalation of shared locks to exclusive locks, deadlock detection, * and possibly a resource hierarchy. Leases should be Callable * objects that are submitted by the client to its executor service so * that they will renew automatically until cancelled (and will cancel * automatically if not renewed). *

* There is existing code that could be adapted for this purpose. It * might have to be adapted to support lock escalation (shared to * exclusive), a resource hierarchy, and a delay queue to cancel * leases that are not renewed. It would have to be wrapped up as a * low-latency service and made available via the * {@link IBigdataFederation}. It also needs to use a weak reference * cache for the collection of resource queues so that they are GC'd * rather than growing as new resources are locked and never * shrinking. *

* If we require pre-declaration of locks, then we do not need the * dependency graph since deadlocks can only arise with 2PL. *

* Since the service is remote it should use {@link UUID}s to * identify the lock owner(s). *

* The lock service would be used to bracket operations such as * relation {@link #create()} and {@link #destroy()} and would be used * to prevent those operations while a lease is held by concurrent * processes with a shared lock. *

* Add ctor flag to create iff not found? *

* There needs to be a lock protocol for subclasses so that they can * ensure that they are the only task running create (across the * federation) and so that they can release the lock when they are * done. The lock can be per the notes above, but the protocol with * the subclass will require some coordinating methods. *

* Full transactions are another way to solve this problem. */ @Override public void create() { if (log.isInfoEnabled()) log.info(toString()); /* * Convert the Properties to a Map. */ final Map map = GlobalRowStoreUtil.convert(properties); // Write the map on the row store. final Map afterMap = indexManager.getGlobalRowStore() .write(RelationSchema.INSTANCE, map); if(log.isDebugEnabled()) { log.debug("Properties after write: " + afterMap); } /* * Add this instance to the locator cache. * * Note: Normally, the instances are created by the locator cache * itself. In general the only time the application creates an instance * directly is when it is going to attempt to create the relation. This * takes advantage of that pattern to notify the locator that it should * cache this instance. */ ((DefaultResourceLocator) getIndexManager().getResourceLocator()) .putInstance(this); } @Override public void destroy() { if (log.isInfoEnabled()) log.info(toString()); /* * Destroy all indices spanned by the namespace for this relation. */ { final Iterator itr = indexManager.indexNameScan(namespace + ".", timestamp); while (itr.hasNext()) { final String name = itr.next(); try { indexManager.dropIndex(name); } catch (NoSuchIndexException ex) { // If the index does not exist, keep on going. log.warn("Ignored: " + ex); } } } /* * Destroy any caches associated with this relation. */ { if (indexManager instanceof IJournal || indexManager instanceof IBigdataFederation) { /* * Note: No cache associated with a TemporaryStore (since no * QueryEngine is associated with a TemporaryStore). * * Note: If the cache is remote, then we need to create the * QueryEngine instance on demand so we can access the cache. * Hence getQueryController() and not getExistingQueryEngine(). * * TODO This could wind up using the wrong index manager if the * destroy() is executed by an AbstractTask (neither the Journal * nor the JiniFederation). 
*/ final QueryEngine queryEngine = QueryEngineFactory.getInstance() .getQueryController(indexManager); /* * Connect to the cache provider. * * Note: This will create the cache if it does not exist. At all * other places in the code we use getExistingSparqlCache() to * access the cache IFF it exists. Here is where we create it. */ final ICacheConnection cacheConn = CacheConnectionFactory .getCacheConnection(queryEngine); if (cacheConn != null) { cacheConn.destroyCaches(namespace, timestamp); } } } // Delete the entry for this relation from the row store. indexManager.getGlobalRowStore().delete(RelationSchema.INSTANCE, namespace); // Clear the entry from the resource locator cache. indexManager.getResourceLocator().discard(this, true/* destroyed */); } /** * Acquires an exclusive lock for the {@link #getNamespace()}. * * @return the lock. * * @throws RuntimeException * if anything goes wrong. * * @see IResourceLockService */ protected IResourceLock acquireExclusiveLock() { return indexManager.getResourceLockService().acquireLock(getNamespace()); } /** * Release the lock. * * @param resourceLock * The lock. */ protected void unlock(final IResourceLock resourceLock) { resourceLock.unlock(); } /** * Resolve the property value using the {@link IIndexManager}, the * namespace of the resource, and the {@link Properties} instance to be * tested as hidden parameters. * * @param globalName * The global property name. * @param defaultValue * The default. * * @return The resolved property value. * * @see Configuration */ public String getProperty(final String localName, final String defaultValue) { return Configuration.getProperty(indexManager, properties, namespace, localName, defaultValue); } /** * Resolves, parses, and validates the property value. * * @param name * The property name. * @param defaultValue * The default value. 
* @return */ public T getProperty(final String name, final String defaultValue, final IValidator validator) { return Configuration.getProperty(indexManager, properties, namespace, name, defaultValue, validator); } // /** // * Sets the property on the underlying properties object but DOES NOT set // * the property on the global row store (GRS). This method may be used when // * a resource is newly created in order to cache objects which are persisted // * on the GRS. // * // * @param name // * The property name. // * @param value // * The property value. // */ // protected void setProperty(final String name, final Object value) { // // properties.put(name, value); // // } public boolean isReadOnly() { return TimestampUtility.isReadOnly(getTimestamp()); } final protected void assertWritable() { if(isReadOnly()) { throw new IllegalStateException("READ_ONLY"); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy