All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.gom.om.ObjectMgrModel Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Mar 19, 2012
 */
package com.bigdata.gom.om;

import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.log4j.Logger;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.query.BindingSet;

import com.bigdata.cache.ConcurrentWeakValueCache;
import com.bigdata.gom.gpo.GPO;
import com.bigdata.gom.gpo.IGPO;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.model.BigdataBNode;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.model.BigdataValueFactory;

import cutthecrap.utils.striterators.ICloseableIterator;

/**
 * Base class for {@link IObjectManager} implementations. This class handles
 * {@link IObjectManager} protocol for maintaining an transaction edit list.
 * Concrete implementations need to provide for communication with the database
 * (either remote or embedded) and the DESCRIBE (aka Object) cache.
 * 
 * @author Martyn
 *         Cutcher
 * @author Bryan Thompson
 */
public abstract class ObjectMgrModel implements IObjectManager {

    private static final Logger log = Logger.getLogger(ObjectMgrModel.class);

    /**
     * The {@link UUID} for this object manager instance.
     */
    private final UUID m_uuid;
    
    protected final BigdataValueFactory m_valueFactory;
    
    /** Object Creation and ID Management patterns. */
    private final IIDGenerator m_idGenerator;

    /**
     * The "running object table." Dirty objects are wired into this table by
     * the existence of a hard reference in the {@link #m_dirtyGPOs} list. The
     * keys are either {@link Resource}s or {@link Statement}s.
     */
    private final ConcurrentWeakValueCache m_dict;
	
    /**
     * This is only for the predicates and provides the guarantee that we can
     * reference test on predicates within the scope of a given object manager.
     */
    private final ConcurrentHashMap m_internedKeys = new ConcurrentHashMap();

    /**
     * We need to maintain a dirty list in order to pin object references that
     * are dirty. On commit, we need to send the retracts and the asserts in a
     * single operation. The GPO.GPOEntry tracks those individual asserts and
     * retracts.
     */
    // private final List m_dirtyGPOs = new LinkedList();
    // Sample code indicates that an ArrayList is less overhead than a LinkedList
    private final List m_dirtyGPOs = new ArrayList();

	private final URI s_nmeMgr;

    /**
     * A lock for things which need to be serialized, initially just the native
     * transaction stuff. Avoid using "synchronized(this)" or the synchronized
     * keyword as that forces everything to contend for the same lock. If you
     * can use different locks for different types of things then you have
     * better concurrency (but, of course, only as appropriate).
     */
    private final Lock lock = new ReentrantLock();
    
    /**
     * Default to maximum dirty list size to lock out any incremental flushing.
     * 

* Note: Incremental eviction breaks the ACID contract for updates. Thus, * the dirty list should not be limited in capacity. */ protected int m_maxDirtyListSize = Integer.MAX_VALUE; /** * The native transaction counter. */ private int m_transactionCounter = 0; /** * * @param endpoint * The SPARQL endpoint that can be used to communicate with the * database. * @param valueFactory * The value factory. */ public ObjectMgrModel( final String endpoint, final BigdataValueFactory valueFactory) { m_valueFactory = valueFactory; m_uuid = UUID.randomUUID(); m_idGenerator = new IDGenerator(endpoint, m_uuid, m_valueFactory); /* * FIXME UUIG COINING. Plus this needs to be global if we have a * "name manager" object. Frankly, I do not see any reason to have * "named roots" in RDF GOM. Any URI can be a named root - you just need * to use the URI! */ s_nmeMgr = m_valueFactory.createURI("gpo:nmeMgr/"+m_uuid); /* * Note: This sets the hard reference queue capacity. */ m_dict = new ConcurrentWeakValueCache(1000/* queueCapacity */); } public IGPO getDefaultNameMgr() { return getGPO(s_nmeMgr); } public UUID getID() { return m_uuid; } @Override final public BigdataValueFactory getValueFactory() { return m_valueFactory; } /** * Intern a predicate (internal API). This provides the guarantee that we * can use reference tests (==) for URIs within the scope of a * given object manager. * * @param key * The predicate. * * @return The interned version of the predicate. */ public BigdataURI internKey(final URI aKey) { // Ensure URI is for the namespace associated with this OM. final BigdataURI key = m_valueFactory.asValue(aKey); // Internal the URI. final BigdataURI old = m_internedKeys.putIfAbsent(key, key); // Resolve data race. final BigdataURI uri = old != null ? old : key; return uri; } /** * Make a best effort attempt to use the {@link Resource} associated with an * {@link IGPO} in the running object table * * @param t * Some identifier. * * @return Either the same reference or one that will be canonical as long * as that {@link IGPO} remains pinned in the running object table. */ public T bestEffortIntern(final T t) { if (t instanceof Resource) { final IGPO gpo = m_dict.get(t); if (gpo == null) return t; return (T) gpo.getId(); } return t; } /** * GPOs are added to the dirty list when initially modified. The dirty list * is not bounded. Large updates should be done using the RDF and SPARQL * layer which do not have this implicit scaling limit. *

* Note: We can not do incremental eviction unless we are holding open a * connection to the database that will isolate those edits. This could be * done in principle with either full read/write transactions or with the * unisolated connection (if embedded) but we do not yet support remove * create/run/commit for full read/write transactions in the NSS REST API. * The problem with holding the unisolated connection across incremental * updates is that it will lock out any other updates against the backing * store for the life cycle of the object manager. */ public void addToDirtyList(final GPO gpo) { if(gpo == null) throw new IllegalArgumentException(); if (!gpo.isDirty()) throw new IllegalStateException(); m_dirtyGPOs.add(gpo); if (m_dirtyGPOs.size() > m_maxDirtyListSize) { if (log.isTraceEnabled()) log.trace("Incremental flush of dirty objects"); flushDirtyObjects(); } } /** * * @return size of dirty list */ public int getDirtyObjectCount() { return m_dirtyGPOs.size(); } // abstract void flushTerms(); /** * Commit. */ private void flushDirtyObjects() { // // prepare values // final Iterator newValues = m_dirtyGPOs.iterator(); // while (newValues.hasNext()) { // final GPO gpo = newValues.next(); // gpo.prepareBatchTerms(); // } // // // flush terms // flushTerms(); final long start = System.currentTimeMillis(); final long count = m_dirtyGPOs.size(); { /* * Gather up and apply the edit set (statements added and removed). */ final List inserts = new LinkedList(); final List removes = new LinkedList(); final Iterator updates = m_dirtyGPOs.iterator(); while (updates.hasNext()) { updates.next().prepareBatchUpdate(inserts, removes); } // Atomic commit. flushStatements(inserts, removes); } { /* * Tell the dirty objects that they have been committed and are now * clean. */ final Iterator updates = m_dirtyGPOs.iterator(); while (updates.hasNext()) { updates.next().doCommit(); } // Clear the dirty object list. m_dirtyGPOs.clear(); } if (log.isTraceEnabled()) log.trace("Flush took " + (System.currentTimeMillis() - start) + "ms for " + count + " objects"); } /** * Flush statements to be inserted and removed to the backing store.. * * @param insertList * The list of statements to be added. * @param removeList * The list of statements to be removed. */ abstract protected void flushStatements(final List insertList, final List removeList); @Override public IGPO getGPO(final Resource id) { IGPO ret = m_dict.get(id); if (ret == null) { final IGPO tmp = m_dict.putIfAbsent(id, ret = new GPO(this, id)); if (tmp != null) { // Lost the data race. ret = tmp; } } return ret; } /** * {@inheritDoc} * * FIXME This is using the {@link String} representation of the * {@link Statement} as the blank node ID. It needs to work with the stable * {@link IV}s as assigned by the lexicon. However, we need to ensure that * the {@link IV}s are being reported through to the object manager in the * various interchange formats that it uses and work through how we will * provide that information in the SELECT query as well as CONSTRUCT and * DESCRIBE (basically, we need to conneg for a MIME Type that supports it). */ public IGPO getGPO(final Statement stmt) { final BigdataBNode id = m_valueFactory.createBNode(stmt.toString()); // Flag indicating that this GPO is a Statement. id.setStatementIdentifier(true); IGPO ret = m_dict.get(id); if (ret == null) { final IGPO tmp = m_dict.putIfAbsent(id, ret = new GPO(this, id, stmt)); if (tmp != null) { // Lost the data race. ret = tmp; } } return ret; } public Iterator> getGPOs() { return m_dict.iterator(); } @Override public void materialize(final IGPO gpo) { if (gpo == null) throw new IllegalArgumentException(); if (log.isTraceEnabled()) log.trace("Materializing: " + gpo.getId()); ((GPO) gpo).dematerialize(); if (true) { materializeWithDescribe(gpo); } else { materializeWithSelect(gpo); } } long m_materialized = 0; protected void materializeWithDescribe(final IGPO gpo) { final String query = "DESCRIBE <" + gpo.getId().toString() + ">"; initGPO((GPO) gpo, evaluateGraph(query)); /** * */ m_materialized++; if (m_materialized % 10000 == 0) System.out.println("Materialized: " + m_materialized + ", dictionary: " + m_dict.size() + ", m_dirtyGPOs: " + m_dirtyGPOs.size()); } protected void materializeWithSelect(final IGPO gpo) { final String query = "SELECT ?p ?v WHERE {<" + gpo.getId().toString() + "> ?p ?v}"; final ICloseableIterator res = evaluate(query); while (res.hasNext()) { final BindingSet bs = res.next(); ((GPO) gpo).initValue((URI) bs.getValue("p"), bs.getValue("v")); } } public Map initGPOs(final ICloseableIterator itr) { return initGPO(null/* gpo */, itr); } /** * Initialize one or more {@link IGPO}s from a collection of statements. * * @param gpo * The gpo (optional). When given, only the specified * {@link IGPO} will be initialized. When not provided, all * {@link Resource}s in the subject and object position of the * visited {@link Statement}s will be resolved to {@link IGPO}s * and the corresponding properties and/or links initialized from * the {@link Statement}s. * @param stmts * The statements. * * @return A hard reference collection that will keep the any materialized * {@link IGPO}s from being finalized before the caller has a chance * to do something with them. */ protected Map initGPO(final GPO gpo, final ICloseableIterator stmts) { final Map map; if (gpo != null) { map = Collections.singletonMap((Resource) gpo.getId(), (IGPO) gpo); } else { map = new HashMap(); } try { final Resource id = gpo == null ? null : gpo.getId(); int statements = 0; while (stmts.hasNext()) { final Statement stmt = stmts.next(); final Resource subject = stmt.getSubject(); final URI predicate = stmt.getPredicate(); final Value value = stmt.getObject(); if (id != null) { /* * Initializing some specific gpo provided by the caller. */ if (subject.equals(id)) { // property or link out. gpo.initValue(predicate, value); } else { // links in - add to LinkSet gpo.initLinkValue(predicate, subject); } } else { /* * Initial GPOs for all resources visited. */ { final GPO tmp = (GPO) getGPO(subject); // property or link out. tmp.initValue(predicate, value); map.put(tmp.getId(), tmp); } if(value instanceof Resource) { final GPO tmp = (GPO) getGPO((Resource) value); // Link in. tmp.initLinkValue(predicate, subject); map.put(tmp.getId(), tmp); } } statements++; } if (log.isTraceEnabled()) log.trace("Materialized: " + (gpo == null ? "null" : gpo.getId()) + " with " + statements + " statements"); return map; } finally { stmts.close(); } } @Override public int beginNativeTransaction() { lock.lock(); try { return m_transactionCounter++; } finally { lock.unlock(); } } @Override public int commitNativeTransaction(final int expectedCounter) { lock.lock(); try { final int ret = --m_transactionCounter; if (ret != expectedCounter) { throw new IllegalArgumentException( "Unexpected transaction counter"); } if (ret == 0) { flushDirtyObjects(); // doCommit(); } return ret; } finally { lock.unlock(); } } // /** // * Hook for extended commit processing. // */ // protected abstract void doCommit(); @Override public int getNativeTransactionCounter() { /* * Note: You must obtain the lock for visibility of the current value * unless the transaction counter is either volatile or an * AtomicInteger. */ lock.lock(); try { return m_transactionCounter; } finally { lock.unlock(); } } @Override public void rollbackNativeTransaction() { lock.lock(); try { clearCache(); m_transactionCounter = 0; if (m_idGenerator != null) { m_idGenerator.rollback(); } // doRollback(); } finally { lock.unlock(); } } // /** // * Hook for extended rollback processing. // */ // abstract protected void doRollback(); @Override public IGPO createGPO() { final Resource uri = m_idGenerator.genId(); // addNewTerm((BigdataValue) uri); final GPO ret = (GPO) getGPO(uri); ret.setMaterialized(true); return ret; } @Override final public void remove(final IGPO gpo) { gpo.remove(); } /** * Simple save/recall interface that the ObjectManager provides to simplify * other pattern implementations. Internally it uses a NameManager GPO */ @Deprecated // no need for explicit save/recall. public void save(final URI key, Value value) { getGPO(s_nmeMgr).setValue(key, value); } /** * Simple save/recall interface that the ObjectManager provides to simplify * other pattern implementations. Internally it uses a NameManager GPO */ @Deprecated // no need for explicit recall. public Value recall(final URI key) { return getGPO(s_nmeMgr).getValue(key); } @Deprecated // no need for explicit recall. public IGPO recallAsGPO(final URI key) { final Value val = recall(key); if (val instanceof Resource) { return getGPO((Resource) val); } else { return null; } } /** * Return the list of names that have been used to save references. These * are the properties of the internal NameManager. */ public Iterator getNames() { final GPO nmgr = (GPO) getGPO(s_nmeMgr); return nmgr.getPropertyURIs(); } @Override public void close() { clearCache(); } final public void clearCache() { m_dict.clear(); m_dirtyGPOs.clear(); } /** * Encode a URL, Literal, or blank node for inclusion in a SPARQL query to * be sent to the remote service. * * @param v * The resource. * * @return The encoded representation of the resource. * * TODO This must correctly encode a URL, Literal, or blank node for * inclusion in a SPARQL query to be sent to the remote service. */ public String encode(final Resource v) { return v.stringValue(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy