/*
 * Copyright (c) 2003-2007, CWI and INRIA All rights reserved. Redistribution and use in source and
 * binary forms, with or without modification, are permitted provided that the following conditions
 * are met: * Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer. * Redistributions in binary form must reproduce the
 * above copyright notice, this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution. * Neither the name of the University of
 * California, Berkeley nor the names of its contributors may be used to endorse or promote products
 * derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED
 * BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
 * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package openllet.shared.hash;

import java.lang.ref.WeakReference;

import openllet.atom.OpenError;

/**
 * The SharedObjectFactory is a 'weak' constant pool for uniquely represented objects.
 *
 * This class is fully thread-safe, but tries to avoid (contended) locking as much as is
 * reasonably achievable. As a result this implementation should scale fairly well on
 * multi-core / multi-processor systems, while also limiting synchronization overhead.
 *
 * WARNING: Do not edit this class unless you fully understand both Java's memory and threading model
 * and how the garbage collector(s) work. (You'll almost certainly 'break' something otherwise).
 * The JMM spec (previously known as JSR-133) can be found here: http://java.sun.com/docs/books/jls/third_edition/html/memory.html
 * Even experts should be cautious. Every line of code is in a certain place for a reason. Modifying
 * anything may break thread-safety and / or can have a serious impact on performance.
 *
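 * A minimal usage sketch (illustrative only; MyTerm stands for a hypothetical
 * SharedObject implementation that provides hashCode(), equivalent() and duplicate()):
 *
 * <pre>
 * SharedObjectFactory factory = new SharedObjectFactory();
 * SharedObject unique = factory.build(new MyTerm("a")); // canonical instance
 * boolean present = factory.contains(unique); // true
 * </pre>
 *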
 * @author Arnold Lankamp
 */
public class SharedObjectFactory
{
	private final static int DEFAULT_NR_OF_SEGMENTS_BITSIZE = 5;

	private final Segment[] _segments;

	/**
	 * Default constructor.
	 */
	public SharedObjectFactory()
	{
		super();

		_segments = new Segment[1 << DEFAULT_NR_OF_SEGMENTS_BITSIZE];
		for (int i = _segments.length - 1; i >= 0; i--)
			_segments[i] = new Segment(i);
	}

	/**
	 * Removes stale entries from the set.
	 */
	public void cleanup()
	{
		for (int i = _segments.length - 1; i >= 0; i--)
		{
			final Segment segment = _segments[i];
			synchronized (segment)
			{
				segment.cleanup();
			}
		}
	}

	/**
	 * Returns statistics.
	 *
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString()
	{
		final StringBuilder sb = new StringBuilder();
		final int nrOfSegments = _segments.length;
		for (int i = 0; i < nrOfSegments; i++)
		{
			final int startHash = i << Segment.MAX_SEGMENT_BITSIZE;
			final int endHash = ((i + 1) << Segment.MAX_SEGMENT_BITSIZE) - 1;

			sb.append("Segment hash range: ");
			sb.append(startHash);
			sb.append(" till ");
			sb.append(endHash);
			sb.append(" | ");
			sb.append(_segments[i].toString());
			sb.append("\n");
		}
		return sb.toString();
	}

	/**
	 * Finds or creates the unique version of the given shared object prototype.
	 *
	 * @param prototype A prototype of the shared object we want the unique reference to.
	 * @return The reference to the unique shared object associated with the argument.
	 */
	public SharedObject build(final SharedObject prototype)
	{
		final int hash = prototype.hashCode();
		final int segmentNr = hash >>> (32 - DEFAULT_NR_OF_SEGMENTS_BITSIZE);

		return _segments[segmentNr].get(prototype, hash);
	}
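
	// Worked example of the segment selection above (illustrative numbers): with
	// DEFAULT_NR_OF_SEGMENTS_BITSIZE = 5 the top 5 bits of the hash select one of
	// 32 segments; e.g. hash = 0xD0000000 gives 0xD0000000 >>> 27 = 26, so the
	// prototype is looked up in _segments[26].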

	/**
	 * Checks if the given shared object is present in this factory.
	 *
	 * @param object The shared object.
	 * @return True if this factory contains the given shared object; false otherwise.
	 */
	public boolean contains(final SharedObject object)
	{
		final int hash = object.hashCode();
		final int segmentNr = hash >>> (32 - DEFAULT_NR_OF_SEGMENTS_BITSIZE);

		return _segments[segmentNr].contains(object, hash);
	}

	/**
	 * A segment is a hashtable that represents a certain part of the 'hashset'.
	 */
	private final static class Segment
	{
		private final static int MAX_SEGMENT_BITSIZE = 32 - DEFAULT_NR_OF_SEGMENTS_BITSIZE;
		private final static int DEFAULT_SEGMENT_BITSIZE = 5;
		private final static float DEFAULT_LOAD_FACTOR = 2f;

		private volatile Entry[] _entries;

		private int _bitSize;

		private int _threshold;
		private int _load;

		public volatile boolean _flaggedForCleanup;
		public volatile WeakReference<GarbageCollectionDetector> _garbageCollectionDetector;
		private int _cleanupScaler;
		private int _cleanupThreshold;

		private final int _segmentID;

		private int _numberOfFreeIDs;
		private int[] _freeIDs;
		private int _freeIDsIndex;
		private int _nextFreeID;
		private final int _maxFreeIDPlusOne;

		/**
		 * Constructor.
		 *
		 * @param segmentID The number identifying this segment.
		 */
		public Segment(final int segmentID)
		{
			super();

			_segmentID = segmentID;

			_bitSize = DEFAULT_SEGMENT_BITSIZE;
			final int nrOfEntries = 1 << _bitSize;

			_entries = new Entry[nrOfEntries];

			_threshold = (int) (nrOfEntries * DEFAULT_LOAD_FACTOR);
			_load = 0;

			_flaggedForCleanup = false;
			_garbageCollectionDetector = new WeakReference<>(new GarbageCollectionDetector(this)); // Allocate a (unreachable) GC detector.
			_cleanupScaler = 50; // Init as 50% average cleanup percentage, to make sure the cleanup can and will be executed the first time.
			_cleanupThreshold = _cleanupScaler;

			_numberOfFreeIDs = 1 << _bitSize;
			_freeIDs = new int[_numberOfFreeIDs];
			_freeIDsIndex = 0;
			_nextFreeID = segmentID << MAX_SEGMENT_BITSIZE;
			_maxFreeIDPlusOne = (segmentID + 1) << MAX_SEGMENT_BITSIZE;
		}

		/**
		 * Removes entries whose values have been garbage collected from this segment.
		 */
		public void cleanup()
		{
			final Entry[] table = _entries;
			int newLoad = _load;

			for (int i = _entries.length - 1; i >= 0; i--)
			{
				Entry e = table[i];
				if (e != null)
				{
					Entry previous = null;
					do
					{
						final Entry next = e._next;

						if (e.get() == null)
						{
							if (previous == null)
								table[i] = next;
							else
								previous._next = next;

							newLoad--;

							if (e instanceof EntryWithID)
							{
								final EntryWithID ewid = (EntryWithID) e;
								releaseID(ewid._id);
							}
						}
						else
							previous = e;

						e = next;
					} while (e != null);
				}
			}

			_load = newLoad;

			_entries = table; // Create happens-before edge, to ensure any changes become visible.
		}

		/**
		 * Rehashes the segment. The entries in the bucket will remain in the same order, so there are
		 * only 'young -> old' references and not the other way around. This will reduce 'minor'
		 * garbage collection times.
		 */
		private void rehash()
		{
			final int nrOfEntries = 1 << ++_bitSize;
			final int newHashMask = nrOfEntries - 1;

			final Entry[] oldEntries = _entries;
			final Entry[] newEntries = new Entry[nrOfEntries];

			// Construct temporary entries that function as roots for the entries that remain in the current bucket
			// and those that are being shifted.
			final Entry currentEntryRoot = new Entry(null, null, 0);
			final Entry shiftedEntryRoot = new Entry(null, null, 0);

			int newLoad = _load;
			final int oldSize = oldEntries.length;
			for (int i = oldSize - 1; i >= 0; i--)
			{
				Entry e = oldEntries[i];
				if (e != null)
				{
					Entry lastCurrentEntry = currentEntryRoot;
					Entry lastShiftedEntry = shiftedEntryRoot;
					do
					{
						if (e.get() != null)
						{ // Cleared entries should not be copied.
							final int position = e._hash & newHashMask;

							if (position == i)
							{
								lastCurrentEntry._next = e;
								lastCurrentEntry = e;
							}
							else
							{
								lastShiftedEntry._next = e;
								lastShiftedEntry = e;
							}
						}
						else
						{
							newLoad--;

							if (e instanceof EntryWithID)
							{
								final EntryWithID ewid = (EntryWithID) e;
								releaseID(ewid._id);
							}
						}

						e = e._next;
					} while (e != null);

					// Set the next pointers of the last entries in the buckets to null.
					lastCurrentEntry._next = null;
					lastShiftedEntry._next = null;

					newEntries[i] = currentEntryRoot._next;
					newEntries[i | oldSize] = shiftedEntryRoot._next; // The entries got shifted by the size of the old table.
				}
			}

			_load = newLoad;

			_threshold <<= 1;
			_entries = newEntries; // Volatile write. Creates happens-before edge with the above changes.
		}
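
		// Illustration of the split above (not part of the algorithm): when the
		// table doubles from 32 to 64 buckets, an entry with hash 0x2A sits in old
		// bucket 0x2A & 31 = 0x0A; afterwards it either stays at index 0x0A or
		// moves to 0x0A | 32 = 0x2A, depending on the newly exposed hash bit.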

		/**
		 * Ensures the load in this segment will not exceed a certain threshold.
		 */
		private void ensureCapacity()
		{
			// Rehash if the load exceeds the threshold,
			// unless the segment is already stretched to its maximum (since that would be a useless thing to do).
			if (_load > _threshold && _bitSize < MAX_SEGMENT_BITSIZE)
				rehash();
		}

		/**
		 * Attempts to run a cleanup if the garbage collector ran before the invocation of this function.
		 * This ensures that, in most cases, the buckets will contain no cleared entries. By doing this we
		 * speed up lookups significantly. Note that we will automatically throttle the frequency of the cleanups;
		 * in case we hardly ever collect anything (either because there is no garbage or collections occur
		 * very frequently) it will be slowed down to as little as once per four garbage collections. When a lot
		 * of entries are being cleared the cleanup will run after every collection. Using this strategy
		 * ensures that we clean the segment exactly when it is needed and possible.
		 */
		private void tryCleanup()
		{
			if (_flaggedForCleanup)
				synchronized (this)
				{
					if (_garbageCollectionDetector == null)
					{ // Yes, in Java double-checked locking (DCL) works on volatile fields.
						_flaggedForCleanup = false;
						if (_cleanupThreshold > 8)
						{ // The 'magic' number 8 is chosen, so the cleanup will be done at least once after every four garbage collections.
							final int oldLoad = _load;

							cleanup();

							int cleanupPercentage;
							if (oldLoad == 0)
								cleanupPercentage = 50; // This prevents division by zero errors in case the table is still empty (keep the cleanup percentage at 50% in this case).
							else
								cleanupPercentage = 100 - _load * 100 / oldLoad; // Calculate the percentage of entries that have been cleaned.
							_cleanupScaler = (_cleanupScaler * 25 + cleanupPercentage * 7) >> 5; // Modify the scaler, depending on the history (weight = 25) and on how much we cleaned up this time (weight = 7).
							if (_cleanupScaler > 0)
								_cleanupThreshold = _cleanupScaler;
							else
								_cleanupThreshold = 1; // If the scaler value became 0 (when we hardly ever collect anything), set the threshold to 1, so we only skip the next three garbage collections.
						}
						else
							_cleanupThreshold <<= 1;

						_garbageCollectionDetector = new WeakReference<>(new GarbageCollectionDetector(this)); // Allocate a new (unreachable) GC detector.
					}
				}
		}
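
		// Worked example of the scaler update above (illustrative numbers): with
		// _cleanupScaler = 50 and a cleanup that removed 25% of the entries, the
		// new scaler becomes (50 * 25 + 25 * 7) >> 5 = 1425 / 32 = 44, slowly
		// shifting the cleanup threshold towards the recently observed yield.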

		/**
		 * Inserts the given shared object into the set.
		 *
		 * @param object The shared object to insert.
		 * @param hash The hash that corresponds to the given shared object.
		 */
		private void put(final SharedObject object, final int hash)
		{
			final Entry[] table = _entries;
			final int hashMask = table.length - 1;
			final int position = hash & hashMask;

			final Entry next = table[position];
			// Assign a unique id if needed.
			if (object instanceof SharedObjectWithID)
			{
				final SharedObjectWithID sharedObjectWithID = (SharedObjectWithID) object;
				final int id = generateID();
				sharedObjectWithID.setUniqueIdentifier(id);

				table[position] = new EntryWithID(next, sharedObjectWithID, hash, id);
			}
			else
				table[position] = new Entry(next, object, hash);

			_load++;

			_entries = table; // Create a happens-before edge for the added entry, to ensure visibility.
		}

		/**
		 * Checks if the given shared object is present in this segment.
		 * NOTE: This method contains some duplicate code for efficiency reasons.
		 *
		 * @param prototype The shared object.
		 * @param hash The hash associated with the given shared object.
		 * @return True if this segment contains the given shared object; false otherwise.
		 */
		public boolean contains(final SharedObject prototype, final int hash)
		{
			Entry[] currentEntries = _entries;
			int hashMask = currentEntries.length - 1;

			// Find the object (lock free).
			int position = hash & hashMask;
			Entry e = currentEntries[position];
			if (e != null)
				do
				{
					if (e.get() == prototype)
						return true;
					e = e._next;
				} while (e != null);

			synchronized (this)
			{
				currentEntries = _entries;
				hashMask = currentEntries.length - 1;

				// Try again while holding the global lock for this segment.
				position = hash & hashMask;
				e = currentEntries[position];
				if (e != null)
					do
					{
						if (e.get() == prototype)
							return true;
						e = e._next;
					} while (e != null);
			}
			return false;
		}

		/**
		 * Returns a reference to the unique version of the given shared object prototype.
		 * NOTE: This method contains some duplicate code for efficiency reasons.
		 *
		 * @param prototype A prototype matching the shared object we want a reference to.
		 * @param hash The hash associated with the given shared object prototype.
		 * @return The reference to the unique version of the shared object.
		 */
		public final SharedObject get(final SharedObject prototype, final int hash)
		{
			// Cleanup if necessary.
			tryCleanup();

			Entry[] currentEntries = _entries;
			int hashMask = currentEntries.length - 1;

			// Find the object (lock free).
			int position = hash & hashMask;
			Entry e = currentEntries[position];
			if (e != null)
				do
				{
					if (hash == e._hash)
					{
						final SharedObject object = e.get();
						if (object != null)
							if (prototype.equivalent(object))
								return object;
					}
					e = e._next;
				} while (e != null);

			synchronized (this)
			{
				// Try again while holding the global lock for this segment.
				currentEntries = _entries;
				hashMask = currentEntries.length - 1;
				position = hash & hashMask;
				e = currentEntries[position];
				if (e != null)
					do
					{
						if (hash == e._hash)
						{
							final SharedObject object = e.get();
							if (object != null && prototype.equivalent(object))
								return object;
						}
						e = e._next;
					} while (e != null);

				// If we still can't find it, add it.
				ensureCapacity();
				final SharedObject result = prototype.duplicate();
				put(result, hash);
				return result;
			}
		}

		/**
		 * Generates a unique identifier.
		 *
		 * @return A unique identifier.
		 */
		private int generateID()
		{
			if (_freeIDsIndex > 0)
			{// Halve the size of the freeIDs array when it is three quarters empty.
				if (_freeIDsIndex < _numberOfFreeIDs >> 2 && _numberOfFreeIDs > 32)
				{
					final int newNumberOfFreeIDs = _numberOfFreeIDs >> 1;
					final int[] newFreeIds = new int[newNumberOfFreeIDs];
					System.arraycopy(_freeIDs, 0, newFreeIds, 0, newNumberOfFreeIDs);
					_freeIDs = newFreeIds;
					_numberOfFreeIDs = newNumberOfFreeIDs;
				}
				return _freeIDs[--_freeIDsIndex];
			}

			if (_nextFreeID != _maxFreeIDPlusOne)
				return _nextFreeID++;

			// We ran out of ids.
			cleanup(); // In a last desperate attempt, try to do a cleanup to free up ids.
			if (_freeIDsIndex > 0)
				return _freeIDs[--_freeIDsIndex];

			// If we still can't get a free id, throw an exception.
			throw new OpenError("No more unique identifiers available for segment(" + _segmentID + ").");
		}
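
		// Note on the id space (illustrative): each segment owns the disjoint range
		// [segmentID << 27, (segmentID + 1) << 27) of 2^27 identifiers (27 =
		// MAX_SEGMENT_BITSIZE). Ids are handed out sequentially and recycled via
		// _freeIDs once their owning entries are cleaned up.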

		/**
		 * Releases the given unique identifier, so it can be reused.
		 *
		 * @param id
		 *            The identifier to release.
		 */
		private void releaseID(final int id)
		{
			if (_freeIDsIndex == _numberOfFreeIDs)
			{// Double the size of the freeIDs array when it is full.
				final int newNumberOfFreeIDs = _numberOfFreeIDs << 1;
				final int[] newFreeIds = new int[newNumberOfFreeIDs];
				System.arraycopy(_freeIDs, 0, newFreeIds, 0, _numberOfFreeIDs);
				_freeIDs = newFreeIds;
				_numberOfFreeIDs = newNumberOfFreeIDs;
			}

			_freeIDs[_freeIDsIndex++] = id;
		}

		/**
		 * Returns statistics.
		 *
		 * @see java.lang.Object#toString()
		 */
		@Override
		public String toString()
		{
			final StringBuilder sb = new StringBuilder();

			synchronized (this)
			{
				final Entry[] table = _entries;

				final int tableSize = table.length;

				sb.append("Table size: ");
				sb.append(tableSize);
				sb.append(", ");

				sb.append("Number of entries: ");
				sb.append(_load);
				sb.append(", ");

				sb.append("Threshold: ");
				sb.append(_threshold);
				sb.append(", ");

				int nrOfFilledBuckets = 0;
				int totalNrOfCollisions = 0;
				int maxBucketLength = 0;
				for (int i = 0; i < tableSize; i++)
				{
					Entry e = table[i];
					if (e != null)
					{
						nrOfFilledBuckets++;
						int bucketLength = 1;
						while ((e = e._next) != null)
							bucketLength++;
						if (bucketLength > maxBucketLength)
							maxBucketLength = bucketLength;
						totalNrOfCollisions += bucketLength - 1;
					}
				}
				// Use integer arithmetic to round the results to a fixed number of decimals (3 and 1, respectively).
				double averageBucketLength = 0;
				double distribution = 100;
				if (nrOfFilledBuckets != 0)
				{
					averageBucketLength = (double) (totalNrOfCollisions * 1000 / nrOfFilledBuckets) / 1000 + 1;
					distribution = 100 - (double) (totalNrOfCollisions * 1000 / nrOfFilledBuckets / DEFAULT_LOAD_FACTOR) / 10;
				}
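				// For example (illustrative numbers): 7 collisions over 5 filled buckets
				// gives 7 * 1000 / 5 = 1400, so averageBucketLength = 1400 / 1000.0 + 1 = 2.4;
				// the integer division truncates the result to 3 decimals.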

				sb.append("Number of filled buckets: ");
				sb.append(nrOfFilledBuckets);
				sb.append(", ");

				sb.append("Load factor: ");
				sb.append(DEFAULT_LOAD_FACTOR);
				sb.append(", ");

				sb.append("Distribution (collisions vs filled buckets): "); // Total number of collisions vs number of filled buckets.
				sb.append(distribution);
				sb.append("%, ");

				sb.append("Total number of collisions: ");
				sb.append(totalNrOfCollisions);
				sb.append(", ");

				sb.append("Average (filled) bucket length: ");
				sb.append(averageBucketLength);
				sb.append(", ");

				sb.append("Maximal bucket length: ");
				sb.append(maxBucketLength);
				sb.append(", ");

				sb.append("Cleanup scaler: ");
				sb.append(_cleanupScaler);
				sb.append("%");
			}

			return sb.toString();
		}

		/**
		 * An object that can be used to detect when a garbage collection has been executed.
		 * Instances of this object must be made weakly reachable for this to work.
		 *
		 * @author Arnold Lankamp
		 */
		private static class GarbageCollectionDetector
		{
			private final Segment _segment;

			/**
			 * Constructor.
			 *
			 * @param segment
			 *            The segment that we need to flag for cleanup after a garbage collection occurred.
			 */
			public GarbageCollectionDetector(final Segment segment)
			{
				_segment = segment;
			}

			/**
			 * Executed after the garbage collector detects that this object is eligible for reclamation.
			 * When this happens it will flag the associated segment for cleanup.
			 *
			 * @see java.lang.Object#finalize
			 */
			@Override
			public void finalize()
			{
				_segment._garbageCollectionDetector = null;
				_segment._flaggedForCleanup = true;
			}
		}
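
		// How the detector works (summary): each Segment holds only a WeakReference
		// to its detector, so the detector itself is unreachable. Once a GC cycle
		// reclaims it, finalize() flags the segment, and the next get() call runs
		// tryCleanup(), which allocates a fresh detector for the next cycle.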

		/**
		 * A bucket entry for a shared object.
		 *
		 * @author Arnold Lankamp
		 */
		private static class Entry extends WeakReference<SharedObject>
		{
			public final int _hash;
			public volatile Entry _next; // This field is not final because we need to change it during cleanup and while rehashing.

			/**
			 * Constructor.
			 *
			 * @param next The next entry in the bucket.
			 * @param sharedObject The shared object.
			 * @param hash The hash that is associated with the given shared object.
			 */
			public Entry(final Entry next, final SharedObject sharedObject, final int hash)
			{
				super(sharedObject);

				_next = next;
				_hash = hash;
			}
		}

		/**
		 * A bucket entry for a shared object with a unique identifier.
		 *
		 * @author Arnold Lankamp
		 */
		private static class EntryWithID extends Entry
		{
			public final int _id;

			/**
			 * Constructor.
			 *
			 * @param next The next entry in the bucket.
			 * @param sharedObjectWithID The shared object with the unique identifier.
			 * @param hash The hash that is associated with the given shared object.
			 * @param id The unique identifier.
			 */
			public EntryWithID(final Entry next, final SharedObjectWithID sharedObjectWithID, final int hash, final int id)
			{
				super(next, sharedObjectWithID, hash);

				_id = id;
			}
		}
	}
}



