/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.journal;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.log4j.Logger;
import com.bigdata.btree.BTree;
import com.bigdata.btree.Checkpoint;
import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.ICheckpointProtocol;
import com.bigdata.btree.IDirtyListener;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.keys.DefaultKeyBuilderFactory;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.IKeyBuilderFactory;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.keys.StrengthEnum;
import com.bigdata.btree.keys.SuccessorUtil;
import com.bigdata.cache.ConcurrentWeakValueCache;
import com.bigdata.cache.ConcurrentWeakValueCacheWithTimeout;
import com.bigdata.cache.LRUCache;
import com.bigdata.cache.WeakValueCache;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.ICounterSet;
import com.bigdata.io.DataInputBuffer;
import com.bigdata.mdi.LocalPartitionMetadata;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.resources.IndexManager;
import com.bigdata.resources.ResourceManager;
import com.bigdata.util.Bytes;
import com.bigdata.util.BytesUtil;
import com.bigdata.util.concurrent.ExecutionExceptions;
import cutthecrap.utils.striterators.IStriterator;
import cutthecrap.utils.striterators.Resolver;
import cutthecrap.utils.striterators.Striterator;
/**
*
* {@link Name2Addr} is a {@link BTree} mapping index names to an {@link Entry}
* containing the last {@link Checkpoint} record committed for the named index
* and the timestamp of that commit. The keys are Unicode strings using the
* default {@link Locale}. The {@link Entry}s in {@link Name2Addr} are the set
* of registered named indices for an {@link AbstractJournal}.
*
*
* The {@link AbstractJournal} maintains an instance of this class that evolves
* with each {@link AbstractJournal#commit()} and tracks the {@link Checkpoint}
* records of the registered {@link ITx#UNISOLATED} indices. However, the
* journal also makes use of historical states for the {@link Name2Addr} index
* in order to resolve the historical state of a named index. Of necessity, the
* {@link Name2Addr} objects used for this latter purpose MUST be distinct from
* the evolving instance otherwise the current version of the named index would
* be resolved. Note further that the historical {@link Name2Addr} states are
* accessed using a canonicalizing mapping but that current evolving
* {@link Name2Addr} instance is NOT part of that mapping.
*
*
* Concurrent reads are permitted against the historical {@link Name2Addr}
* objects since the {@link BTree} is thread-safe for read-only operations.
* Likewise, writes are only allowed on the {@link ITx#UNISOLATED}
* {@link Name2Addr} instance. Write access to the underlying
* {@link BTree} MUST be synchronized on the {@link ITx#UNISOLATED}
* {@link Name2Addr} instance since the {@link BTree} is NOT safe for concurrent
* writers. Further, read access to {@link ITx#UNISOLATED}
* {@link Name2Addr} object MUST be synchronized so as to not conflict with
* writes on that {@link BTree}. Therefore all write methods on this
* class are declared as synchronized but the caller MUST synchronize
* on {@link Name2Addr} if they are performing reads on the {@link ITx#UNISOLATED}
* {@link Name2Addr} instance. This allows readers on historical {@link Name2Addr}
* instances to have full concurrency.
*
*
* Note: {@link Name2Addr} by itself is NOT sufficient to handle commits with
* concurrent task execution, such as arises with the group commit protocol. The
* problem is concurrency in the data structure that keeps track of add/drop for
* named indices and also tracks which named indices are dirty. In order to account for
* tasks running concurrent with commit processing, {@link AbstractTask} isolates
* {@link Name2Addr} and makes the set of changes (registering indices, dropping
* indices, and updating the {@link Entry} in {@link Name2Addr} to reflect the
* current {@link Checkpoint} record for an index) an atomic state change that
* is performed IFF the task completes successfully and is synchronized on
* {@link Name2Addr} to prevent that happening concurrent with commit
* processing.
*
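* A minimal sketch of the read-side discipline described above, assuming a
* reference <code>name2Addr</code> to the live ({@link ITx#UNISOLATED}) instance
* held by the journal (the variable name is illustrative only):
*
* <pre>{@code
* // Reads on the live Name2Addr must be synchronized by the caller so that
* // they do not interleave with writes on its backing mutable BTree.
* synchronized (name2Addr) {
*     final Name2Addr.Entry e = name2Addr.getEntry("myIndex");
*     // e is null if no index is registered under that name.
* }
* }</pre>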
*/
public class Name2Addr extends BTree {
private static final Logger log = Logger.getLogger(Name2Addr.class);
/**
* Cache of added/retrieved btrees by _name_. This cache is ONLY used by the
* "live" {@link Name2Addr} instance.
*
* Map from the name of an index to a weak reference for the corresponding
* "live" version of the named index. Entries will be cleared from this map
* after they have become only weakly reachable. In order to prevent dirty
* indices from being cleared, we register an {@link IDirtyListener}. When
* it is informed that an index is dirty it places a hard reference to that
* index into the {@link #commitList}.
*
* Note: The capacity of the backing hard reference LRU affects how many
* _clean_ indices can be held in the cache. Dirty indices remain strongly
* reachable owing to their existence in the {@link #commitList}.
*/
private ConcurrentWeakValueCache<String, ICheckpointProtocol> indexCache = null;
/**
* Holds hard references for the dirty indices along with the index name.
* This collection prevents dirty indices from being cleared from the
* {@link #indexCache}, which would result in lost updates.
*
* Note: Operations on unisolated indices always occur on the "current"
* state of that index. The "current" state is either unchanged (following a
* successful commit) or rolled back to the last saved state (by an abort
* following an unsuccessful commit). Therefore all unisolated index write
* operations MUST complete before a commit and new unisolated operations
* MUST NOT begin until the commit has either succeeded or been rolled back.
* Failure to observe this constraint can result in new unisolated
* operations writing on indices that should have been rolled back if the
* commit is not successful.
*/
private ConcurrentHashMap<String, DirtyListener> commitList = new ConcurrentHashMap<String, DirtyListener>();
/**
* An instance of this {@link DirtyListener} is registered with each named
* index that we administer to listen for events indicating that the index
* is dirty. When we get that event we stick the {@link DirtyListener} on
* the {@link #commitList}. This makes the commit protocol simpler since
* the {@link DirtyListener} has both the name of the index and the
* reference to the index and we need both on hand to do the commit.
*
* @author Bryan Thompson
*/
private class DirtyListener implements IDirtyListener, Comparable<DirtyListener> {
final String name;
final ICheckpointProtocol btree;
boolean needsCheckpoint;
long checkpointAddr = 0L;
@Override
public String toString() {
return "DirtyListener{name="
+ name
+ ","
+ (needsCheckpoint ? "needsCheckpoint" : "checkpointAddr="
+ checkpointAddr) + "}";
}
private DirtyListener(final String name,
final ICheckpointProtocol btree, final boolean needsCheckpoint) {
assert name!=null;
assert btree!=null;
this.name = name;
this.btree = btree;
this.needsCheckpoint = needsCheckpoint;
if(!needsCheckpoint) {
/*
* Grab the checkpointAddr for the object.
*/
try {
checkpointAddr = btree.getCheckpoint().getCheckpointAddr();
} catch(IllegalStateException ex) {
throw new RuntimeException(
"Checkpoint record not written: " + name);
}
}
}
/**
* Return the {@link Name2Addr} instance to which this listener is
* reporting.
*/
private Name2Addr getName2Addr() {
return Name2Addr.this;
}
/**
* Add this to the {@link Name2Addr#commitList} and set
* {@link #needsCheckpoint} to <code>true</code>.
*
* @param btree
*/
@Override
public void dirtyEvent(final ICheckpointProtocol btree) {
assert btree == this.btree;
@SuppressWarnings("unused")
final boolean added;
synchronized(Name2Addr.this) {
final ICheckpointProtocol cached = indexCache.get(name);
if (cached == null) {
/*
* There is no index in the cache for this name. This can
* occur if someone is holding a reference to a mutable
* BTree and they write on it after a commit or abort.
*/
throw new RuntimeException("No index in cache: name="+name);
}
if (cached != btree) {
/*
* There is a different index in the cache for this name.
* This can occur if someone is holding a reference to a
* mutable BTree and they write on it after a commit or
* abort but the named index has already been re-loaded into
* the cache.
*/
throw new RuntimeException("Different index in cache: "+name);
}
/*
* Note: This MUST be synchronized to prevent loss of dirty
* notifications that arrive while a concurrent commit is in
* progress.
*/
added = commitList.putIfAbsent(name, this) != null;
needsCheckpoint = true;
checkpointAddr = 0L;
if(log.isInfoEnabled()) {
/*
* Note: The size of the commit list can appear to increment
* by more than one if there are concurrent writes on
* different indices (e.g., if this log message is written
* outside of the synchronized block).
*/
log.info("name=" + name + ", commitListSize="
+ commitList.size() + ", file="
+ getStore().getFile());
}
} // synchronized.
}
/**
* Puts instances into order by their {@link #name}.
*/
@Override
public int compareTo(final DirtyListener arg0) {
return name.compareTo(arg0.name);
}
}
/**
* Create a new instance.
*
* @param store
* The backing store.
*
* @return The new instance.
*/
static public Name2Addr create(final IRawStore store) {
final IndexMetadata metadata = new IndexMetadata(UUID.randomUUID());
metadata.setBTreeClassName(Name2Addr.class.getName());
/*
* TODO configure unicode sort key behavior explicitly?
*
* Note: This only applies to new Name2Addr objects. However, historical
* Name2Addr objects were created using the default collator for the
* platform. When the ICU library is available, that is the default.
*/
final Properties p = new Properties();
// p.setProperty(KeyBuilder.Options.COLLATOR, CollatorEnum.ASCII.name());
p.setProperty(KeyBuilder.Options.STRENGTH,
StrengthEnum.Identical.name());
metadata.setTupleSerializer(new Name2AddrTupleSerializer(
new DefaultKeyBuilderFactory(p)));
return (Name2Addr) BTree.create(store, metadata);
}
/**
* Load from the store (de-serialization constructor).
*
* @param store
* The backing store.
* @param checkpoint
* The {@link Checkpoint} record.
* @param metadata
* The metadata record for the index.
*/
public Name2Addr(final IRawStore store, final Checkpoint checkpoint,
final IndexMetadata metadata, final boolean readOnly) {
super(store, checkpoint, metadata, readOnly);
}
/**
* Many methods on this class will throw an {@link IllegalStateException}
* unless they are invoked on the {@link ITx#UNISOLATED} {@link Name2Addr}
* instance. This method is used to test that assertion for those methods.
*
* @throws IllegalStateException
* unless this is the {@link ITx#UNISOLATED} {@link Name2Addr}
* instance.
*/
final protected void assertUnisolatedInstance() {
if(indexCache == null) {
throw new IllegalStateException();
}
}
/**
* Return <code>true</code> iff this is the {@link ITx#UNISOLATED}
* {@link Name2Addr} instance.
*/
final protected boolean isUnisolatedInstance() {
return indexCache != null;
}
/**
* Setup the {@link #indexCache}.
*
* Note: This cache is <code>null</code> unless initialized and is ONLY
* used by the "live" version of the {@link Name2Addr} index. The only
* method that creates or loads the "live" {@link Name2Addr} index is
* {@link AbstractJournal#setupName2AddrBTree()}.
*
* @param cacheCapacity
* The capacity of the inner {@link LRUCache} for the
* {@link WeakValueCache}.
* @param cacheTimeout
* The timeout in milliseconds for stale entries in the cache.
*
* @see Options#LIVE_INDEX_CACHE_CAPACITY
* @see Options#LIVE_INDEX_CACHE_TIMEOUT
*/
protected void setupCache(final int cacheCapacity, final long cacheTimeout) {
if (indexCache != null) {
// Cache was already configured.
throw new IllegalStateException();
}
// indexCache = new WeakValueCache(
// new LRUCache(cacheCapacity));
indexCache = new ConcurrentWeakValueCacheWithTimeout<String, ICheckpointProtocol>(
cacheCapacity, TimeUnit.MILLISECONDS.toNanos(cacheTimeout));
}
/**
* An iterator that visits the entries in the internal {@link #indexCache}.
* You must test the weak reference for each entry in order to determine
* whether its value has been cleared as of the moment that you request that
* value.
*
* @throws IllegalStateException
* unless this is the {@link ITx#UNISOLATED} instance.
*/
private Iterator<java.util.Map.Entry<String, WeakReference<ICheckpointProtocol>>> indexCacheEntryIterator() {
assertUnisolatedInstance();
return indexCache.entryIterator();
}
/**
* Return the approximate number of indices in the live index cache.
*
* @throws IllegalStateException
* unless this is the {@link ITx#UNISOLATED} instance.
*/
public int getIndexCacheSize() {
assertUnisolatedInstance();
return indexCache.size();
}
/**
* Return <code>true</code> iff the named index is on the commit list.
*
* Note: This is synchronized even though the commitList is thread-safe in
* order to make the test atomic with respect to {@link #handleCommit(long)}.
*
* @param name
* The index name.
*
* @throws IllegalStateException
* unless this is the {@link ITx#UNISOLATED} instance.
*/
synchronized public boolean willCommit(final String name) {
assertUnisolatedInstance();
return commitList.containsKey(name);
}
/**
* Flush a dirty index to the disk. This is used to flush each dirty index
* in parallel, providing increased IO throughput and reduced latency during
* the commit.
*
* @author Bryan Thompson
* @see "Flush indices in parallel during checkpoint to reduce IO latency"
*/
private static class CommitIndexTask implements Callable<CommitIndexTask> {
private final DirtyListener l;
private final long commitTime;
private final AtomicLong checkpointAddr = new AtomicLong(0L);
/**
* Return the address of the {@link Checkpoint} record.
*/
public long getCheckpointAddr() {
return checkpointAddr.get();
}
/**
*
* @param l
* The {@link DirtyListener}.
* @param commitTime
* The commitTime associated with the commitPoint.
*/
public CommitIndexTask(final DirtyListener l, final long commitTime) {
if (l == null)
throw new IllegalArgumentException();
this.l = l;
this.commitTime = commitTime;
}
/**
* @return self
*/
@Override
public CommitIndexTask call() throws Exception {
if (log.isInfoEnabled())
log.info("Will commit: " + l.name);
final long checkpointAddr;
if (l.needsCheckpoint) {
/*
* Note: AbstractTask flags [needsCheckpoint := false] on the
* DirtyListener and handles the BTree checkpoint itself in
* order to avoid the possibility of a concurrent modification
* by this code during commit processing.
*/
try {
// checkpoint the index.
checkpointAddr = l.btree.handleCommit(commitTime);
// we just did the checkpoint.
l.needsCheckpoint = false;
} catch (Throwable t) {
// adds the name to the stack trace.
throw new RuntimeException("Could not commit index: name="
+ l.name, t);
}
} else {
/*
* Note: AbstractTask avoids concurrent modification of the
* BTree checkpoint record during a commit by synchronizing on
* Name2Addr.
*
* Note: The DirtyListener grabs the current checkpointAddr from
* the BTree when [needsCheckpoint := false]. This allows us to
* have asynchronous checkpoints of the BTree (by concurrent
* tasks) without causing the checkpoint address on the
* commitList to be advanced until the next atomic transfer of
* state from a completed AbstractTask down to Name2Addr.
* (Without this, the checkpointAddr on the BTree winds up
* updated concurrently but we only notice the problem when it
* happens to be 0L because the BTree is in the midst of
* updating its checkpoint!)
*/
// use last recorded checkpoint.
checkpointAddr = l.checkpointAddr;
if (checkpointAddr == 0L) {
throw new RuntimeException(
"Checkpoint address not written: name=" + l.name);
}
}
// set commitTime on the btree (transient field).
l.btree.setLastCommitTime(commitTime);
// publish the checkpoint address.
this.checkpointAddr.set(checkpointAddr);
// Done.
return this;
}
} // CommitIndexTask
/**
* Commit processing for named indices.
*
* This method applies the {@link #commitList} and then flushes the backing
* {@link ICheckpointProtocol} object to the store. The {@link #commitList}
* consists of {@link DirtyListener}s. If the listener has its
* {@link DirtyListener#needsCheckpoint} flag set, then the
* {@link ICheckpointProtocol} implementation to which that listener is
* attached will have its {@link BTree#writeCheckpoint() checkpoint written}
* . Otherwise the current {@link Checkpoint} address is recovered. Either
* way, the {@link Entry} in {@link Name2Addr}'s backing {@link BTree} is
* updated to reflect the commitTime and {@link Checkpoint} address
* for the index.
*
* Finally {@link Name2Addr} {@link Checkpoint}s itself using
* {@link ICommitter#handleCommit(long)} and returns the address from which
* {@link Name2Addr} may be reloaded.
*
* Note: The {@link #commitList} MUST be protected against concurrent
* modification during the commit otherwise concurrent tasks could be
* reporting dirty objects while we are doing a commit and those notices
* would be lost. Persistence capable objects ({@link ICheckpointProtocol}
* implementations) get onto the {@link #commitList} via the
* {@link DirtyListener}, so it is also synchronized.
*
* Note: {@link Name2Addr} DOES NOT obtain a resource lock on the
* {@link ICheckpointProtocol} implementation. Therefore it MUST NOT
* checkpoint an index on which an {@link AbstractTask} has obtained a
* resource lock. Otherwise we have concurrent writers on the {@link BTree}
* and the {@link BTree} is not thread-safe for concurrent writers. Instead,
* the {@link AbstractTask} checkpoints the {@link BTree} itself while it is
* holding the resource lock and then sets
* {@link DirtyListener#needsCheckpoint} to <code>false</code> using
* {@link #putOnCommitList(String, ICheckpointProtocol, boolean)} as an
* indication to {@link Name2Addr} that it MUST persist the current
* checkpointAddr for the {@link BTree} on its next commit (and MUST NOT
* write on the index when it does that commit).
*
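* A condensed sketch of the per-index decision made by this method (the field
* names below are those of the private {@link DirtyListener}; this is
* illustrative pseudo-Java, not additional API):
*
* <pre>{@code
* // For each DirtyListener l drained from the commit list (tasks run in parallel):
* final long addr = l.needsCheckpoint
*         ? l.btree.handleCommit(commitTime) // write a new Checkpoint record now
*         : l.checkpointAddr;                // reuse the checkpoint taken by AbstractTask
* // The Entry for l.name is then updated to (addr, commitTime) in this BTree and
* // Name2Addr finally checkpoints itself via super.handleCommit(commitTime).
* }</pre>
*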
* @see "Flush indices in parallel during checkpoint to reduce IO latency"
*/
@Override
synchronized
public long handleCommit(final long commitTime) {
assertUnisolatedInstance();
// snapshot the commit list
final DirtyListener[] a = commitList.values().toArray(new DirtyListener[] {});
// clear the commit list.
commitList.clear();
/*
* Place into sorted order as an aid to debugging when examining the
* commit list.
*
* Note: This also approximates the order of the generated keys for the
* indices which makes the operations on the underlying BTree somewhat
* more efficient as they are more or less in key order. (The order is
* only approximate since a Unicode collator determines the real order
* for the sort keys generated from the index names).
*/
Arrays.sort(a);
if (log.isInfoEnabled()) {
log.info("Store file="+getStore().getFile());
log.info("There are " + a.length + " dirty indices : "
+ Arrays.toString(a));
}
// for each entry in the snapshot of the commit list.
final List<CommitIndexTask> tasks = new ArrayList<CommitIndexTask>(a.length);
for (int i = 0; i < a.length; i++) {
final DirtyListener l = a[i];
if (log.isInfoEnabled())
log.info("Will commit: " + l.name);
tasks.add(new CommitIndexTask(l, commitTime));
}
/*
* Submit checkpoint tasks in parallel.
*
* Note: This relies on getStore() providing access to the IIndexManager
* interface.
*/
final List<Future<CommitIndexTask>> futures;
try {
final ExecutorService executorService = ((IIndexManager) getStore())
.getExecutorService();
/*
* Invoke tasks.
*
* Note: Blocks until all tasks are done. Hence we do NOT have to
* cancel these Futures. If we obtain them, then they are already
* done.
*/
futures = executorService.invokeAll(tasks);
} catch (InterruptedException e) {
// Interrupted while awaiting checkpoint(s).
throw new RuntimeException(e);
}
// for each entry in the snapshot of the commit list.
final List<Throwable> causes = new LinkedList<Throwable>();
for (Future<CommitIndexTask> f : futures) {
try {
final CommitIndexTask task = f.get();
final DirtyListener l = task.l;
final long checkpointAddr = task.getCheckpointAddr();
// encode the index name as a key.
final byte[] key = getKey(l.name);
// lookup the current entry (if any) for that index.
final byte[] val = lookup(key);
// de-serialize iff entry was found.
final Entry oldEntry = (val == null ? null
: EntrySerializer.INSTANCE.deserialize(new DataInputBuffer(
val)));
/*
* Update if there is no existing entry or if the checkpointAddr has
* changed or if there was no commit time on the old entry.
*/
if (oldEntry == null || oldEntry.checkpointAddr != checkpointAddr
|| oldEntry.commitTime == 0L) {
final Entry entry = new Entry(l.name, checkpointAddr, commitTime);
// update persistent mapping.
insert(key, EntrySerializer.INSTANCE.serialize( entry ));
}
} catch (InterruptedException e) {
log.error("l.name: " + e, e);
causes.add(e);
} catch (ExecutionException e) {
log.error("l.name: " + e, e);
causes.add(e);
}
} // next Future.
/*
* If there were any errors, then throw an exception listing them.
*/
if (!causes.isEmpty()) {
// Throw exception back to the leader.
if (causes.size() == 1)
throw new RuntimeException(causes.get(0));
throw new RuntimeException("nerrors=" + causes.size(),
new ExecutionExceptions(causes));
}
// and flushes out this btree as well.
return super.handleCommit(commitTime);
}
/**
* Encodes a Unicode string into a key.
*
* @param name
* The name of the btree.
*
* @return The corresponding key.
*/
private byte[] getKey(final String name) {
final byte[] a = metadata.getTupleSerializer().serializeKey(name);
// log.error("name=" + name + ", key=" + BytesUtil.toString(a));
return a;
// return KeyBuilder.asSortKey(name);
}
/**
* Return the {@link ITx#UNISOLATED} view of the named persistence capable
* data structure - this method tests a cache of the named persistence
* capable data structures and will return the existing instance if the
* index is found in the cache and will otherwise load the
* {@link ITx#UNISOLATED} view of the data structure from the backing store.
*
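* A usage sketch against the live instance (names are illustrative; the cast
* assumes the index was registered as a plain {@link BTree}):
*
* <pre>{@code
* final ICheckpointProtocol ndx = name2Addr.getIndex("myIndex");
* if (ndx != null) {
*     final BTree btree = (BTree) ndx; // unisolated, mutable view
* }
* }</pre>
*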
* @param name
* The index name.
*
* @return The named index or <code>null</code> iff there is no index with
* that name.
*
* @throws IllegalArgumentException
* if <i>name</i> is <code>null</code>.
* @throws IllegalStateException
* if this is not the {@link ITx#UNISOLATED} {@link Name2Addr}
* instance.
*/
public ICheckpointProtocol getIndex(final String name) {
assertUnisolatedInstance();
if (name == null) {
throw new IllegalArgumentException();
}
ICheckpointProtocol ndx;
synchronized(this) {
/*
* Note: Synchronized since some operations (remove+add) are not
* otherwise atomic.
*/
ndx = indexCache.get(name);
}
if (ndx != null) {
if (ndx.getDirtyListener() == null) {
/*
* Note: We can't return an unisolated view of a BTree to the
* caller without having a dirty listener set on it that will
* report any changes back to this name2addr instance. An
* exception thrown here indicates that the BTree was able to
* remain in (or enter into) the indexCache without having its
* dirty listener set.
*/
throw new AssertionError();
}
/*
* Further verify that the dirty listener is reporting to this
* name2addr instance.
*/
assert ((DirtyListener)ndx.getDirtyListener()).getName2Addr() == this;
return ndx;
}
final byte[] val = super.lookup(getKey(name));
if (val == null) {
return null;
}
// deserialize entry.
// final Entry entry = EntrySerializer.INSTANCE.deserialize(new DataInputBuffer(val));
final Entry entry = EntrySerializer.INSTANCE
.deserialize(new DataInputStream(new ByteArrayInputStream(val)));
// Load from the backing store.
ndx = Checkpoint.loadFromCheckpoint(store, entry.checkpointAddr,
false/* readOnly */);
// Set the lastCommitTime on the index.
ndx.setLastCommitTime(entry.commitTime);
// Save name -> btree mapping in transient cache.
putIndexCache(name, ndx, false/*replace*/);
// listen for dirty events so that we know when to add this to the commit list.
final DirtyListener l = new DirtyListener(name, ndx, false/* needsCheckpoint */);
ndx.setDirtyListener( l );
// report event (loaded btree).
ResourceManager.openUnisolatedIndex(name);
// return btree.
return ndx;
}
/**
* Return the {@link Entry} for the named index.
*
* Note: This is a lower-level access mechanism that is used by
* {@link Journal#getIndex(String, ICommitRecord)} when accessing historical
* named indices from an {@link ICommitRecord}.
*
* @param name
* The index name.
*
* @return The {@link Entry} for the named index -or- <code>null</code> if
* there is no entry for that name.
*/
public Entry getEntry(final String name) {
// lookup in the index.
final byte[] val = super.lookup(getKey(name));
Entry entry = null;
if (val != null) {
// deserialize entry.
entry = EntrySerializer.INSTANCE.deserialize(new DataInputBuffer(
val));
}
return entry;
}
/**
* Add an entry for the named index.
*
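* A minimal registration sketch, assuming a backing {@link IRawStore}
* <code>store</code> and the live {@link Name2Addr} instance
* <code>name2Addr</code> (applications normally register indices through the
* journal rather than calling this method directly):
*
* <pre>{@code
* final IndexMetadata md = new IndexMetadata("myIndex", UUID.randomUUID());
* final BTree btree = BTree.create(store, md);
* name2Addr.registerIndex("myIndex", btree); // now on the commit list
* }</pre>
*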
* @param name
* The index name.
*
* @param btree
* The index.
*
* @exception IllegalArgumentException
* if <i>name</i> is <code>null</code>.
* @exception IllegalArgumentException
* if <i>btree</i> is <code>null</code>.
* @exception IndexExistsException
* if there is already an index registered under that name.
*/
synchronized public void registerIndex(final String name,
final ICheckpointProtocol btree) {
assertUnisolatedInstance();
if (name == null)
throw new IllegalArgumentException();
if (btree == null)
throw new IllegalArgumentException();
final byte[] key = getKey(name);
if (super.contains(key)) {
throw new IndexExistsException(name);
}
// flush btree to the store to get the checkpoint record address.
final long checkpointAddr = btree.writeCheckpoint();
/*
* Add a serialized entry to the persistent index.
*
* Note: The commit time here is a placeholder. It will be replaced with
* the actual commit time by the next commit since the newly created
* B+Tree is on our commit list. If there is an abort, then the entry is
* simply discarded along with the rest of the Name2Addr state.
*/
final Entry entry = new Entry(name, checkpointAddr, 0L/* commitTime */);
super.insert(key, EntrySerializer.INSTANCE.serialize( entry ));
putOnCommitList(name, btree, false/* needsCheckpoint */);
// report event (the application has access to the named index).
ResourceManager.openUnisolatedIndex(name);
}
/**
* Adds the named index to the commit list and sets a {@link DirtyListener}
* on the {@link ICheckpointProtocol} so that this {@link Name2Addr} object
* will be informed if the associated persistent data structure becomes
* dirty.
*
* @param name
* The index name.
* @param btree
* The persistence capable data structure.
* @param needsCheckpoint
* Specify <code>true</code> if {@link Name2Addr} should invoke
* {@link ICheckpointProtocol#writeCheckpoint()} rather than just
* updating the {@link Entry} for the persistence capable data
* structure using {@link ICheckpointProtocol#getCheckpoint()}.
*/
synchronized protected void putOnCommitList(final String name,
final ICheckpointProtocol btree, final boolean needsCheckpoint) {
assertUnisolatedInstance();
if (name == null)
throw new IllegalArgumentException();
if (btree == null)
throw new IllegalArgumentException();
// setup a dirty listener.
final DirtyListener l = new DirtyListener(name, btree, needsCheckpoint);
// and set it on the btree.
btree.setDirtyListener(l);
putIndexCache(name, btree, true/*replace*/);
// add to the commit list.
commitList.put(name, l);
if(log.isInfoEnabled()) {
log.info("name=" + name + ", commitListSize=" + commitList.size()
+ ", needsCheckpoint=" + needsCheckpoint + ", file="
+ getStore().getFile());
}
}
/**
* Adds the named index to the {@link ITx#UNISOLATED} index cache.
*
* @param name
* The index name.
* @param btree
* The {@link ITx#UNISOLATED} view of the persistence capable
* data structure.
* @param replace
* If an existing entry for that name may be replaced.
*/
synchronized protected void putIndexCache(final String name,
final ICheckpointProtocol btree, final boolean replace) {
assertUnisolatedInstance();
// /*
// * Note: the WeakValueCache does not let you replace an existing entry
// * so we first remove the old entry under the key if there is one.
// */
// if (replace) {
//
// indexCache.remove(name);
//
// }
//
// // add name -> btree mapping to the transient cache.
// indexCache.put(name, btree, true/*dirty*/);
if (replace) {
indexCache.put(name, btree);
} else {
indexCache.putIfAbsent(name, btree);
}
}
/**
* Return the current entry, if any, for the named {@link ITx#UNISOLATED}
* index in the {@link #indexCache}.
*
* Note: This method is more direct than {@link #getIndex(String)}.
* {@link AbstractTask} uses this method together with
* {@link #putIndexCache(String, ICheckpointProtocol, boolean)} to allow
* different tasks access to the same pool of {@link ITx#UNISOLATED}
* indices.
*
* @param name
* The index name.
*
* @return The index iff it was found in the cache.
*/
synchronized protected ICheckpointProtocol getIndexCache(final String name) {
assertUnisolatedInstance();
return indexCache.get(name);
}
/**
* Removes the entry for the named index. The named index will no longer
* participate in commits.
*
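* A usage sketch on the live instance (illustrative; applications normally
* drop indices through the journal):
*
* <pre>{@code
* name2Addr.dropIndex("myIndex"); // unregisters the index and clears its dirty listener
* // The drop becomes restart safe at the next commit of the journal.
* }</pre>
*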
* @param name
* The index name.
*
* @exception IllegalArgumentException
* if <i>name</i> is <code>null</code>.
* @exception NoSuchIndexException
* if the index does not exist.
*/
synchronized public void dropIndex(final String name) {
assertUnisolatedInstance();
if (name == null)
throw new IllegalArgumentException();
if (log.isInfoEnabled())
log.info("name=" + name);
final byte[] key = getKey(name);
if(!super.contains(key)) {
throw new NoSuchIndexException("Not registered: "+name);
}
// remove the name -> btree mapping from the transient cache.
final ICommitter btree = indexCache.remove(name);
if (btree != null) {
/*
* Make sure that the index is not on the commit list.
*
* Note: If the index is not in the index cache then it WILL NOT be
* in the commit list.
*/
commitList.remove(name);
// clear our listener.
((ICheckpointProtocol) btree).setDirtyListener(null);
}
/*
* Remove the entry from the persistent index. After a commit you will
* no longer be able to find the metadata record for this index from the
* current commit record (it will still exist of course in historical
* commit records).
*/
super.remove(key);
// report event.
ResourceManager.dropUnisolatedIndex(name);
}
/**
* Return a {@link CounterSet} reflecting the named indices that are
* currently open (more accurately, those open named indices whose
* references are in {@link Name2Addr}'s internal {@link #indexCache}). When
* index partitions are in use their {@link CounterSet}s are reported under
* a path formed from name of the scale-out index and partition identifier.
* Otherwise the {@link CounterSet}s are reported directly under the index
* name.
*
* @param counterSet
* When non-<code>null</code>, the performance counters are
* entered into the caller's collection. Otherwise they are
* entered into a new collection.
* @param found
* When non-<code>null</code>, the names of the indices whose
* performance counters are being returned are reported as a
* side-effect on this {@link Set}.
*
* @return A new {@link CounterSet} reflecting the named indices that were
* open as of the time that this method was invoked.
*
* @see IndexManager#getIndexCounters()
*
* @see "Expose performance counters for read-only indices"
*/
protected CounterSet getIndexCounters(final CounterSet counterSet,
final Set<String> found) {
assertUnisolatedInstance();
final CounterSet tmp = counterSet == null ? new CounterSet()
: counterSet;
final Iterator<java.util.Map.Entry<String, WeakReference<ICheckpointProtocol>>> itr = indexCacheEntryIterator();
while (itr.hasNext()) {
final java.util.Map.Entry<String, WeakReference<ICheckpointProtocol>> entry = itr.next();
final String name = entry.getKey();
final ICheckpointProtocol btree = entry.getValue().get();
if (btree == null) {
// Note: Weak reference has been cleared.
continue;
}
final IndexMetadata md = btree.getIndexMetadata();
final LocalPartitionMetadata pmd = md.getPartitionMetadata();
final String path;
if (pmd != null) {
// Note: [name] already includes the partition identifier.
path = md.getName() + ICounterSet.pathSeparator + name;
} else {
path = name;
}
/*
* Attach the B+Tree performance counters.
*
* Note: These counters MUST NOT embed a hard reference to the
* AbstractBTree. That could cause the BTree to be retained as long
* as the caller holds the CounterSet object!
*/
tmp.makePath(path).attach(btree.getCounters());
if (found != null) {
/*
* Report out the names of the indices whose counters are being
* returned.
*/
found.add(name);
}
}
return tmp;
}
/**
* An entry in the persistent index.
*
* The {@link Entry} reports the {@link #name} of the index, its
* {@link #checkpointAddr}, and the {@link #commitTime} when it was last
* updated. If you want to know more about the index, then you need to open
* it.
*
* Do NOT open the index directly from the {@link #checkpointAddr}. That
* will circumvent the canonical mapping imposed by the
* {@link IIndexManager} on the open indices for a given name and commit
* time. Instead, just ask the {@link IIndexManager} to open the index
* having the specified {@link #name} as of the desired commit time.
*
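* For example, given a {@link Journal} <code>jnl</code> (an {@link IIndexManager})
* and an {@link Entry} <code>entry</code>, the named index is resolved like this
* rather than from {@link #checkpointAddr} (a sketch; the variable names are
* illustrative):
*
* <pre>{@code
* final IIndex ndx = jnl.getIndex(entry.name, entry.commitTime);
* }</pre>
*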
* @author Bryan Thompson
*/
public static class Entry {
/**
* The name of the index.
*/
public final String name;
/**
* The address of the last known {@link Checkpoint} record for the
* index with that name.
*/
public final long checkpointAddr;
/**
* The commit time associated with the last commit point for the named
* index.
*/
public final long commitTime;
public Entry(final String name, final long checkpointAddr,
final long commitTime) {
this.name = name;
this.checkpointAddr = checkpointAddr;
this.commitTime = commitTime;
}
@Override
public String toString() {
return "Entry{name=" + name + ",checkpointAddr=" + checkpointAddr
+ ",commitTime=" + commitTime + "}";
}
}
/**
* The values are {@link Entry}s.
*
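* The record layout is simply the {@link Entry#commitTime} (long), the
* {@link Entry#checkpointAddr} (long), and then the index name in modified
* UTF-8. A round-trip sketch (the <code>addr</code> and <code>commitTime</code>
* values are placeholders):
*
* <pre>{@code
* final byte[] b = EntrySerializer.INSTANCE.serialize(new Entry("myIndex", addr, commitTime));
* final Entry e = EntrySerializer.INSTANCE.deserialize(new DataInputBuffer(b));
* }</pre>
*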
* @author Bryan Thompson
*/
public static class EntrySerializer {
public static transient final EntrySerializer INSTANCE = new EntrySerializer();
private EntrySerializer() {
}
public byte[] serialize(final Entry entry) {
try {
// estimate capacity
final int capacity = Bytes.SIZEOF_LONG + entry.name.length() * 2;
final ByteArrayOutputStream baos = new ByteArrayOutputStream(capacity);
final DataOutput os = new DataOutputStream(baos);
os.writeLong(entry.commitTime);
os.writeLong(entry.checkpointAddr);
os.writeUTF(entry.name);
return baos.toByteArray();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public Entry deserialize(final DataInput in) {
try {
final long commitTime = in.readLong();
final long checkpointAddr = in.readLong();
final String name = in.readUTF();
return new Entry(name, checkpointAddr, commitTime);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
/**
* Encapsulates key and value formation for {@link Name2Addr}.
*
* @author Bryan Thompson
*/
static public class Name2AddrTupleSerializer extends
DefaultTupleSerializer<String, Entry> {
/**
*
*/
private static final long serialVersionUID = 5699568938604974463L;
/**
* Used to (de-)serialize {@link Entry}s (NOT thread-safe).
*/
private final EntrySerializer ser;
/**
* De-serialization ctor.
*/
public Name2AddrTupleSerializer() {
super();
this.ser = EntrySerializer.INSTANCE;
}
/**
* Ctor when creating a new instance.
*
* @param keyBuilderFactory
*/
public Name2AddrTupleSerializer(
final IKeyBuilderFactory keyBuilderFactory) {
super(keyBuilderFactory);
this.ser = EntrySerializer.INSTANCE;
}
/**
* Return the unsigned byte[] key for an index name.
*
* @param obj
* The name of an index.
*/
@Override
public byte[] serializeKey(final Object obj) {
final IKeyBuilder keyBuilder = getKeyBuilder();
final byte[] a = keyBuilder.reset().append((String) obj).getKey();
// log.error("name=" + obj + ", key=" + BytesUtil.toString(a)+", keyBuilder="+keyBuilder);
return a;
}
/**
* Return the byte[] value for an {@link Entry}.
*
* @param entry
* An Entry.
*/
@Override
public byte[] serializeVal(final Entry entry) {
return ser.serialize(entry);
}
@SuppressWarnings("rawtypes")
@Override
public Entry deserialize(final ITuple tuple) {
return ser.deserialize(tuple.getValueStream());
}
/**
* The initial version (no additional persistent state).
*/
private final static transient byte VERSION0 = 0;
/**
* The current version.
*/
private final static transient byte VERSION = VERSION0;
@Override
public void readExternal(final ObjectInput in) throws IOException,
ClassNotFoundException {
super.readExternal(in);
final byte version = in.readByte();
switch (version) {
case VERSION0:
break;
default:
throw new UnsupportedOperationException("Unknown version: "
+ version);
}
}
@Override
public void writeExternal(final ObjectOutput out) throws IOException {
super.writeExternal(out);
out.writeByte(VERSION);
}
} // Name2AddrTupleSerializer
/**
* Prefix scan of a {@link Name2Addr} index. This scan assumes that the
* caller has provided for any possible thread-safety issues.
*
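* A usage sketch, assuming <code>n2a</code> is a {@link Name2Addr} view for
* some commit point (the prefix value is illustrative):
*
* <pre>{@code
* final Iterator<String> names = Name2Addr.indexNameScan("kb.", n2a);
* while (names.hasNext()) {
*     System.out.println(names.next());
* }
* }</pre>
*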
* @param prefix
* The prefix.
* @param n2a
* The index.
*
* @return The names of the indices spanned by that prefix in that index.
*
* @see "Name2Addr.indexNameScan(prefix) uses scan + filter"
* @see "AbstractTripleStore.destroy() does not filter for correct prefix"
*
*/
@SuppressWarnings("unchecked")
public static final Iterator<String> indexNameScan(final String prefix,
final IIndex n2a) {
final byte[] fromKey;
final byte[] toKey;
final boolean hasPrefix = prefix != null && prefix.length() > 0;
// final boolean restrictScan = true;
if (hasPrefix ) //&& restrictScan)
{
/*
* When the namespace prefix was given, generate the toKey as the
* fixed length successor of the fromKey.
*
* Note: We MUST use StrengthEnum:=PRIMARY for the prefix scan in
* order to avoid the secondary collation ordering effects.
*/
// final IKeyBuilder keyBuilder = n2a.getIndexMetadata()
// .getTupleSerializer().getKeyBuilder();
// final Properties properties = new Properties();
//
// properties.setProperty(KeyBuilder.Options.STRENGTH,
// StrengthEnum.Primary.toString());
//
// final IKeyBuilder keyBuilder = new DefaultKeyBuilderFactory(
// properties).getKeyBuilder();
final IKeyBuilder keyBuilder = n2a.getIndexMetadata()
.getPrimaryKeyBuilder();
fromKey = keyBuilder.reset().append(prefix).getKey();
toKey = SuccessorUtil.successor(fromKey.clone());
if (log.isDebugEnabled()) {
log.debug("fromKey=" + BytesUtil.toString(fromKey));
log.debug("toKey =" + BytesUtil.toString(toKey));
}
} else {
// Do not restrict the scan.
fromKey = null;
toKey = null;
}
final ITupleIterator itr = n2a.rangeIterator(fromKey, toKey);
/*
* Add resolver from the tuple to the name of the index.
*/
final IStriterator sitr = new Striterator(itr).addFilter(new Resolver() {
private static final long serialVersionUID = 1L;
@Override
protected Object resolve(Object obj) {
return ((ITuple) obj).getObject().name;
}
});
// if (hasPrefix && !restrictScan) {
//
// /*
// * Only report the names that match the prefix.
// *
// * Note: For the moment, the filter is hacked by examining the
// * de-serialized Entry objects and only reporting those that start
// * with the [prefix].
// */
//
// sitr = sitr.addFilter(new Filter() {
//
// private static final long serialVersionUID = 1L;
//
// @Override
// public boolean isValid(final Object obj) {
//
// final String name = (String) obj;
//
// if (name.startsWith(prefix)) {
//
// // acceptable.
// return true;
// }
// return false;
// }
// });
//
// }
return sitr;
}
// /**
// * The SuccessorUtil does not work with CollatedKeys since it bumps the "meta/control" data
// * at the end of the key, rather than the "value" data of the key.
// *
// * It has been observed that the key data is delimited with a 01 byte, followed by meta/control
// * data with the key itself delimited by a 00 byte.
// *
// * Note that this has only been analyzed for the ICU collator, the standard Java collator does include
// * 00 bytes in the key. However, it too appears to delimit the value key with a 01 byte so the
// * same method should work.
// *
// * @param src - original key
// * @return the next key
// */
// private static byte[] successor(final byte[] src) {
// final byte[] nxt = src.clone();
// for (int i = 1; i < nxt.length; i++) {
// if (nxt[i] == 01) { // end of data
// nxt[i-1]++;
// break;
// }
// }
//
// return nxt;
// }
}