package com.bigdata.resources;
import java.lang.ref.SoftReference;
import java.util.HashMap;
import java.util.Map;
import com.bigdata.btree.BTree;
import com.bigdata.btree.BTreeCounters;
import com.bigdata.btree.ILocalBTreeView;
import com.bigdata.btree.ISimpleSplitHandler;
import com.bigdata.btree.IndexSegment;
import com.bigdata.mdi.IMetadataIndex;
import com.bigdata.service.Event;
import com.bigdata.service.Params;
import com.bigdata.util.InnerCause;
/**
* Adds additional metadata to a {@link BTreeMetadata} that deals with the index
* partition view, including its fast rangeCount, its {@link ISimpleSplitHandler},
* etc.
*
* Note: There is overhead in opening a view comprised of more than just the
* mutable {@link BTree}. That is why there is a separation between the
* {@link BTreeMetadata} class and the {@link ViewMetadata}. The latter will
* force any {@link IndexSegment} in the view to be (re-)opened.
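*
* A minimal usage sketch (hypothetical caller shown for illustration only; in
* practice the lifecycle is driven by overflow processing for the index
* partition):
* <pre>
* final ViewMetadata vmd = new ViewMetadata(resourceManager, commitTime,
*         name, btreeCounters);
*
* // Lazily materializes the view and the derived metadata (may do RMI).
* if (vmd.isTailSplit()) {
*     // ... consider a tail split for this index partition ...
* }
*
* // Release the cached view once overflow processing for it is done.
* vmd.clearRef();
* </pre>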
*
* @author Bryan Thompson
* @version $Id$
*/
class ViewMetadata extends BTreeMetadata implements Params {
/**
* Set <code>true</code> iff the index partition view has been requested and
* the various additional data have been collected from that view (e.g., the
* range count). This is done in order to prevent re-initialization of such
* lazily obtained data.
*/
private volatile boolean initView = false;
/**
* Cached fast range count once initialized from view.
*/
private volatile long rangeCount;
/**
* Cached index partition count once initialized from the view.
*/
private volatile long partitionCount;
/**
* The adjusted nominal size of an index partition. Index partitions are
* split once {@link #sumSegBytes} is GT this value. This value MAY be
* adjusted down by an "acceleration" factor.
*
* @see OverflowManager.Options#NOMINAL_SHARD_SIZE
*/
private volatile long adjustedNominalShardSize;
/**
* Cached estimated percentage of a split once initialized from the view.
*
* Note: the percentOfSplit when based on sumSegBytes is not 100% predictive
* unless we have a compact view since the size on disk of the compact
* segment can be much less. This is especially true when many deleted
* tuples are purged by the compacting merge, in which case the segment
* could shrink to zero during the merge.
*/
private volatile double percentOfSplit;
/**
* Cached decision whether or not a tail split is warranted once initialized
* from the view.
*/
private volatile boolean tailSplit;
/**
* A {@link SoftReference} is used to cache the view since we really want to
* hold onto the reference until we get around to finishing overflow
* processing for this index partition. However, you SHOULD clear the
* reference using {@link #clearRef()} as soon as you have handled
* asynchronous overflow for this view.
*/
private volatile SoftReference<ILocalBTreeView> ref;
/**
* Open the historical view of that index at that time (not just the mutable
* BTree but the full view). The view is cached. If the reference has been
* cleared then the view is re-opened. This also initializes values
* requiring additional effort which are not available until this method is
* invoked including {@link #getRangeCount()}, etc.
*/
public ILocalBTreeView getView() {
// double checked locking.
ILocalBTreeView view = ref == null ? null : ref.get();
if (view == null) {
synchronized (this) {
view = ref == null ? null : ref.get();
if (view == null) {
view = (ILocalBTreeView) resourceManager.getIndex(name,
commitTime);
ref = new SoftReference<ILocalBTreeView>(view);
initView(view);
}
}
}
assert view != null : toString();
return view;
}
/**
* Release the {@link SoftReference} for the index partition view.
*
* @todo refactor into clearBTreeRef() and clearViewRef(). The latter calls
* the former. check all usage to make sure that we are invoking the
* correct method.
*/
public void clearRef() {
synchronized(this) {
if(ref != null) {
ref.clear();
}
}
super.clearRef();
}
/**
* Initialize additional data with higher latency (range count, #of index
* partitions, the adjusted split handler, etc.).
*
* @param view
* The view.
*/
private synchronized void initView(final ILocalBTreeView view) {
if (view == null) {
throw new AssertionError("View not found? " + this);
}
if (initView) {
/*
* This stuff only has to be done once even if the view is released.
*/
return;
}
/*
* Obtain the #of index partitions for this scale-out index.
*
* Note: This may require RMI, but the metadata index is also heavily
* cached by the {@link AbstractFederation}.
*
* Note: This must be done before we obtain the adjusted split handler
* as [npartitions] is an input to that process.
*/
{
long npartitions;
try {
final IMetadataIndex mdi = resourceManager.getFederation()
.getMetadataIndex(indexMetadata.getName(), commitTime);
if (mdi == null) {
log.warn("No metadata index: running in test harness?");
npartitions = 1L;
} else {
npartitions = mdi.rangeCount();
if (npartitions == 0) {
/*
* There must always be at least one index partition for
* a scale-out index so this is an error condition.
*/
log.error("No partitions? name="
+ indexMetadata.getName());
}
}
} catch (Throwable t) {
if (InnerCause.isInnerCause(t, InterruptedException.class)) {
// don't trap interrupts.
throw new RuntimeException(t);
}
/*
* Traps any RMI failures (or anything else), logs an error,
* and reports npartitions as -1L.
*/
log.error("name=" + indexMetadata.getName(), t);
npartitions = -1L;
}
this.partitionCount = npartitions;
}
/*
* Compute the target size on the disk of a compact index segment for the
* shard. The calculation applies an acceleration factor based on some
* desired minimum number of shards for the index and uses the
* nominalShardSize once that minimum has been satisfied.
*/
{
final int accelerateSplitThreshold = resourceManager.accelerateSplitThreshold;
if (accelerateSplitThreshold == 0
|| partitionCount > accelerateSplitThreshold) {
this.adjustedNominalShardSize = resourceManager.nominalShardSize;
} else {
/*
* discount: given T=100:
*
* d = .01 when N=1
*
* d = .1 when N=10
*
* d = 1 when N=100
*/
final double d = (double) partitionCount
/ accelerateSplitThreshold;
this.adjustedNominalShardSize = (long) (resourceManager.nominalShardSize * d);
if (log.isInfoEnabled())
log.info("npartitions=" + partitionCount + ", discount=" + d
+ ", threshold=" + accelerateSplitThreshold
+ ", adjustedNominalShardSize="
+ this.adjustedNominalShardSize
+ ", nominalShardSize="
+ resourceManager.nominalShardSize);
}
}
/*
* Range count for the view (fast).
*/
this.rangeCount = view.rangeCount();
/*
* The percentage of a full index partition fulfilled by this view.
*
* Note: the percentOfSplit when based on sumSegBytes is not 100%
* predictive unless we have a compact view since the size on disk of
* the compact segment can be much less. This is especially true when
* many deleted tuples are purged by the compacting merge, in which case
* the segment could shrink to zero during the merge.
*/
this.percentOfSplit = super.sumSegBytes / (double) adjustedNominalShardSize;
/*
* true iff this is a good candidate for a tail split.
*/
this.tailSplit = //
this.percentOfSplit > resourceManager.percentOfSplitThreshold && //
super.percentTailSplits > resourceManager.tailSplitThreshold//
;
initView = true;
}
/**
* The fast range count of the view (cached).
*
* @throws IllegalStateException
* unless {@link #getView()} has been invoked.
*/
public long getRangeCount() {
if(!initView) {
// materialize iff never initialized.
getView();
}
return rangeCount;
}
/**
* Return the #of index partitions for this scale-out index. The value is
* computed once per overflow event and then cached.
*/
public long getIndexPartitionCount() {
if(!initView) {
// materialize iff never initialized.
getView();
}
return partitionCount;
}
/**
* The adjusted nominal size on disk of a shard after a compacting merge
* (cached). This factors in an optional acceleration factor which causes
* shards to be split when they are smaller unless a minimum #of shards
* exist for that index.
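*
* For example (illustrative numbers only): with a nominal shard size of
* 200MB and an accelerate split threshold of 100, an index with only 10
* partitions gets a discount of 10/100 = 0.1 and an adjusted nominal shard
* size of 20MB, so its shards split much sooner while the index is small.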
*
* @see OverflowManager.Options#NOMINAL_SHARD_SIZE
*/
public long getAdjustedNominalShardSize() {
if(!initView) {
// materialize iff never initialized
getView();
}
return adjustedNominalShardSize;
}
/**
* Estimated percentage of a split based on the size on disk (cached).
*
* Note: the percentOfSplit is not 100% predictive unless we have a compact
* view since the size on disk of the compact segment can be much less. This
* is especially true when many deleted tuples are purged by the compacting
* merge, in which case the segment could shrink to zero during the merge.
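*
* For example (illustrative numbers only): if the segments in the view sum
* to 100MB on disk and the adjusted nominal shard size is 200MB, the
* estimate is 100/200 = 0.5, i.e., the shard is roughly half way to a split.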
*/
public double getPercentOfSplit() {
if (!initView) {
// materialize iff never initialized.
getView();
}
return this.percentOfSplit;
}
/**
* Return <code>true</code> if the index partition satisfies the criteria for
* a tail split (heavy writes on the tail of the index partition and the size
* of the index partition is large enough to warrant a tail split).
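*
* Concretely, this is <code>true</code> when {@link #getPercentOfSplit()}
* exceeds the configured percent-of-split threshold and the percentage of
* tail splits exceeds the configured tail split threshold.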
*
* @see OverflowManager.Options#TAIL_SPLIT_THRESHOLD
* @see OverflowManager.Options#PERCENT_OF_SPLIT_THRESHOLD
*/
public boolean isTailSplit() {
if(!initView) {
getView();
}
return tailSplit;
}
/**
* {@inheritDoc}
*
* Note: The ctor intentionally does not force the materialization of the
* view or perform any RMI. Those operations are done lazily in order to not
* impose their latency during synchronous overflow.
*/
public ViewMetadata(
final ResourceManager resourceManager, final long commitTime,
final String name, final BTreeCounters btreeCounters) {
super(resourceManager, commitTime, name, btreeCounters);
}
/**
* Extended for more metadata.
*/
protected void toString(final StringBuilder sb) {
if (initView) {
sb.append(", rangeCount=" + rangeCount);
sb.append(", partitionCount=" + partitionCount);
sb.append(", adjustedNominalShardSize=" + adjustedNominalShardSize);
sb.append(", percentOfSplit=" + percentOfSplit);
sb.append(", tailSplit=" + tailSplit);
}
}
/**
* Returns all the interesting properties in a semi-structured form which
* can be used to log an {@link Event}.
*/
public Map<String, Object> getParams() {
final Map<String, Object> m = new HashMap<String, Object>();
/*
* Fields from the BTreeMetadata class.
*/
m.put("name", name);
m.put("action", getAction());
m.put("entryCount", entryCount);
m.put("sourceCount", sourceCount);
m.put("journalSourceCount", sourceJournalCount);
m.put("segmentSourceCount", sourceSegmentCount);
m.put("mergePriority", mergePriority);
// m.put("splitPriority", splitPriority);
m.put("manditoryMerge", mandatoryMerge);
m.put("#leafSplit", btreeCounters.leavesSplit);
m.put("#headSplit", btreeCounters.headSplit);
m.put("#tailSplit", btreeCounters.tailSplit);
m.put("percentHeadSplits", percentHeadSplits);
m.put("percentTailSplits", percentTailSplits);
/*
* Fields from the ViewMetadata class.
*/
m.put("rangeCount", rangeCount);
m.put("partitionCount", partitionCount);
m.put("adjustedNominalShardSize", adjustedNominalShardSize);
m.put("percentOfSplit", percentOfSplit);
m.put("tailSplit", tailSplit);
return m;
}
}