package com.bigdata.resources;
import java.lang.ref.SoftReference;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.log4j.Logger;
import com.bigdata.btree.BTree;
import com.bigdata.btree.BTreeCounters;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.io.DataInputBuffer;
import com.bigdata.journal.AbstractJournal;
import com.bigdata.journal.ITx;
import com.bigdata.journal.Name2Addr.Entry;
import com.bigdata.journal.Name2Addr.EntrySerializer;
import com.bigdata.service.IDataService;
/**
* Metadata on the entire synchronous and asynchronous overflow task.
*
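 * <p>
 * A minimal usage sketch (the <code>resourceManager</code> reference and the
 * index partition name <code>"spo#12"</code> are hypothetical):
 *
 * <pre>
 * // capture metadata for the indices on the old journal.
 * final OverflowMetadata omd = new OverflowMetadata(resourceManager);
 *
 * // record the action chosen for an index partition.
 * omd.setAction("spo#12", OverflowActionEnum.Copy);
 *
 * // report the #of index partitions for which that action was chosen.
 * final int ncopied = omd.getActionCount(OverflowActionEnum.Copy);
 * </pre>
 *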
* @author Bryan Thompson
* @version $Id$
*/
public class OverflowMetadata {
protected static final Logger log = Logger.getLogger(OverflowMetadata.class);
/**
* The resource manager.
*/
public final ResourceManager resourceManager;
/**
* The last commit time on the old journal. This identifies the commit point
* on which synchronous and asynchronous overflow will read.
*/
public final long lastCommitTime;
// /**
// * The names of any index partitions that were copied onto the new journal
// * during synchronous overflow processing.
// */
// private final Set copied = new HashSet();
/**
 * Set to <code>true</code> iff asynchronous post-processing should be
 * performed. The flag is set iff some indices were NOT copied onto the new
 * journal, such that asynchronous post-processing is required.
*/
public boolean postProcess;
/*
* Stuff relating to the performance counters for the indices on the old
* journal.
*/
/**
* The raw write score computed based on the net change in the aggregated
* {@link BTreeCounters}s for each index partition since the last overflow.
*
* @see BTreeCounters#computeRawWriteScore()
*/
private double totalRawStore;
/**
* Scores computed for each named index in order by ascending score
* (increased activity).
*/
private final Score[] scores;
/**
* Random access to the index {@link Score}s.
*/
private final Map<String, Score> scoreMap;
/*
* Metadata for the BTree and index partition views.
*/
/**
 * Note: Since this is a linked hash map it maintains the order in which we
 * populate it, so {@link #views()} will also visit the views in index name
 * order.
*/
private final LinkedHashMap<String, ViewMetadata> views;
/**
* Clears the {@link ViewMetadata} map, clearing the {@link SoftReference}s
* from each {@link BTreeMetadata} and {@link ViewMetadata} instance. This
* is used by the {@link AsynchronousOverflowTask} to encourage the eager GC
* of resources once that task is finished.
*/
void clearViews() {
for(ViewMetadata view : views.values()) {
view.clearRef();
}
views.clear();
}
/**
* Random lookup of the {@link ViewMetadata}.
*
* @param name
* The index name.
*
 * @return The {@link ViewMetadata} -or- <code>null</code> if none is
 *         found.
*
* @throws IllegalStateException
* if {@link #collectMoreData(ResourceManager)} has not been
* called.
*/
public final ViewMetadata getViewMetadata(final String name) {
if (name == null)
throw new IllegalArgumentException();
final ViewMetadata vmd = views.get(name);
return vmd;
}
/**
* The views that are being processed in index name order.
*/
public Iterator<ViewMetadata> views() {
return Collections.unmodifiableMap(views).values().iterator();
}
/**
* The #of indices on the old journal.
*/
public int getIndexCount() {
return views.size();
}
/**
* True if the tuples for the index were copied to the new live journal
* during synchronous overflow.
*
* @param name
* The name of the index partition.
*
 * @return <code>true</code> iff the tuples for the index partition were
 *         copied.
*/
public boolean isCopied(final String name) {
if (name == null)
throw new IllegalArgumentException();
final ViewMetadata vmd = views.get(name);
if (vmd == null)
throw new IllegalArgumentException();
return OverflowActionEnum.Copy.equals(vmd.getAction());
}
// /**
// * The action to take / taken for the index partition.
// *
// * @param name
// * The name of the index partition.
// *
// * @return The action taken -or- <code>null</code> if no action has been
// *         selected.
// */
// public OverflowActionEnum getAction(final String name) {
//
// if (name == null)
// throw new IllegalArgumentException();
//
// final ViewMetadata vmd = views.get(name);
//
// if (vmd == null)
// throw new IllegalArgumentException();
//
// return vmd.getAction();
//
// }
/**
* Specify the action to be taken.
*
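 * <p>
 * A sketch of the intended calling pattern, per the note in the method body
 * (assumes the caller is in this package so it can reach the per-view lock;
 * <code>omd</code> is a reference to this {@link OverflowMetadata} and the
 * action shown is arbitrary):
 *
 * <pre>
 * final ViewMetadata vmd = omd.getViewMetadata(name);
 * vmd.lock.lock();
 * try {
 *     if (vmd.getAction() == null) {
 *         // no action chosen yet - make an atomic decision.
 *         omd.setAction(name, OverflowActionEnum.Copy);
 *     }
 * } finally {
 *     vmd.lock.unlock();
 * }
 * </pre>
 *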
* @param name
* The index partition name.
* @param action
* The action.
*
* @throws IllegalStateException
* if an action has already been specified.
*/
public void setAction(final String name, final OverflowActionEnum action) {
if (name == null)
throw new IllegalArgumentException();
final ViewMetadata vmd = views.get(name);
if (vmd == null)
throw new IllegalArgumentException();
/*
* Note: The caller generally should already be holding this lock so
* they can make an atomic decision regarding the action to take for
* this index partition.
*/
vmd.lock.lock();
try {
// set on the ViewMetadata object itself.
vmd.setAction(action);
// update counters.
synchronized (actionCounts) {
AtomicInteger counter = actionCounts.get(action);
if (counter == null) {
// new counter.
counter = new AtomicInteger();
actionCounts.put(action, counter);
}
// increment counter.
counter.incrementAndGet();
}
} finally {
vmd.lock.unlock();
}
}
/**
* Return the #of index partitions for which the specified action was
* chosen.
*
* @param action
* The action.
*
* @return The #of index partitions where that action was chosen.
*
* @todo This is only used by the unit tests as a means of verifying that
* they have set {@link OverflowManager#copyIndexThreshold} to zero.
* This stands in the way of decoupling asynchronous overflow from a
* fixed cycle such that index partition build, merge, split, etc.
* tasks can run at any time (other than while some other task is
* running).
*
* Note: we must also give
* {@link OverflowManager#doSynchronousOverflow()} exclusive access so
* it can propagate the index partition declarations without blocking.
* Right now, asynchronous index tasks do not execute during
* synchronous overflow so there is no problem. This does have the
* consequence that the set of locks is per {@link OverflowMetadata}
* instance (and hence only valid for the epoch between one
* synchronous overflow and the next).
*/
public int getActionCount(final OverflowActionEnum action) {
if (action == null)
throw new IllegalArgumentException();
synchronized (actionCounts) {
final AtomicInteger counter = actionCounts.get(action);
if (counter == null) {
return 0;
}
return counter.get();
}
}
private final Map<OverflowActionEnum, AtomicInteger> actionCounts = new HashMap<OverflowActionEnum, AtomicInteger>();
/**
* Captures various metadata about the live journal in preparation for a
* synchronous overflow operation.
*
 * Note: This captures metadata which has low latency and does not force the
 * materialization of the fused view of an index partition, but it may force
 * the loading of the {@link BTree} from the old journal, which is itself a
 * very lightweight operation.
 *
 * @param resourceManager
 *            The resource manager.
*/
public OverflowMetadata(final ResourceManager resourceManager) {
if (resourceManager == null)
throw new IllegalArgumentException();
this.resourceManager = resourceManager;
// note: assumes the live journal is the one that we want!
final AbstractJournal oldJournal = resourceManager.getLiveJournal();
// timestamp of the last commit on the old journal.
lastCommitTime = oldJournal.getRootBlockView().getLastCommitTime();
/*
* This will be the aggregate of the net change in the btree performance
* counters since the last overflow operation (sum across [delta]).
* Together with the per-index partition performance counters, this is
* used to decide which indices are "hot" and which are not.
*/
final BTreeCounters totalCounters = new BTreeCounters();
/*
* Generate the metadata summaries of the index partitions that we will
* need to process.
*/
{
// using read-historical view of Name2Addr
final int numIndices = (int) oldJournal
.getName2Addr(lastCommitTime).rangeCount();
views = new LinkedHashMap<String, ViewMetadata>(numIndices);
// the name2addr view as of the last commit time.
final ITupleIterator<?> itr = oldJournal.getName2Addr(lastCommitTime)
.rangeIterator();
final Map<String, BTreeCounters> delta = resourceManager
        .markAndGetDelta();
while (itr.hasNext()) {
final ITuple<?> tuple = itr.next();
final Entry entry = EntrySerializer.INSTANCE
.deserialize(new DataInputBuffer(tuple.getValue()));
/*
* Obtain the delta in the btree performance counters for this
* index partition since the last overflow.
*/
BTreeCounters btreeCounters = delta.get(entry.name);
if (btreeCounters == null) {
// use empty counters if none were reported (paranoia).
log.error("No performance counters? index=" + entry.name);
btreeCounters = new BTreeCounters();
}
totalCounters.add(btreeCounters);
views.put(entry.name, new ViewMetadata(resourceManager,
lastCommitTime, entry.name, btreeCounters));
}
}
/*
* Compute the "Score" for each index partition.
*/
{
final int nscores = views.size();
this.scores = new Score[nscores];
this.scoreMap = new HashMap<String, Score>(nscores);
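// the aggregate raw write score across all indices; each per-index
// Score constructed below is given this total.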
this.totalRawStore = totalCounters.computeRawWriteScore();
if (nscores > 0) {
final Iterator<Map.Entry<String, ViewMetadata>> itr = views
        .entrySet().iterator();
int i = 0;
while (itr.hasNext()) {
final Map.Entry<String, ViewMetadata> entry = itr.next();
final String name = entry.getKey();
final ViewMetadata vmd = entry.getValue();
final BTreeCounters btreeCounters = vmd.btreeCounters;
scores[i] = new Score(name, btreeCounters, totalRawStore);
i++;
}
// sort into ascending order (increasing activity).
Arrays.sort(scores);
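// assign the integer rank (position in the sorted order) and the
// normalized rank in [0.0:1.0).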
for (i = 0; i < scores.length; i++) {
scores[i].rank = i;
scores[i].drank = ((double) i) / scores.length;
scoreMap.put(scores[i].name, scores[i]);
}
if (log.isDebugEnabled()) {
log.debug("The most active index was: "
+ scores[scores.length - 1]);
log.debug("The least active index was: " + scores[0]);
}
}
}
}
/**
* The #of active index partitions on this data service.
*/
public int getActiveCount() {
return scores.length;
}
/**
* The scores in order from least active to most active.
*/
public List<Score> getScores() {
return Arrays.asList(scores);
}
/**
 * Return <code>true</code> if the named index partition is "warm" for
* {@link ITx#UNISOLATED} and/or {@link ITx#READ_COMMITTED} operations.
*
* Note: This method informs the selection of index partitions that will be
* moved to another {@link IDataService}. The preference is always to move
* an index partition that is "warm" rather than "hot" or "cold". Moving a
* "hot" index partition causes more latency since more writes will have
* been buffered and unisolated access to the index partition will be
* suspended during the atomic part of the move operation. Likewise, "cold"
* index partitions are not consuming any resources other than disk space
* for their history, and the history of an index is not moved when the
* index partition is moved.
*
* Since the history of an index partition is not moved when the index
* partition is moved, the determination of cold, warm or hot is made in
* terms of the resources consumed by {@link ITx#READ_COMMITTED} and
* {@link ITx#UNISOLATED} access to named index partitions. If an index
* partition is hot for historical read, then your only choices are to shed
* other index partitions from the data service, to read from a failover
* data service having the same index partition, or possibly to increase the
* replication count for the index partition.
*
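 * <p>
 * A brief sketch of the intended use (<code>omd</code> is a reference to
 * this {@link OverflowMetadata}; the index partition name is hypothetical):
 *
 * <pre>
 * final Score score = omd.getScore("spo#12");
 * if (score == null) {
 *     // the index partition is cold for unisolated / read-committed use.
 * } else {
 *     // consult score.rank and score.drank to distinguish warm from hot.
 * }
 * </pre>
 *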
* @param name
* The name of an index partition.
*
 * @return The index {@link Score} -or- <code>null</code> iff the index
 *         was not touched for read-committed or unisolated operations.
*/
public Score getScore(final String name) {
final Score score = scoreMap.get(name);
if (score == null) {
/*
* Index was not touched for read-committed or unisolated
* operations.
*/
if (log.isDebugEnabled())
log.debug("Index is cold: " + name);
return null;
}
if (log.isDebugEnabled())
log.debug("Index score: " + score);
return score;
}
}