package com.bigdata.resources;
import java.lang.ref.SoftReference;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.log4j.Logger;
import com.bigdata.btree.BTree;
import com.bigdata.btree.BTreeCounters;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.io.DataInputBuffer;
import com.bigdata.journal.AbstractJournal;
import com.bigdata.journal.ITx;
import com.bigdata.journal.Name2Addr.Entry;
import com.bigdata.journal.Name2Addr.EntrySerializer;
import com.bigdata.service.IDataService;
/**
* Metadata on the entire synchronous and asynchronous overflow task.
*
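 * <p>
 * A minimal usage sketch (the <code>resourceManager</code> reference and the
 * index partition name <code>"spo#12"</code> are hypothetical):
 *
 * <pre>
 * // capture metadata for the indices on the old journal.
 * final OverflowMetadata omd = new OverflowMetadata(resourceManager);
 *
 * // record the action chosen for an index partition.
 * omd.setAction("spo#12", OverflowActionEnum.Copy);
 *
 * // report the #of index partitions for which that action was chosen.
 * final int ncopied = omd.getActionCount(OverflowActionEnum.Copy);
 * </pre>
 *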
* @author Bryan Thompson
* @version $Id$
*/
public class OverflowMetadata {
protected static final Logger log = Logger.getLogger(OverflowMetadata.class);
/**
* The resource manager.
*/
public final ResourceManager resourceManager;
/**
* The last commit time on the old journal. This identifies the commit point
* on which synchronous and asynchronous overflow will read.
*/
public final long lastCommitTime;
// /**
// * The names of any index partitions that were copied onto the new journal
// * during synchronous overflow processing.
// */
// private final Set copied = new HashSet();
/**
 * Set to <code>true</code> iff asynchronous post-processing should be
 * performed. The flag is set iff some indices were NOT copied onto the new
 * journal, such that asynchronous post-processing is required.
*/
public boolean postProcess;
/*
* Stuff relating to the performance counters for the indices on the old
* journal.
*/
/**
* The raw write score computed based on the net change in the aggregated
* {@link BTreeCounters}s for each index partition since the last overflow.
*
* @see BTreeCounters#computeRawWriteScore()
*/
private double totalRawStore;
/**
* Scores computed for each named index in order by ascending score
* (increased activity).
*/
private final Score[] scores;
/**
* Random access to the index {@link Score}s.
*/
private final Map<String, Score> scoreMap;
/*
* Metadata for the BTree and index partition views.
*/
/**
 * Note: Since this is a linked hash map it maintains the order in which we
 * populate it, so {@link #views()} will also visit the views in index name
 * order.
*/
private final LinkedHashMap<String, ViewMetadata> views;
/**
* Clears the {@link ViewMetadata} map, clearing the {@link SoftReference}s
* from each {@link BTreeMetadata} and {@link ViewMetadata} instance. This
* is used by the {@link AsynchronousOverflowTask} to encourage the eager GC
* of resources once that task is finished.
*/
void clearViews() {
for(ViewMetadata view : views.values()) {
view.clearRef();
}
views.clear();
}
/**
* Random lookup of the {@link ViewMetadata}.
*
* @param name
* The index name.
*
 * @return The {@link ViewMetadata} -or- <code>null</code> if none is
 *         found.
*
* @throws IllegalStateException
* if {@link #collectMoreData(ResourceManager)} has not been
* called.
*/
public final ViewMetadata getViewMetadata(final String name) {
if (name == null)
throw new IllegalArgumentException();
final ViewMetadata vmd = views.get(name);
return vmd;
}
/**
* The views that are being processed in index name order.
*/
public Iterator<ViewMetadata> views() {
return Collections.unmodifiableMap(views).values().iterator();
}
/**
* The #of indices on the old journal.
*/
public int getIndexCount() {
return views.size();
}
/**
* True if the tuples for the index were copied to the new live journal
* during synchronous overflow.
*
* @param name
* The name of the index partition.
*
 * @return <code>true</code> iff the tuples for the index partition were
 *         copied.
*/
public boolean isCopied(final String name) {
if (name == null)
throw new IllegalArgumentException();
final ViewMetadata vmd = views.get(name);
if (vmd == null)
throw new IllegalArgumentException();
return OverflowActionEnum.Copy.equals(vmd.getAction());
}
// /**
// * The action to take / taken for the index partition.
// *
// * @param name
// * The name of the index partition.
// *
// * @return The action taken -or- <code>null</code> if no action has been
// *         selected.
// */
// public OverflowActionEnum getAction(final String name) {
//
// if (name == null)
// throw new IllegalArgumentException();
//
// final ViewMetadata vmd = views.get(name);
//
// if (vmd == null)
// throw new IllegalArgumentException();
//
// return vmd.getAction();
//
// }
/**
* Specify the action to be taken.
*
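 * <p>
 * A sketch of the intended calling pattern, per the note in the method body
 * (assumes the caller is in this package so it can reach the per-view lock;
 * <code>omd</code> is a reference to this {@link OverflowMetadata} and the
 * action shown is arbitrary):
 *
 * <pre>
 * final ViewMetadata vmd = omd.getViewMetadata(name);
 * vmd.lock.lock();
 * try {
 *     if (vmd.getAction() == null) {
 *         // no action chosen yet - make an atomic decision.
 *         omd.setAction(name, OverflowActionEnum.Copy);
 *     }
 * } finally {
 *     vmd.lock.unlock();
 * }
 * </pre>
 *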
* @param name
* The index partition name.
* @param action
* The action.
*
* @throws IllegalStateException
* if an action has already been specified.
*/
public void setAction(final String name, final OverflowActionEnum action) {
if (name == null)
throw new IllegalArgumentException();
final ViewMetadata vmd = views.get(name);
if (vmd == null)
throw new IllegalArgumentException();
/*
* Note: The caller generally should already be holding this lock so
* they can make an atomic decision regarding the action to take for
* this index partition.
*/
vmd.lock.lock();
try {
// set on the ViewMetadata object itself.
vmd.setAction(action);
// update counters.
synchronized (actionCounts) {
AtomicInteger counter = actionCounts.get(action);
if (counter == null) {
// new counter.
counter = new AtomicInteger();
actionCounts.put(action, counter);
}
// increment counter.
counter.incrementAndGet();
}
} finally {
vmd.lock.unlock();
}
}
/**
* Return the #of index partitions for which the specified action was
* chosen.
*
* @param action
* The action.
*
* @return The #of index partitions where that action was chosen.
*
* @todo This is only used by the unit tests as a means of verifying that
* they have set {@link OverflowManager#copyIndexThreshold} to zero.
* This stands in the way of decoupling asynchronous overflow from a
* fixed cycle such that index partition build, merge, split, etc.
* tasks can run at any time (other than while some other task is
* running).
*
* Note: we must also give
* {@link OverflowManager#doSynchronousOverflow()} exclusive access so
* it can propagate the index partition declarations without blocking.
* Right now, asynchronous index tasks do not execute during
* synchronous overflow so there is no problem. This does have the
* consequence that the set of locks is per {@link OverflowMetadata}
* instance (and hence only valid for the epoch between one
* synchronous overflow and the next).
*/
public int getActionCount(final OverflowActionEnum action) {
if (action == null)
throw new IllegalArgumentException();
synchronized (actionCounts) {
final AtomicInteger counter = actionCounts.get(action);
if (counter == null) {
return 0;
}
return counter.get();
}
}
private final Map<OverflowActionEnum, AtomicInteger> actionCounts = new HashMap<OverflowActionEnum, AtomicInteger>();
/**
* Captures various metadata about the live journal in preparation for a
* synchronous overflow operation.
*
 * Note: This captures metadata which has low latency and does not force the
 * materialization of the fused view of an index partition, but it may force
 * the loading of the {@link BTree} from the old journal, which is itself a
 * very lightweight operation.
 *
 * @param resourceManager
 *            The resource manager.
*/
public OverflowMetadata(final ResourceManager resourceManager) {
if (resourceManager == null)
throw new IllegalArgumentException();
this.resourceManager = resourceManager;
// note: assumes the live journal is the one that we want!
final AbstractJournal oldJournal = resourceManager.getLiveJournal();
// timestamp of the last commit on the old journal.
lastCommitTime = oldJournal.getRootBlockView().getLastCommitTime();
/*
* This will be the aggregate of the net change in the btree performance
* counters since the last overflow operation (sum across [delta]).
* Together with the per-index partition performance counters, this is
* used to decide which indices are "hot" and which are not.
*/
final BTreeCounters totalCounters = new BTreeCounters();
/*
* Generate the metadata summaries of the index partitions that we will
* need to process.
*/
{
// using read-historical view of Name2Addr
final int numIndices = (int) oldJournal
.getName2Addr(lastCommitTime).rangeCount();
views = new LinkedHashMap<String, ViewMetadata>(numIndices);
// the name2addr view as of the last commit time.
final ITupleIterator<?> itr = oldJournal.getName2Addr(lastCommitTime)
.rangeIterator();
final Map<String, BTreeCounters> delta = resourceManager
        .markAndGetDelta();
while (itr.hasNext()) {
final ITuple<?> tuple = itr.next();
final Entry entry = EntrySerializer.INSTANCE
.deserialize(new DataInputBuffer(tuple.getValue()));
/*
* Obtain the delta in the btree performance counters for this
* index partition since the last overflow.
*/
BTreeCounters btreeCounters = delta.get(entry.name);
if (btreeCounters == null) {
// use empty counters if none were reported (paranoia).
log.error("No performance counters? index=" + entry.name);
btreeCounters = new BTreeCounters();
}
totalCounters.add(btreeCounters);
views.put(entry.name, new ViewMetadata(resourceManager,
lastCommitTime, entry.name, btreeCounters));
}
}
/*
* Compute the "Score" for each index partition.
*/
{
final int nscores = views.size();
this.scores = new Score[nscores];
this.scoreMap = new HashMap<String, Score>(nscores);
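// the aggregate raw write score across all indices; each per-index
// Score constructed below is given this total.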
this.totalRawStore = totalCounters.computeRawWriteScore();
if (nscores > 0) {
final Iterator<Map.Entry<String, ViewMetadata>> itr = views
        .entrySet().iterator();
int i = 0;
while (itr.hasNext()) {
final Map.Entry<String, ViewMetadata> entry = itr.next();
final String name = entry.getKey();
final ViewMetadata vmd = entry.getValue();
final BTreeCounters btreeCounters = vmd.btreeCounters;
scores[i] = new Score(name, btreeCounters, totalRawStore);
i++;
}
// sort into ascending order (increasing activity).
Arrays.sort(scores);
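// assign the integer rank (position in the sorted order) and the
// normalized rank in [0.0:1.0).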
for (i = 0; i < scores.length; i++) {
scores[i].rank = i;
scores[i].drank = ((double) i) / scores.length;
scoreMap.put(scores[i].name, scores[i]);
}
if (log.isDebugEnabled()) {
log.debug("The most active index was: "
+ scores[scores.length - 1]);
log.debug("The least active index was: " + scores[0]);
}
}
}
}
/**
* The #of active index partitions on this data service.
*/
public int getActiveCount() {
return scores.length;
}
/**
* The scores in order from least active to most active.
*/
public List<Score> getScores() {
return Arrays.asList(scores);
}
/**
 * Return <code>true</code> if the named index partition is "warm" for
* {@link ITx#UNISOLATED} and/or {@link ITx#READ_COMMITTED} operations.
*
* Note: This method informs the selection of index partitions that will be
* moved to another {@link IDataService}. The preference is always to move
* an index partition that is "warm" rather than "hot" or "cold". Moving a
* "hot" index partition causes more latency since more writes will have
* been buffered and unisolated access to the index partition will be
* suspended during the atomic part of the move operation. Likewise, "cold"
* index partitions are not consuming any resources other than disk space
* for their history, and the history of an index is not moved when the
* index partition is moved.
*
* Since the history of an index partition is not moved when the index
* partition is moved, the determination of cold, warm or hot is made in
* terms of the resources consumed by {@link ITx#READ_COMMITTED} and
* {@link ITx#UNISOLATED} access to named index partitions. If an index
* partition is hot for historical read, then your only choices are to shed
* other index partitions from the data service, to read from a failover
* data service having the same index partition, or possibly to increase the
* replication count for the index partition.
*
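 * <p>
 * A brief sketch of the intended use (<code>omd</code> is a reference to
 * this {@link OverflowMetadata}; the index partition name is hypothetical):
 *
 * <pre>
 * final Score score = omd.getScore("spo#12");
 * if (score == null) {
 *     // the index partition is cold for unisolated / read-committed use.
 * } else {
 *     // consult score.rank and score.drank to distinguish warm from hot.
 * }
 * </pre>
 *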
* @param name
* The name of an index partition.
*
 * @return The index {@link Score} -or- <code>null</code> iff the index
 *         was not touched for read-committed or unisolated operations.
*/
public Score getScore(final String name) {
final Score score = scoreMap.get(name);
if (score == null) {
/*
* Index was not touched for read-committed or unisolated
* operations.
*/
if (log.isDebugEnabled())
log.debug("Index is cold: " + name);
return null;
}
if (log.isDebugEnabled())
log.debug("Index score: " + score);
return score;
}
}