All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.journal.WarmUpTask Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Apr 14, 2015
 */
package com.bigdata.journal;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;

import org.apache.log4j.Logger;

import com.bigdata.btree.BaseIndexStats;
import com.bigdata.btree.ICheckpointProtocol;
import com.bigdata.util.Bytes;
import com.bigdata.util.InnerCause;
import com.bigdata.util.concurrent.LatchedExecutor;

/**
 * Helper class to warm up the indices associated with various namespaces on the
 * journal.
 * 
 * @author bryan
 * 
 * @see "Ticket: pre-heat the journal on startup"
 * 
 *      TODO The current implementation assigns one thread per index. However,
 *      it is also possible to use a reader pool to accelerate the index scans.
 *      This would require a change to the
 *      {@link com.bigdata.btree.AbstractBTree#dumpPages(boolean, boolean)}
 *      implementation to parallelize the IOs over the children of a node. We
 *      have experimented with that in the past but not observed a big win for
 *      query. It might be more significant for warmup since we are using a full
 *      scan of each index.
 */
public class WarmUpTask implements Callable> {

   private static final Logger log = Logger.getLogger(WarmUpTask.class);
   
   /**
    * The journal.
    */
   private final Journal journal;

   /**
    * A list of zero or more namespaces to be warmed up (optional). When
    * null or empty, all namespaces will be warmed up.
    */
   private final List namespaces;
   
   /**
    * The commit time to be warmed up and -or- {@link ITx#READ_COMMITTED} to
    * warm up the last commit point on the journal.
    */
   private final long timestamp;

   /**
    * The #of threads that will be used to scan the pages in the indices
    * associated with those namespaces (GTE ONE). The #of threads that will be
    * used to scan the pages in the indices associated with those namespaces
    * (GTE ONE). The indices will be scanned with one thread per index. This
    * parameter determines the #of such scans that will execute in parallel.
    * Since the thread will block on any IO, you need a modestly large number of
    * threads here to enqueue enough disk reads to drive enough IOPs for an
    * efficient disk scan. Even a slow disk should use at least 10 threads.
    */
   private final int nparallel;
   
   /**
    * when true the leaves of the indices will also be read.
    */
   private final boolean visitLeaves;

   /**
    * Used to collect statistics obtained as a side-effect of the warmup
    * procedure.
    * 
    * Note: This collection is ordered and is NOT thread-safe.
    */
   private final Map statsMap = new TreeMap();

   /**
    * 
    * @param journal
    *           The journal.
    * @param namespaces
    *           A list of zero or more namespaces to be warmed up (optional).
    *           When null or empty, all namespaces will be warmed
    *           up.
    * @param timestamp
    *           The commit time to be warmed up and -or-
    *           {@link ITx#READ_COMMITTED} to warm up the last commit point on
    *           the journal.
    * @param nparallel
    *           The #of threads that will be used to scan the pages in the
    *           indices associated with those namespaces (GTE ONE). The indices
    *           will be scanned with one thread per index. This parameter
    *           determines the #of such scans that will execute in parallel.
    *           Since the thread will block on any IO, you need a modestly large
    *           number of threads here to enqueue enough disk reads to drive
    *           enough IOPs for an efficient disk scan. Even a slow disk should
    *           use at least 10 threads.
    * @param visitLeaves
    *           when true the leaves of the indices will also be
    *           read (the recommended warm up procedure does not read the
    *           leaves).
    */
   public WarmUpTask(//
         final Journal journal,//
         final List namespaces, //
         final long timestamp,//
         final int nparallel,//
         final boolean visitLeaves//
   ) {
      if (journal == null)
         throw new IllegalArgumentException();
      if (nparallel < 1)
         throw new IllegalArgumentException();
      this.journal = journal;
      this.namespaces = namespaces;
      this.timestamp = timestamp;
      this.nparallel = nparallel;
      this.visitLeaves = visitLeaves;
   }

   @Override
   public Map call() throws Exception {

      final long begin = System.nanoTime();
      
      // Attempts to pin a view of the journal as of that timestamp.
      final long tx = journal.newTx(timestamp);

      try {

         final long readOnCommitTime = journal.getLocalTransactionManager()
               .getTx(tx).getReadsOnCommitTime();

         final ICommitRecord commitRecord = journal
               .getCommitRecord(readOnCommitTime);

         // Populate the tasks.
         final List> tasks = new LinkedList>();
         {
            // Scan the named indices.
            final Iterator nitr = journal.indexNameScan(
                  null/* prefix */, timestamp);

            while (nitr.hasNext()) {

               // a registered index.
               final String name = nitr.next();

               if (namespaces != null && !namespaces.isEmpty()) {

                  boolean found = false;

                  for (String namespace : namespaces) {

                     if (name.startsWith(namespace)) {

                        found = true;

                        break;

                     }

                  }

                  if (!found) {

                     // Skip this index. Not a desired namespace.
                     continue;

                  }

               }

               if (log.isInfoEnabled())
                  log.info("Will warm up index: name=" + name);
               
               tasks.add(new FutureTask(
                     new Callable() {
                        @Override
                        public BaseIndexStats call() throws Exception {
                           return warmUpIndex(name, commitRecord);
                        }
                     }));

            } // while(itr) (next index)

         }

         // Execute the tasks with limited parallelism.
         try {

            final LatchedExecutor executor = new LatchedExecutor(
                  journal.getExecutorService(), nparallel);

            // Enqueue tasks to run with limited parallelism.
            for (FutureTask ft : tasks) {
               executor.execute(ft);
            }

            // Get the futures, throwing out any errors.
            for (FutureTask ft : tasks) {
               // The statistics from scanning a single index.
               final BaseIndexStats stats = ft.get();
               /*
                * Add those statistics to our collection.
                * 
                * Note: collection is not thread safe, but this logic is single
                * threaded.
                */
               statsMap.put(stats.name, stats);
            }

         } finally {
            
            // Ensure tasks are cancelled.
            for (FutureTask ft : tasks) {
               ft.cancel(true/* mayInterruptIfRunning */);
            }

         }

         final long elapsed = System.nanoTime() - begin;
         
         if (log.isInfoEnabled()) {
            /*
             * Write out warmup statistics (summary and detail).
             */
            final StringWriter strw = new StringWriter(statsMap.size()
                  * Bytes.kilobyte32);
            strw.append("Warmed up: " + statsMap.size() + " indices in "
                  + TimeUnit.NANOSECONDS.toMillis(elapsed) + "ms using up to "
                  + nparallel + " threads.\n");
            final PrintWriter w = new PrintWriter(strw);
            BaseIndexStats.writeOn(w, statsMap);
            w.flush();
            log.info(strw.toString());
         }

         return Collections.unmodifiableMap(statsMap);

      } finally {

         journal.abort(tx);

      }

   }// call()

   /**
    * Warm up a single index.
    * 
    * @param name
    *           The name of the index.
    * @param commitRecord
    *           The commit record from which the index will be loaded.
    *           
    * @return The statistics obtained as a side-effect of warming up the index.
    */
   private BaseIndexStats warmUpIndex(final String name,
         final ICommitRecord commitRecord) {

      // load index from its checkpoint record.
      final ICheckpointProtocol ndx;
      try {

         ndx = journal.getIndexWithCommitRecord(name, commitRecord);

      } catch (Throwable t) {

         if (InnerCause.isInnerCause(t, ClassNotFoundException.class)) {

            /*
             * This is typically a tuple serializer that has a dependency on an
             * application class that is not present in the CLASSPATH. Add the
             * necessary dependency(s) and you should no longer see this
             * message.
             */

            log.warn("Could not load index: "
                  + InnerCause.getInnerCause(t, ClassNotFoundException.class));

            return null;

         } else
            throw new RuntimeException(t);

      }

      // show checkpoint record.
      if (log.isDebugEnabled())
         log.debug(ndx.getCheckpoint());

      /*
       * Collect statistics on the page usage for the index.
       */
      {

         final BaseIndexStats stats = ndx.dumpPages(true/* recursive */,
               visitLeaves);

         if (log.isInfoEnabled())
            log.info("name=" + name + ", stats=" + stats);

         return stats;

      }

   }

} // class WarmUpTask




© 2015 - 2025 Weber Informatics LLC | Privacy Policy