com.linkedin.dagli.dag.FastPreparedDAGExecutor Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of core Show documentation
DAG-oriented machine learning framework for bug-resistant, readable, efficient, maintainable and trivially deployable models in Java and other JVM languages
There is a newer version: 15.0.0-beta9
Show newest version
package com.linkedin.dagli.dag;

import com.linkedin.dagli.generator.Generator;
import com.linkedin.dagli.objectio.biglist.BigListWriter;
import com.linkedin.dagli.objectio.ObjectIterator;
import com.linkedin.dagli.objectio.ObjectReader;
import com.linkedin.dagli.objectio.ObjectWriter;
import com.linkedin.dagli.producer.Producer;
import com.linkedin.dagli.transformer.PreparedTransformer;
import it.unimi.dsi.fastutil.objects.ObjectBigArrayBigList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;


/**
 * An executor designed for very efficient inference in a (prepared) DAG.  This executor cannot prepare preparable DAGs,
 * however.  Use {@link LocalDAGExecutor} if you want to combine the efficient inference of FastPreparedDAGExecutor and
 * the training of {@link MultithreadedDAGExecutor}.
 *
 * Note that a single example/row/input is always executed by {@link FastPreparedDAGExecutor} using one thread; that is,
 * inference for a single example is not multithreaded by the executor (of course, nodes in the DAG may still
 * multithread their processing, but this is independent of the executor).  This is almost always desirable.  However,
 * if you have a huge, expensive pipelined model, consider using {@link MultithreadedDAGExecutor}, which can execute
 * multiple nodes in the DAG at once.
 */
public final class FastPreparedDAGExecutor extends AbstractDAGExecutor {
  // Serialization version identifier for this class.
  private static final long serialVersionUID = 1L;

  /**
   * By default, the executor will create no more than one thread per this many examples/rows provided as input.
   */
  public static final int DEFAULT_MIN_INPUTS_PER_THREAD = 128;

  // Maximum number of threads the executor will use (1 unless changed via withMaxThreads(...)).
  private int _maxThreads = 1;
  // Minimum number of examples/rows per thread (changed via withMinInputsPerThread(...)).
  private int _minInputsPerThread = DEFAULT_MIN_INPUTS_PER_THREAD;
  // Upper limit on the minibatch size (1024 unless changed via withMaxMinibatchSize(...)).
  private int _maxMinibatchSize = 1024;
  // Whether multithreaded execution uses ForkJoinPool.commonPool() rather than a new pool
  // (true unless changed via withCommonThreadPool(...)).
  private boolean _useCommonPool = true;

  /**
   * Creates a copy of this executor that performs multithreaded execution either on the shared
   * {@link ForkJoinPool#commonPool()} or on a new pool.
   *
   * The common pool is the default and is usually the best option: it avoids the cost of creating new threads and
   * thread pools, and it helps avoid "excessive concurrency" (more threads than logical cores).
   *
   * @param useCommonPool {@code true} to use the common pool, {@code false} to use a new pool
   * @return a copy of this instance with the requested thread pool setting; this instance is not modified here
   */
  public FastPreparedDAGExecutor withCommonThreadPool(boolean useCommonPool) {
    // The flag is assigned on the copy handed to the lambda, never on this instance.
    final FastPreparedDAGExecutor configuredCopy = clone(copy -> copy._useCommonPool = useCommonPool);
    return configuredCopy;
  }

  /**
   * Creates a copy of this executor whose thread usage is capped at the given maximum.
   *
   * Unless changed through this method, the maximum number of threads is 1.
   *
   * @param maxThreads the largest number of threads the returned executor may use
   * @return a copy of this executor carrying the specified thread limit
   */
  public FastPreparedDAGExecutor withMaxThreads(int maxThreads) {
    // The limit is assigned on the copy handed to the lambda, never on this instance.
    final FastPreparedDAGExecutor configuredCopy = clone(copy -> copy._maxThreads = maxThreads);
    return configuredCopy;
  }

  /**
   * Creates a copy of this executor that requires at least the given number of examples per thread; the executor
   * will create no more than one thread per this many examples provided as input.
   *
   * Unless changed through this method, the minimum is {@link #DEFAULT_MIN_INPUTS_PER_THREAD} (128).
   *
   * @param minInputsPerThread the minimum number of examples each thread should be given
   * @return a copy of this executor carrying the specified minimum examples per thread
   */
  public FastPreparedDAGExecutor withMinInputsPerThread(int minInputsPerThread) {
    // The minimum is assigned on the copy handed to the lambda, never on this instance.
    final FastPreparedDAGExecutor configuredCopy = clone(copy -> copy._minInputsPerThread = minInputsPerThread);
    return configuredCopy;
  }

  /**
   * Creates a copy of this executor whose minibatch size is capped at the given value.
   *
   * Normally the minibatch size is the maximum of the preferred minibatch sizes of all the prepared transformers,
   * constrained to be no more than this limit.  Because the minibatch size substantially affects the memory required
   * by the executor, a lower limit may be beneficial in some cases.
   *
   * Unless changed through this method, the limit is a (rather generous) 1024.
   *
   * @param maxMinibatchSize the largest minibatch size that will be allowed (the minibatch size actually used may be
   *                         as small as 1 regardless of this value)
   * @return a copy of this executor carrying the specified minibatch size limit
   */
  public FastPreparedDAGExecutor withMaxMinibatchSize(int maxMinibatchSize) {
    // The limit is assigned on the copy handed to the lambda, never on this instance.
    final FastPreparedDAGExecutor configuredCopy = clone(copy -> copy._maxMinibatchSize = maxMinibatchSize);
    return configuredCopy;
  }

  /**
   * Creates a new {@link FastPreparedDAGExecutor}; every setting starts at the value given by its field initializer
   * and can be adjusted on copies through the {@code with...} methods.
   */
  public FastPreparedDAGExecutor() {
    super();
  }

  @Override
  protected , T extends PreparableDAGTransformer> DAGExecutionResult prepareAndApplyUnsafeImpl(
      T dag, ObjectReader