package water;

import com.brsanthu.googleanalytics.DefaultRequest;
import com.brsanthu.googleanalytics.GoogleAnalytics;

import jsr166y.CountedCompleter;
import jsr166y.ForkJoinPool;
import jsr166y.ForkJoinWorkerThread;

import org.apache.log4j.LogManager;
import org.apache.log4j.PropertyConfigurator;
import org.reflections.Reflections;

import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.net.InetAddress;
import java.net.MulticastSocket;
import java.net.NetworkInterface;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;

import water.UDPRebooted.ShutdownTsk;
import water.api.ModelCacheManager;
import water.api.RequestServer;
import water.exceptions.H2OFailException;
import water.exceptions.H2OIllegalArgumentException;
import water.init.AbstractBuildVersion;
import water.init.AbstractEmbeddedH2OConfig;
import water.init.JarHash;
import water.init.NetworkInit;
import water.init.NodePersistentStorage;
import water.nbhm.NonBlockingHashMap;
import water.persist.PersistManager;
import water.util.GAUtils;
import water.util.Log;
import water.util.OSUtils;
import water.util.PrettyPrint;

/**
* Start point for creating or joining an H2O Cloud.
*
* @version 1.0
*/
final public class H2O {
  public static final String DEFAULT_JKS_PASS = "h2oh2o";

  //-------------------------------------------------------------------------------------------------------------------
  // Command-line argument parsing and help
  //-------------------------------------------------------------------------------------------------------------------

  /**
   * Print help about command line arguments.
   */
  public static void printHelp() {
    String defaultFlowDirMessage;
    if (DEFAULT_FLOW_DIR() == null) {
      // If you start h2o on Hadoop, you must set -flow_dir.
      // H2O doesn't know how to guess a good one.
      // user.home doesn't make sense.
      defaultFlowDirMessage =
      "          (The default is none; saving flows not available.)\n";
    }
    else {
      defaultFlowDirMessage =
      "          (The default is '" + DEFAULT_FLOW_DIR() + "'.)\n";
    }

    String s =
            "\n" +
            "Usage:  java [-Xmx] -jar h2o.jar [options]\n" +
            "        (Note that every option has a default and is optional.)\n" +
            "\n" +
            "    -h | -help\n" +
            "          Print this help.\n" +
            "\n" +
            "    -version\n" +
            "          Print version info and exit.\n" +
            "\n" +
            "    -name \n" +
            "          Cloud name used for discovery of other nodes.\n" +
            "          Nodes with the same cloud name will form an H2O cloud\n" +
            "          (also known as an H2O cluster).\n" +
            "\n" +
            "    -flatfile \n" +
            "          Configuration file explicitly listing H2O cloud node members.\n" +
            "\n" +
            "    -ip \n" +
            "          IP address of this node.\n" +
            "\n" +
            "    -port \n" +
            "          Port number for this node (note: port+1 is also used).\n" +
            "          (The default port is " + ARGS.port + ".)\n" +
            "\n" +
            "    -network [, ...]\n" +
            "          The IP address discovery code will bind to the first interface\n" +
            "          that matches one of the networks in the comma-separated list.\n" +
            "          Use instead of -ip when a broad range of addresses is legal.\n" +
            "          (Example network specification: '10.1.2.0/24' allows 256 legal\n" +
            "          possibilities.)\n" +
            "\n" +
            "    -ice_root \n" +
            "          The directory where H2O spills temporary data to disk.\n" +
            "\n" +
            "    -log_dir \n" +
            "          The directory where H2O writes logs to disk.\n" +
            "          (This usually has a good default that you need not change.)\n" +
            "\n" +
            "    -log_level \n" +
            "          Write messages at this logging level, or above.  Default is INFO." +
            "\n" +
            "\n" +
            "    -flow_dir \n" +
            "          The directory where H2O stores saved flows.\n" +
            defaultFlowDirMessage +
            "\n" +
            "    -nthreads <#threads>\n" +
            "          Maximum number of threads in the low priority batch-work queue.\n" +
            "          (The default is 99.)\n" +
            "\n" +
            "    -client\n" +
            "          Launch H2O node in client mode.\n" +
            "\n" +
            "Authentication options:\n" +
            "\n" +
            "    -jks \n" +
            "          Java keystore file\n" +
            "\n" +
            "    -jks_pass \n" +
            "          (Default is '" + DEFAULT_JKS_PASS + "')\n" +
            "\n" +
            "    -hash_login\n" +
            "          Use Jetty HashLoginService\n" +
            "\n" +
            "    -ldap_login\n" +
            "          Use Jetty LdapLoginService\n" +
            "\n" +
            "    -login_conf \n" +
            "          LoginService configuration file\n" +
            "\n" +
            "Cloud formation behavior:\n" +
            "\n" +
            "    New H2O nodes join together to form a cloud at startup time.\n" +
            "    Once a cloud is given work to perform, it locks out new members\n" +
            "    from joining.\n" +
            "\n" +
            "Examples:\n" +
            "\n" +
            "    Start an H2O node with 4GB of memory and a default cloud name:\n" +
            "        $ java -Xmx4g -jar h2o.jar\n" +
            "\n" +
            "    Start an H2O node with 6GB of memory and a specify the cloud name:\n" +
            "        $ java -Xmx6g -jar h2o.jar -name MyCloud\n" +
            "\n" +
            "    Start an H2O cloud with three 2GB nodes and a default cloud name:\n" +
            "        $ java -Xmx2g -jar h2o.jar &\n" +
            "        $ java -Xmx2g -jar h2o.jar &\n" +
            "        $ java -Xmx2g -jar h2o.jar &\n" +
            "\n";

    System.out.print(s);

    for (AbstractH2OExtension e : H2O.getExtensions()) {
      e.printHelp();
    }
  }

  /**
   * Singleton ARGS instance that contains the processed arguments.
   */
  public static final OptArgs ARGS = new OptArgs();
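  // Note: main() copies every "ai.h2o.*" Java system property to the tail of the command
  // line before parsing, so e.g. launching with -Dai.h2o.nthreads=16 behaves like
  // passing "-nthreads 16" (see main() below).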

  /**
   * A class containing all of the arguments for H2O.
   */
  public static class OptArgs {
    //-----------------------------------------------------------------------------------
    // Help and info
    //-----------------------------------------------------------------------------------
    /** -help, -help=true; print help and exit*/
    public boolean help = false;

    /** -version, -version=true; print version and exit */
    public boolean version = false;

    //-----------------------------------------------------------------------------------
    // Clouding
    //-----------------------------------------------------------------------------------
    /** -name=name; Set cloud name */
    public String name = System.getProperty("user.name"); // Cloud name

    /** -flatfile=flatfile; Specify a list of cluster IP addresses */
    public String flatfile;

    /** -port=####; Specific Browser/API/HTML port */
    public int port;

    /** -baseport=####; Port to start searching upward from. */
    public int baseport = 54321;

    /** -web_ip=ip4_or_ip6; IP used for web server. By default it listens on all interfaces. */
    public String web_ip = null;

    /** -ip=ip4_or_ip6; Named IP4/IP6 address instead of the default */
    public String ip;

    /** -network=network; Network specification for acceptable interfaces to bind to */
    public String network;

    /** -client, -client=true; Client-only; no work; no homing of Keys (but can cache) */
    public boolean client;

    /** -user_name=user_name; Set user name */
    public String user_name = System.getProperty("user.name");

    //-----------------------------------------------------------------------------------
    // Node configuration
    //-----------------------------------------------------------------------------------
    /** -ice_root=ice_root; ice root directory; where temp files go */
    public String ice_root;

    /** -cleaner; enable user-mode spilling of big data to disk in ice_root */
    public boolean cleaner = false;

    /** -nthreads=nthreads; Max number of F/J threads in the low-priority batch queue */
    public char nthreads= (char)Runtime.getRuntime().availableProcessors();

    /** -log_dir=/path/to/dir; directory to save logs in */
    public String log_dir;

    /** -flow_dir=/path/to/dir; directory to save flows in */
    public String flow_dir;

    /** -disable_web; disable web API port (used by Sparkling Water) */
    public boolean disable_web = false;

    //-----------------------------------------------------------------------------------
    // HDFS & AWS
    //-----------------------------------------------------------------------------------
    /** -hdfs_config=hdfs_config; configuration file of the HDFS */
    public String hdfs_config = null;

    /** -hdfs_skip=hdfs_skip; used by Hadoop driver to not unpack and load any HDFS jar file at runtime. */
    public boolean hdfs_skip = false;

    /** -aws_credentials=aws_credentials; properties file for aws credentials */
    public String aws_credentials = null;

    /** --ga_hadoop_ver=ga_hadoop_ver; Version string for Hadoop */
    public String ga_hadoop_ver = null;

    /** --ga_opt_out; Turns off usage reporting to Google Analytics  */
    public boolean ga_opt_out = false;

    //-----------------------------------------------------------------------------------
    // Authentication
    //-----------------------------------------------------------------------------------
    /** -jks is Java KeyStore file on local filesystem */
    public String jks = null;

    /** -jks_pass is Java KeyStore password; default is 'h2oh2o' */
    public String jks_pass = DEFAULT_JKS_PASS;

    /** -hash_login enables HashLoginService */
    public boolean hash_login = false;

    /** -ldap_login enables LdapLoginService */
    public boolean ldap_login = false;

    /** -login_conf is login configuration service file on local filesystem */
    public String login_conf = null;

    //-----------------------------------------------------------------------------------
    // Debugging
    //-----------------------------------------------------------------------------------
    /** -log_level=log_level; One of DEBUG, INFO, WARN, ERRR.  Default is INFO. */
    public String log_level;

    /** -random_udp_drop, -random_udp_drop=true; test-only; randomly drop incoming UDP packets */
    public boolean random_udp_drop;

    /** -md5skip, -md5skip=true; test-only; Skip the MD5 Jar checksum; allows jars from different builds to mingle in the same cloud */
    public boolean md5skip = false;

    /** -quiet Enable quiet mode and avoid any prints to console, useful for client embedding */
    public boolean quiet = false;

    public boolean useUDP = false;

    @Override public String toString() {
      StringBuilder result = new StringBuilder();

      //determine fields declared in this class only (no fields of superclass)
      Field[] fields = this.getClass().getDeclaredFields();

      //print field names paired with their values
      result.append("[ ");
      for (Field field : fields) {
        try {
          result.append(field.getName());
          result.append(": ");
          //requires access to private field:
          result.append(field.get(this));
          result.append(", ");
        }
        catch (IllegalAccessException ex) {
          Log.err(ex);
        }
      }
      result.deleteCharAt(result.length() - 2);
      result.deleteCharAt(result.length() - 1);
      result.append(" ]");

      return result.toString();
    }

    /**
     * Whether this H2O instance was launched on Hadoop (using 'hadoop jar h2odriver.jar') or not.
     */
    public boolean launchedWithHadoopJar() {
      return hdfs_skip;
    }
  }

  public static void parseFailed(String message) {
    System.out.println("");
    System.out.println("ERROR: " + message);
    System.out.println("");
    printHelp();
    H2O.exit(1);
  }

  public static class OptString {
    final String _s;
    String _lastMatchedFor;
    public OptString(String s) {
      _s = s;
    }
    public boolean matches(String s) {
      _lastMatchedFor = s;
      if (_s.equals("-"  + s)) return true;
      if (_s.equals("--" + s)) return true;
      return false;
    }

    public int incrementAndCheck(int i, String[] args) {
      i = i + 1;
      if (i >= args.length) parseFailed(_lastMatchedFor + " not specified");
      return i;
    }

    public int parseInt(String a) {
      try { return Integer.parseInt(a); }
      catch (Exception e) { }
      parseFailed("Argument " + _lastMatchedFor + " must be an integer (was given '" + a + "')" );
      return 0;
    }
    @Override public String toString() { return _s; }
  }
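  // For illustration, parsing "-port 54321" in the loop below proceeds roughly as:
  //   OptString s = new OptString("-port"); // s.matches("port") accepts "-port" or "--port"
  //   i = s.incrementAndCheck(i, args);     // advance to the value; fail if it is missing
  //   ARGS.port = s.parseInt(args[i]);      // reject non-integer values with a parse error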


  /**
   * Dead stupid argument parser.
   */
  private static void parseArguments(String[] args) {
    for (AbstractH2OExtension e : H2O.getExtensions()) {
      args = e.parseArguments(args);
    }

    for (int i = 0; i < args.length; i++) {
      OptString s = new OptString(args[i]);
      if (s.matches("h") || s.matches("help")) {
        ARGS.help = true;
      }
      else if (s.matches("version")) {
        ARGS.version = true;
      }
      else if (s.matches("name")) {
        i = s.incrementAndCheck(i, args);
        ARGS.name = args[i];
      }
      else if (s.matches("flatfile")) {
        i = s.incrementAndCheck(i, args);
        ARGS.flatfile = args[i];
      }
      else if (s.matches("port")) {
        i = s.incrementAndCheck(i, args);
        ARGS.port = s.parseInt(args[i]);
      }
      else if (s.matches("baseport")) {
        i = s.incrementAndCheck(i, args);
        ARGS.baseport = s.parseInt(args[i]);
      }
      else if (s.matches("ip")) {
        i = s.incrementAndCheck(i, args);
        ARGS.ip = args[i];
      }
      else if (s.matches("web_ip")) {
        i = s.incrementAndCheck(i, args);
        ARGS.web_ip = args[i];
      }
      else if (s.matches("network")) {
        i = s.incrementAndCheck(i, args);
        ARGS.network = args[i];
      }
      else if (s.matches("client")) {
        ARGS.client = true;
      }
      else if (s.matches("user_name")) {
        i = s.incrementAndCheck(i, args);
        ARGS.user_name = args[i];
      }
      else if (s.matches("ice_root")) {
        i = s.incrementAndCheck(i, args);
        ARGS.ice_root = args[i];
      }
      else if (s.matches("log_dir")) {
        i = s.incrementAndCheck(i, args);
        ARGS.log_dir = args[i];
      }
      else if (s.matches("flow_dir")) {
        i = s.incrementAndCheck(i, args);
        ARGS.flow_dir = args[i];
      }
      else if (s.matches("disable_web")) {
        ARGS.disable_web = true;
      }
      else if (s.matches("nthreads")) {
        i = s.incrementAndCheck(i, args);
        int nthreads = s.parseInt(args[i]);
        if (nthreads >= 1) //otherwise keep default (all cores)
          ARGS.nthreads = (char) nthreads;
      }
      else if (s.matches("hdfs_config")) {
        i = s.incrementAndCheck(i, args);
        ARGS.hdfs_config = args[i];
      }
      else if (s.matches("hdfs_skip")) {
        ARGS.hdfs_skip = true;
      }
      else if (s.matches("aws_credentials")) {
        i = s.incrementAndCheck(i, args);
        ARGS.aws_credentials = args[i];
      }
      else if (s.matches("ga_hadoop_ver")) {
        i = s.incrementAndCheck(i, args);
        ARGS.ga_hadoop_ver = args[i];
      }
      else if (s.matches("ga_opt_out")) {
        // JUnits pass this as a system property, but it is usually a flag without an arg
        if (i+1 < args.length && args[i+1].equals("yes")) i++;
        ARGS.ga_opt_out = true;
      }
      else if (s.matches("log_level")) {
        i = s.incrementAndCheck(i, args);
        ARGS.log_level = args[i];
      }
      else if (s.matches("random_udp_drop")) {
        ARGS.random_udp_drop = true;
      }
      else if (s.matches("md5skip")) {
        ARGS.md5skip = true;
      }
      else if (s.matches("quiet")) {
        ARGS.quiet = true;
      }
      else if(s.matches("useUDP")) {
        i = s.incrementAndCheck(i, args);
        ARGS.useUDP = true;
      }
      else if(s.matches("cleaner")) {
        i = s.incrementAndCheck(i, args);
        ARGS.cleaner = true;
      }
      else if (s.matches("jks")) {
        i = s.incrementAndCheck(i, args);
        ARGS.jks = args[i];
      }
      else if (s.matches("jks_pass")) {
        i = s.incrementAndCheck(i, args);
        ARGS.jks_pass = args[i];
      }
      else if (s.matches("hash_login")) {
        ARGS.hash_login = true;
      }
      else if (s.matches("ldap_login")) {
        ARGS.ldap_login = true;
      }
      else if (s.matches("login_conf")) {
        i = s.incrementAndCheck(i, args);
        ARGS.login_conf = args[i];
      }
      else {
        parseFailed("Unknown argument (" + s + ")");
      }
    }
  }

  private static void validateArguments() {
    if (ARGS.jks != null) {
      if (! new File(ARGS.jks).exists()) {
        parseFailed("File does not exist: " + ARGS.jks);
      }
    }

    if (ARGS.login_conf != null) {
      if (! new File(ARGS.login_conf).exists()) {
        parseFailed("File does not exist: " + ARGS.login_conf);
      }
    }

    if (ARGS.hash_login && ARGS.ldap_login) {
      parseFailed("Can only specify one of -hash_login and -ldap_login");
    }

    if (ARGS.hash_login || ARGS.ldap_login) {
      if (H2O.ARGS.login_conf == null) {
        parseFailed("Must specify -login_conf argument");
      }
    }

    // Validate extension arguments
    for (AbstractH2OExtension e : H2O.getExtensions()) {
      e.validateArguments();
    }
  }
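  // For example, "-hash_login -login_conf /path/to/realm.properties" passes validation,
  // while "-hash_login -ldap_login" (both at once) or "-hash_login" alone (no -login_conf)
  // causes a parse failure.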

  // Model cache manager
  public static ModelCacheManager getMCM() { return new ModelCacheManager(); }

  // Google analytics performance measurement
  public static GoogleAnalytics GA;
  public static int CLIENT_TYPE_GA_CUST_DIM = 1;
  public static int CLIENT_ID_GA_CUST_DIM = 2;

  //-------------------------------------------------------------------------------------------------------------------
  // Embedded configuration for a full H2O node to be implanted in another
  // piece of software (e.g. Hadoop mapper task).
  //-------------------------------------------------------------------------------------------------------------------

  public static volatile AbstractEmbeddedH2OConfig embeddedH2OConfig;

  /**
   * Register embedded H2O configuration object with H2O instance.
   */
  public static void setEmbeddedH2OConfig(AbstractEmbeddedH2OConfig c) { embeddedH2OConfig = c; }
  public static AbstractEmbeddedH2OConfig getEmbeddedH2OConfig() { return embeddedH2OConfig; }

  /**
   * Tell the embedding software that this H2O instance belongs to
   * a cloud of a certain size.
   * This may be non-blocking.
   *
   * @param ip IP address this H2O can be reached at.
   * @param port Port this H2O can be reached at (for REST API and browser).
   * @param size Number of H2O instances in the cloud.
   */
  public static void notifyAboutCloudSize(InetAddress ip, int port, int size) {
    if (embeddedH2OConfig == null) { return; }
    embeddedH2OConfig.notifyAboutCloudSize(ip, port, size);
  }
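  // A minimal sketch of an embedding integration (MyEmbeddedConfig is hypothetical, and
  // AbstractEmbeddedH2OConfig declares further abstract methods not shown here):
  //
  //   class MyEmbeddedConfig extends AbstractEmbeddedH2OConfig {
  //     @Override public void notifyAboutCloudSize(InetAddress ip, int port, int size) {
  //       // e.g. signal the host framework (Hadoop mapper, Spark executor) that the
  //       // cloud reached its expected size and is ready for work
  //     }
  //     ...
  //   }
  //
  //   H2O.setEmbeddedH2OConfig(new MyEmbeddedConfig()); // install before H2O starts up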


  public static void closeAll() {
    try { NetworkInit._udpSocket.close(); } catch( IOException ignore ) { }
    try { H2O.getJetty().stop(); } catch( Exception ignore ) { }
    try { NetworkInit._tcpSocket.close(); } catch( IOException ignore ) { }
    PersistManager PM = H2O.getPM();
    if( PM != null ) PM.getIce().cleanUp();
  }


  /** Notify embedding software instance H2O wants to exit.  Shuts down a single Node.
   *  @param status H2O's requested process exit value.
   */
  public static void exit(int status) {
    // Embedded H2O path (e.g. inside Hadoop mapper task).
    if( embeddedH2OConfig != null )
      embeddedH2OConfig.exit(status);

    // Standalone H2O path, or if the embedded config does not exit
    System.exit(status);
  }

  /** Shut the cluster down by sending a shutdown UDP packet. */
  public static void shutdown(int status) {
    if(status == 0) H2O.orderlyShutdown();
    UDPRebooted.T.error.send(H2O.SELF);
    H2O.exit(status);
  }

  public static int orderlyShutdown() {
    return orderlyShutdown(-1);
  }
  public static int orderlyShutdown(int timeout) {
    boolean [] confirmations = new boolean[H2O.CLOUD.size()];
    if (H2O.SELF.index() >= 0) { // Do not wait for clients to shutdown
      confirmations[H2O.SELF.index()] = true;
    }
    Futures fs = new Futures();
    for(H2ONode n:H2O.CLOUD._memary) {
      if(n != H2O.SELF)
        fs.add(new RPC(n, new ShutdownTsk(H2O.SELF,n.index(), 1000, confirmations)).call());
    }
    if(timeout > 0)
      try { Thread.sleep(timeout); }
      catch (Exception ignore) {}
    else fs.blockForPending(); // todo, should really have block for pending with a timeout

    int failedToShutdown = 0;
    // shutdown failed
    for(boolean b:confirmations)
      if(!b) failedToShutdown++;
    return failedToShutdown;
  }
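  // The return value counts cloud members that did not confirm shutdown in time;
  // e.g. a return of 0 from orderlyShutdown(5000) means every node acknowledged its
  // ShutdownTsk within roughly 5 seconds.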
  private static volatile boolean _shutdownRequested = false;

  public static void requestShutdown() {
    _shutdownRequested = true;
  }

  public static boolean getShutdownRequested() {
    return _shutdownRequested;
  }

  //-------------------------------------------------------------------------------------------------------------------

  public static final AbstractBuildVersion ABV;
  static {
    AbstractBuildVersion abv = AbstractBuildVersion.UNKNOWN_VERSION;
    try {
      Class klass = Class.forName("water.init.BuildVersion");
      java.lang.reflect.Constructor constructor = klass.getConstructor();
      abv = (AbstractBuildVersion) constructor.newInstance();
    } catch (Exception ignore) { }
    ABV = abv;
  }
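  // water.init.BuildVersion is resolved reflectively because it is presumably generated
  // at build time and absent from the source tree; ABV falls back to UNKNOWN_VERSION
  // whenever the class cannot be loaded.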

  //-------------------------------------------------------------------------------------------------------------------

  private static boolean _haveInheritedLog4jConfiguration = false;
  public static boolean haveInheritedLog4jConfiguration() {
    return _haveInheritedLog4jConfiguration;
  }

  public static void configureLogging() {
    if (LogManager.getCurrentLoggers().hasMoreElements()) {
      _haveInheritedLog4jConfiguration = true;
      return;
    }

    // Disable logging from a few specific classes at startup.
    // (These classes may (or may not) be re-enabled later on.)
    //
    // The full logger initialization is done by setLog4jProperties() in class water.util.Log.
    // The trick is the output path / file isn't known until the H2O API PORT is chosen,
    // so real logger initialization has to happen somewhat late in the startup lifecycle.
    java.util.Properties p = new java.util.Properties();
    p.setProperty("log4j.logger.org.reflections.Reflections", "WARN");
    p.setProperty("log4j.logger.org.eclipse.jetty", "WARN");
    PropertyConfigurator.configure(p);
    System.setProperty("org.eclipse.jetty.LEVEL", "WARN");

    // Log jetty stuff to stdout for now.
    // TODO:  figure out how to wire this into log4j.
    System.setProperty("org.eclipse.jetty.util.log.class", "org.eclipse.jetty.util.log.StrErrLog");
  }

  //-------------------------------------------------------------------------------------------------------------------

  // Be paranoid and check that this doesn't happen twice.
  private static boolean extensionsRegistered = false;
  private static long registerExtensionsMillis = 0;

  /**
   * Register H2O extensions.
   *
   * Use reflection to find all classes that inherit from water.AbstractH2OExtension
   * and call H2O.addExtension() for each.
   */
  public static void registerExtensions() {
    if (extensionsRegistered) {
      throw H2O.fail("Extensions already registered");
    }
    long before = System.currentTimeMillis();

    // Disallow schemas whose parent is in another package because it takes ~4s to do the getSubTypesOf call.
    String[] packages = new String[]{"water", "hex"};

    for (String pkg : packages) {
      Reflections reflections = new Reflections(pkg);
      for (Class registerClass : reflections.getSubTypesOf(water.AbstractH2OExtension.class)) {
        if (!Modifier.isAbstract(registerClass.getModifiers())) {
          try {
            Object instance = registerClass.newInstance();
            water.AbstractH2OExtension e = (water.AbstractH2OExtension) instance;
            H2O.addExtension(e);
          } catch (Exception e) {
            throw H2O.fail(e.toString());
          }
        }
      }
    }

    for (AbstractH2OExtension e : H2O.getExtensions()) {
      e.init();
    }

    extensionsRegistered = true;
    registerExtensionsMillis = System.currentTimeMillis() - before;
  }

  private static ArrayList<AbstractH2OExtension> extensions = new ArrayList<>();

  public static void addExtension(AbstractH2OExtension e) {
    extensions.add(e);
  }

  public static ArrayList<AbstractH2OExtension> getExtensions() {
    return extensions;
  }

  //-------------------------------------------------------------------------------------------------------------------

  // Be paranoid and check that this doesn't happen twice.
  private static boolean apisRegistered = false;

  /**
   * Register REST API routes.
   *
   * Use reflection to find all classes that inherit from water.api.AbstractRegister
   * and call the register() method for each.
   *
   * @param relativeResourcePath Relative path from running process working dir to find web resources.
   */
  public static void registerRestApis(String relativeResourcePath) {
    if (apisRegistered) {
      throw H2O.fail("APIs already registered");
    }

    // Log extension registrations here so the message is grouped in the right spot.
    for (AbstractH2OExtension e : H2O.getExtensions()) {
      e.printInitialized();
    }
    Log.info("Registered " + H2O.getExtensions().size() + " extensions in: " + registerExtensionsMillis + "mS");

    long before = System.currentTimeMillis();

    // Disallow schemas whose parent is in another package because it takes ~4s to do the getSubTypesOf call.
    String[] packages = new String[] { "water", "hex" };

    for (String pkg : packages) {
      Reflections reflections = new Reflections(pkg);
      Log.debug("Registering REST APIs for package: " + pkg);
      for (Class registerClass : reflections.getSubTypesOf(water.api.AbstractRegister.class)) {
        if (!Modifier.isAbstract(registerClass.getModifiers())) {
          try {
            Log.debug("Found REST API registration for class: " + registerClass.getName());
            Object instance = registerClass.newInstance();
            water.api.AbstractRegister r = (water.api.AbstractRegister) instance;
            r.register(relativeResourcePath);
          } catch (Exception e) {
            throw H2O.fail(e.toString());
          }
        }
      }
    }

    apisRegistered = true;

    long registerApisMillis = System.currentTimeMillis() - before;
    Log.info("Registered: " + RequestServer.numRoutes() + " REST APIs in: " + registerApisMillis + "mS");
  }

  //-------------------------------------------------------------------------------------------------------------------

  public static class AboutEntry {
    private String name;
    private String value;

    public String getName() { return name; }
    public String getValue() { return value; }

    AboutEntry(String n, String v) {
      name = n;
      value = v;
    }
  }

  private static ArrayList<AboutEntry> aboutEntries = new ArrayList<>();

  @SuppressWarnings("unused")
  public static void addAboutEntry(String name, String value) {
    AboutEntry e = new AboutEntry(name, value);
    aboutEntries.add(e);
  }

  @SuppressWarnings("unused")
  public static ArrayList<AboutEntry> getAboutEntries() {
    return aboutEntries;
  }

  //-------------------------------------------------------------------------------------------------------------------

  private static AtomicLong nextModelNum = new AtomicLong(0);

  /**
   * Calculate a unique model id that includes User-Agent info (if it can be discovered).
   * For the user agent info to be discovered, this needs to be called from a Jetty thread.
   *
   * This lets us distinguish models created from R vs. other front-ends, for example.
   * At some future point, it could make sense to include a sessionId here.
   *
   * The algorithm is:
   *   descModel_[userAgentPrefixIfKnown_]cloudId_monotonicallyIncreasingInteger
   *
   * Right now because of the way the REST API works, a bunch of numbers are created and
   * thrown away.  So the values are monotonically increasing but not contiguous.
   *
   * @param desc Model description.
   * @return The suffix.
   */
  synchronized public static String calcNextUniqueModelId(String desc) {
    StringBuilder sb = new StringBuilder();
    sb.append(desc).append("_model_");

    // Append user agent string if we can figure it out.
    String source = JettyHTTPD.getUserAgent();
    if (source != null) {
      StringBuilder ua = new StringBuilder();

      if (source.contains("Safari")) {
        ua.append("safari");
      }
      else if (source.contains("Python")) {
        ua.append("python");
      }
      else {
        for (int i = 0; i < source.length(); i++) {
          char c = source.charAt(i);
          if (c >= 'a' && c <= 'z') {
            ua.append(c);
            continue;
          }
          else if (c >= 'A' && c <= 'Z') {
            ua.append(c);
            continue;
          }
          break;
        }
      }

      if (ua.toString().length() > 0) {
        sb.append(ua.toString()).append("_");
      }
    }

    // REST API needs some refactoring to avoid burning lots of extra numbers.
    //
    // I actually tried only doing the addAndGet for POST requests (and junk UUID otherwise),
    // but that didn't eliminate the gaps.
    long n = nextModelNum.addAndGet(1);
    sb.append(Long.toString(CLUSTER_ID)).append("_").append(Long.toString(n));

    return sb.toString();
  }

  //-------------------------------------------------------------------------------------------------------------------

  // Atomically set once during startup.  Guards against repeated startups.
  public static final AtomicLong START_TIME_MILLIS = new AtomicLong(); // When did main() run

  // Used to gate default worker threadpool sizes
  public static final int NUMCPUS = Runtime.getRuntime().availableProcessors();

  // Best-guess process ID
  public static long PID = -1L;

  /**
   * Throw an exception that will cause the request to fail, but the cluster to continue.
   * @see #fail(String, Throwable)
   * @return never returns
   */
  public static H2OIllegalArgumentException unimpl() { return new H2OIllegalArgumentException("unimplemented"); }

  /**
   * Throw an exception that will cause the request to fail, but the cluster to continue.
   * @see #unimpl(String)
   * @see #fail(String, Throwable)
   * @return never returns
   */
  public static H2OIllegalArgumentException unimpl(String msg) { return new H2OIllegalArgumentException("unimplemented: " + msg); }

  /**
   * H2O.fail is intended to be used in code where something should never happen, and if
   * it does it's a coding error that needs to be addressed immediately.  Examples are:
   * AutoBuffer serialization for an object you're trying to serialize isn't available;
   * there's a typing error on your schema; your switch statement didn't cover all the AST
   * subclasses available in Rapids.
   *
   * It should *not* be used when only the single request should fail; it should *only* be
   * used if the error means that someone needs to go add some code right away.
   *
   * @param msg Message to Log.fatal()
   * @param cause Optional cause exception to Log.fatal()
   * @return never returns; calls System.exit(-1)
   */
  public static H2OFailException fail(String msg, Throwable cause) {
    Log.fatal(msg);
    if (null != cause) Log.fatal(cause);
    Log.fatal("Stacktrace: ");
    Log.fatal(Arrays.toString(Thread.currentThread().getStackTrace()));

    H2O.shutdown(-1);

    // unreachable
    return new H2OFailException(msg);
  }

  /**
   * @see #fail(String, Throwable)
   * @return never returns
   */
  public static H2OFailException fail() { return H2O.fail("Unknown code failure"); }

  /**
   * @see #fail(String, Throwable)
   * @return never returns
   */
  public static H2OFailException fail(String msg) { return H2O.fail(msg, null); }

  /**
   * Return an error message with an accompanying URL to help the user get more detailed information.
   *
   * @param number H2O tech note number.
   * @param message Message to present to the user.
   * @return A longer message including a URL.
   */
  public static String technote(int number, String message) {
    StringBuffer sb = new StringBuffer()
            .append(message)
            .append("\n")
            .append("\n")
            .append("For more information visit:\n")
            .append("  http://jira.h2o.ai/browse/TN-").append(Integer.toString(number));
    return sb.toString();
  }

  /**
   * Return an error message with an accompanying list of URLs to help the user get more detailed information.
   *
   * @param numbers H2O tech note numbers.
   * @param message Message to present to the user.
   * @return A longer message including a list of URLs.
   */
  public static String technote(int[] numbers, String message) {
    StringBuffer sb = new StringBuffer()
            .append(message)
            .append("\n")
            .append("\n")
            .append("For more information visit:\n");
    for (int number : numbers) {
      sb.append("  http://jira.h2o.ai/browse/TN-").append(Integer.toString(number)).append("\n");
    }
    return sb.toString();
  }

  // --------------------------------------------------------------------------
  // The worker pools - F/J pools with different priorities.
  // These priorities are carefully ordered and asserted for... modify with
  // care.  The real problem here is that we can get into cyclic deadlock
  // unless we spawn a thread of priority "X+1" in order to allow progress
  // on a queue which might be flooded with a large number of "<=X" tasks.
  //
  // Example of deadlock: suppose TaskPutKey and the Invalidate ran at the same
  // priority on a 2-node cluster.  Both nodes flood their own queues with
  // writes to unique keys, which require invalidates to run on the other node.
  // Suppose the flooding depth exceeds the thread-limit (e.g. 99); then each
  // node might have all 99 worker threads blocked in TaskPutKey, awaiting
  // remote invalidates - but the other nodes' threads are also all blocked
  // awaiting invalidates!
  //
  // We fix this by being willing to always spawn a thread working on jobs at
  // priority X+1, and guaranteeing there are no jobs above MAX_PRIORITY -
  // i.e., jobs running at MAX_PRIORITY cannot block, and when those jobs are
  // done, the next lower level jobs get unblocked, etc.
  public static final byte        MAX_PRIORITY = Byte.MAX_VALUE-1;
  public static final byte    ACK_ACK_PRIORITY = MAX_PRIORITY-0; //126
  public static final byte  FETCH_ACK_PRIORITY = MAX_PRIORITY-1; //125
  public static final byte        ACK_PRIORITY = MAX_PRIORITY-2; //124
  public static final byte   DESERIAL_PRIORITY = MAX_PRIORITY-3; //123
  public static final byte INVALIDATE_PRIORITY = MAX_PRIORITY-3; //123
  public static final byte    GET_KEY_PRIORITY = MAX_PRIORITY-4; //122
  public static final byte    PUT_KEY_PRIORITY = MAX_PRIORITY-5; //121
  public static final byte     ATOMIC_PRIORITY = MAX_PRIORITY-6; //120
  public static final byte        GUI_PRIORITY = MAX_PRIORITY-7; //119
  public static final byte     MIN_HI_PRIORITY = MAX_PRIORITY-7; //119
  public static final byte        MIN_PRIORITY = 0;

  // F/J threads that remember the priority of the last task they started
  // working on.
  // made public for ddply
  public static class FJWThr extends ForkJoinWorkerThread {
    public int _priority;
    FJWThr(ForkJoinPool pool) {
      super(pool);
      _priority = ((PrioritizedForkJoinPool)pool)._priority;
      setPriority( _priority == Thread.MIN_PRIORITY
                   ? Thread.NORM_PRIORITY-1
                   : Thread.MAX_PRIORITY-1 );
      setName("FJ-"+_priority+"-"+getPoolIndex());
    }
  }

  // Factory for F/J threads, with caps that vary with priority.
  static class FJWThrFact implements ForkJoinPool.ForkJoinWorkerThreadFactory {
    private final int _cap;
    FJWThrFact( int cap ) { _cap = cap; }
    @Override public ForkJoinWorkerThread newThread(ForkJoinPool pool) {
      int cap = _cap==-1 ? 4 * NUMCPUS : _cap;
      return pool.getPoolSize() <= cap ? new FJWThr(pool) : null;
    }
  }

  // A standard FJ Pool, with an expected priority level.
  private static class PrioritizedForkJoinPool extends ForkJoinPool {
    final int _priority;
    private PrioritizedForkJoinPool(int p, int cap) {
      super((ARGS.nthreads <= 0) ? NUMCPUS : ARGS.nthreads,
            new FJWThrFact(cap),
            null,
            p < MIN_HI_PRIORITY);
      _priority = p;
    }
    private H2OCountedCompleter poll2() { return (H2OCountedCompleter)pollSubmission(); }
  }

  // The pools, indexed by priority; created lazily by submitTask.
  private static final PrioritizedForkJoinPool[] FJPS = new PrioritizedForkJoinPool[MAX_PRIORITY+1];

  public static volatile int LOW_PRIORITY_API_WORK;
  public static volatile String LOW_PRIORITY_API_WORK_CLASS;

  // Submit a task into the queue matching its priority.
  public static <T extends H2OCountedCompleter> T submitTask( T task ) {
    int priority = task.priority();
    if( priority < LOW_PRIORITY_API_WORK )
      LOW_PRIORITY_API_WORK_CLASS = task.getClass().toString();
    assert MIN_PRIORITY <= priority && priority <= MAX_PRIORITY :
      "priority " + priority + " is out of range, expected range is < " + MIN_PRIORITY + "," + MAX_PRIORITY + ">";
    if( FJPS[priority]==null )
      synchronized( H2O.class ) {
        if( FJPS[priority] == null )
          FJPS[priority] = new PrioritizedForkJoinPool(priority,-1);
      }
    FJPS[priority].submit(task);
    return task;
  }

  public static abstract class H2OFuture<T> implements Future<T> {
    public final T getResult() {
      try {
        return get();
      }
      catch (InterruptedException e) { throw new RuntimeException(e); }
      catch (ExecutionException e)   { throw new RuntimeException(e); }
    }
  }

  /** Simple wrapper over F/J {@link CountedCompleter} to support priority
   *  queues.  F/J queues are simple unordered (and extremely light weight)
   *  queues.  However, we frequently need priorities to avoid deadlock and to
   *  promote efficient throughput (e.g. failure to respond quickly to {@link
   *  TaskGetKey} can block an entire node for lack of some small piece of
   *  data).  So each attempt to do lower-priority F/J work starts with an
   *  attempt to work and drain the higher-priority queues.
   */
  public static abstract class H2OCountedCompleter<T extends H2OCountedCompleter>
      extends CountedCompleter implements Cloneable, Freezable<T> {
    @Override public byte[] asBytes() { return new AutoBuffer().put(this).buf(); }
    @Override public T reloadFromBytes(byte[] ary) { return read(new AutoBuffer(ary)); }

    private /*final*/ byte _priority;

    // Without a completer, we expect this task will be blocked on - so the
    // blocking thread is not available in the current thread pool, so the
    // launched task needs to run at a higher priority.
    public H2OCountedCompleter( ) { this(null); }

    // With a completer, this task will NOT be blocked on and the current
    // thread is available for executing it... so the priority can remain at
    // the current level.
    static private byte computePriority( H2OCountedCompleter completer ) {
      int currThrPrior = currThrPriority();
      // If there's no completer, then the current thread will block on this task
      // at the current priority, possibly filling up the current-priority
      // thread pool - so the task has to run at the next higher priority.
      if( completer == null ) return (byte)(currThrPrior+1);
      // With a completer - no thread blocks on this task, so no thread pool
      // gets filled-up with blocked threads.  We can run at the current
      // priority (or the completer's priority if it's higher).
      return (byte)Math.max(currThrPrior,completer.priority());
    }

    protected H2OCountedCompleter(H2OCountedCompleter completer) { this(completer,computePriority(completer)); }

    // Special for picking GUI priorities
    protected H2OCountedCompleter( byte prior ) { this(null,prior); }

    protected H2OCountedCompleter(H2OCountedCompleter completer, byte prior) {
      super(completer);
      _priority = prior;
    }

    /** Used by the F/J framework internally to do work.  Once per F/J task,
     *  drain the high priority queue before doing any low priority work.
     *  Calls {@link #compute2} which contains the actual work. */
    @Override public final void compute() {
      FJWThr t = (FJWThr)Thread.currentThread();
      int pp = ((PrioritizedForkJoinPool)t.getPool())._priority;
      // Drain the high priority queues before the normal F/J queue
      H2OCountedCompleter h2o = null;
      boolean set_t_prior = false;
      try {
        assert priority() == pp : " wrong priority for task " + getClass().getSimpleName() +
          ", expected " + priority() + ", but got " + pp; // Job went to the correct queue?
        assert t._priority <= pp; // Thread attempting the job is only a low-priority?
        final int p2 = Math.max(pp,MIN_HI_PRIORITY);
        for( int p = MAX_PRIORITY; p > p2; p-- ) {
          if( FJPS[p] == null ) continue;
          h2o = FJPS[p].poll2();
          if( h2o != null ) {     // Got a hi-priority job?
            t._priority = p;      // Set & do it now!
            t.setPriority(Thread.MAX_PRIORITY-1);
            set_t_prior = true;
            h2o.compute2();       // Do it ahead of normal F/J work
            p++;                  // Check again the same queue
          }
        }
      } catch( Throwable ex ) {
        // If the higher priority job popped an exception, complete it
        // exceptionally... but then carry on and do the lower priority job.
        if( h2o != null ) h2o.completeExceptionally(ex);
        else { ex.printStackTrace(); throw ex; }
      } finally {
        t._priority = pp;
        if( pp == MIN_PRIORITY && set_t_prior ) t.setPriority(Thread.NORM_PRIORITY-1);
      }
      // Now run the task as planned
      if( this instanceof DTask ) icer().compute1(this);
      else compute2();
    }

    public void compute1() { compute2(); }

    /** Override to specify actual work to do */
    public abstract void compute2();

    // In order to prevent deadlock, threads that block waiting for a reply
    // from a remote node need the remote task to run at a higher priority
    // than themselves.  This field tracks the required priority.
    protected final byte priority() { return _priority; }

    @Override public final T clone() {
      try { return (T)super.clone(); }
      catch( CloneNotSupportedException e ) { throw Log.throwErr(e); }
    }

    /** If this is a F/J thread, return its priority - used to lift the
     *  priority of a blocking remote call, so the remote node runs it at a
     *  higher priority - so we don't deadlock when we burn the local
     *  thread. */
    protected static byte currThrPriority() {
      Thread cThr = Thread.currentThread();
      return (byte)((cThr instanceof FJWThr) ? ((FJWThr)cThr)._priority : MIN_PRIORITY);
    }

    // The serialization flavor / delegate.  Lazily set on first use.
    private short _ice_id;

    /** Find the serialization delegate for a subclass of this class */
    protected Icer<T> icer() {
      int id = _ice_id;
      if(id != 0) {
        int tyid;
        assert id == (tyid = TypeMap.onIce(this)) :
          "incorrectly cached id " + id + ", typemap has " + tyid + ", type = " + getClass().getName();
      }
      return TypeMap.getIcer(id!=0 ? id : (_ice_id=(short)TypeMap.onIce(this)),this);
    }

    @Override final public AutoBuffer write    (AutoBuffer ab) { return icer().write    (ab,(T)this); }
    @Override final public AutoBuffer writeJSON(AutoBuffer ab) { return icer().writeJSON(ab,(T)this); }
    @Override final public T read    (AutoBuffer ab) { return icer().read    (ab,(T)this); }
    @Override final public T readJSON(AutoBuffer ab) { return icer().readJSON(ab,(T)this); }
    @Override final public int frozenType() { return icer().frozenType(); }
  }

  public static abstract class H2OCallback<T extends H2OCountedCompleter> extends H2OCountedCompleter {
    public H2OCallback(){}
    public H2OCallback(H2OCountedCompleter cc){ super(cc); }
    @Override public void compute2() { throw H2O.fail(); }
    @Override public void onCompletion(CountedCompleter caller) { callback((T) caller); }
    public abstract void callback(T t);
  }

  public static int H2O_PORT; // Both TCP & UDP cluster ports
  public static int API_PORT; // RequestServer and the API HTTP port

  /**
   * @return String of the form ipaddress:port
   */
  public static String getIpPortString() {
    return H2O.SELF_ADDRESS.getHostAddress() + ":" + H2O.API_PORT;
  }

  // The multicast discovery port
  public static MulticastSocket  CLOUD_MULTICAST_SOCKET;
  public static NetworkInterface CLOUD_MULTICAST_IF;
  public static InetAddress      CLOUD_MULTICAST_GROUP;
  public static int              CLOUD_MULTICAST_PORT;

  /** Myself, as a Node in the Cloud */
  public static H2ONode SELF = null;

  /** IP address of this node used for communication
   *  with other nodes. */
  public static InetAddress SELF_ADDRESS;

  // Place to store temp/swap files
  public static URI ICE_ROOT;

  public static String DEFAULT_ICE_ROOT() {
    String username = System.getProperty("user.name");
    if (username == null) username = "";
    String u2 = username.replaceAll(" ", "_");
    if (u2.length() == 0) u2 = "unknown";
    return "/tmp/h2o-" + u2;
  }

  // Place to store flows
  public static String DEFAULT_FLOW_DIR() {
    String flow_dir = null;

    try {
      if (ARGS.ga_hadoop_ver != null) {
        PersistManager pm = getPM();
        if (pm != null) {
          String s = pm.getHdfsHomeDirectory();
          if (pm.exists(s)) {
            flow_dir = s;
          }
        }
        if (flow_dir != null) {
          flow_dir = flow_dir + "/h2oflows";
        }
      }
      else {
        flow_dir = System.getProperty("user.home") + File.separator + "h2oflows";
      }
    } catch (Exception ignore) {
      // Never want this to fail, as it will kill program startup.
      // Returning null is fine if it fails for whatever reason.
    }

    return flow_dir;
  }

  /* Static list of acceptable Cloud members passed via the -flatfile option.
   * It is also updated when a new client appears.
   */
  public static HashSet<H2ONode> STATIC_H2OS = null;

  // Reverse cloud index to a cloud; limit of 256 old clouds.
  static private final H2O[] CLOUDS = new H2O[256];

  // Enables debug features like more logging and multiple instances per JVM
  static final String DEBUG_ARG = "h2o.debug";
  static final boolean DEBUG = System.getProperty(DEBUG_ARG) != null;

  // Returned in REST API responses as X-h2o-cluster-id.
  //
  // Currently this is unique per node.  Might make sense to distribute this
  // as part of joining the cluster so all nodes have the same value.
  public static final long CLUSTER_ID = System.currentTimeMillis();

  private static JettyHTTPD jetty;
  public static void setJetty(JettyHTTPD value) { jetty = value; }
  public static JettyHTTPD getJetty() { return jetty; }

  /** If logging has not been setup yet, then Log.info will only print to
   *  stdout.  This allows for early processing of the '-version' option
   *  without unpacking the jar file and other startup stuff. */
  static void printAndLogVersion() {
    Log.init(ARGS.log_level, ARGS.quiet);
    Log.info("----- H2O started " + (ARGS.client?"(client)":"") + " -----");
    Log.info("Build git branch: " + ABV.branchName());
    Log.info("Build git hash: " + ABV.lastCommitHash());
    Log.info("Build git describe: " + ABV.describe());
    Log.info("Build project version: " + ABV.projectVersion());
    Log.info("Built by: '" + ABV.compiledBy() + "'");
    Log.info("Built on: '" + ABV.compiledOn() + "'");

    for (AbstractH2OExtension e : H2O.getExtensions()) {
      String n = e.getExtensionName() + " ";
      AbstractBuildVersion abv = e.getBuildVersion();
      Log.info(n + "Build git branch: ", abv.branchName());
      Log.info(n + "Build git hash: ", abv.lastCommitHash());
      Log.info(n + "Build git describe: ", abv.describe());
      Log.info(n + "Build project version: ", abv.projectVersion());
      Log.info(n + "Built by: ", abv.compiledBy());
      Log.info(n + "Built on: ", abv.compiledOn());
    }

    Runtime runtime = Runtime.getRuntime();
    Log.info("Java availableProcessors: " + runtime.availableProcessors());
    Log.info("Java heap totalMemory: " + PrettyPrint.bytes(runtime.totalMemory()));
    Log.info("Java heap maxMemory: " + PrettyPrint.bytes(runtime.maxMemory()));
    Log.info("Java version: Java "+System.getProperty("java.version")+" (from "+System.getProperty("java.vendor")+")");
    List<String> launchStrings = ManagementFactory.getRuntimeMXBean().getInputArguments();
    Log.info("JVM launch parameters: "+launchStrings);
    Log.info("OS version: "+System.getProperty("os.name")+" "+System.getProperty("os.version")+" ("+System.getProperty("os.arch")+")");
    long totalMemory = OSUtils.getTotalPhysicalMemory();
    Log.info("Machine physical memory: " + (totalMemory==-1 ? "NA" : PrettyPrint.bytes(totalMemory)));
  }

  private static void startGAStartupReport() {
    new GAStartupReportThread().start();
  }

  /** Initializes the local node and the local cloud with itself as the only member. */
  private static void startLocalNode() {
    PID = -1L;
    try {
      String n = ManagementFactory.getRuntimeMXBean().getName();
      int i = n.indexOf('@');
      if( i != -1 ) PID = Long.parseLong(n.substring(0, i));
    } catch( Throwable ignore ) { }

    // Figure self out; this is surprisingly hard
    NetworkInit.initializeNetworkSockets();

    // Do not forget to put SELF into the static configuration (to simulate
    // proper multicast behavior)
    if( !ARGS.client && STATIC_H2OS != null && !STATIC_H2OS.contains(SELF)) {
      Log.warn("Flatfile configuration does not include self: " + SELF + " but contains " + STATIC_H2OS);
      STATIC_H2OS.add(SELF);
    }

    Log.info("H2O cloud name: '" + ARGS.name + "' on " + SELF +
             (ARGS.flatfile==null ?
(", discovery address "+CLOUD_MULTICAST_GROUP+":"+CLOUD_MULTICAST_PORT) : ", static configuration based on -flatfile "+ARGS.flatfile)); Log.info("If you have trouble connecting, try SSH tunneling from your local machine (e.g., via port 55555):\n" + " 1. Open a terminal and run 'ssh -L 55555:localhost:" + API_PORT + " " + System.getProperty("user.name") + "@" + SELF_ADDRESS.getHostAddress() + "'\n" + " 2. Point your browser to " + jetty.getScheme() + "://localhost:55555"); // Create the starter Cloud with 1 member SELF._heartbeat._jar_md5 = JarHash.JARHASH; SELF._heartbeat._client = ARGS.client; } /** Starts the worker threads, receiver threads, heartbeats and all other * network related services. */ private static void startNetworkServices() { // We've rebooted the JVM recently. Tell other Nodes they can ignore task // prior tasks by us. Do this before we receive any packets UDPRebooted.T.reboot.broadcast(); // Start the UDPReceiverThread, to listen for requests from other Cloud // Nodes. There should be only 1 of these, and it never shuts down. // Started first, so we can start parsing UDP packets if(H2O.ARGS.useUDP) { new UDPReceiverThread().start(); // Start a UDP timeout worker thread. This guy only handles requests for // which we have not received a timely response and probably need to // arrange for a re-send to cover a dropped UDP packet. new UDPTimeOutThread().start(); // Same same for a dropped ACK needing an ACKACK back. new H2ONode.AckAckTimeOutThread().start(); } // Start the MultiReceiverThread, to listen for multi-cast requests from // other Cloud Nodes. There should be only 1 of these, and it never shuts // down. Started soon, so we can start parsing multi-cast UDP packets new MultiReceiverThread().start(); // Start the Persistent meta-data cleaner thread, which updates the K/V // mappings periodically to disk. There should be only 1 of these, and it // never shuts down. Needs to start BEFORE the HeartBeatThread to build // an initial histogram state. Cleaner.THE_CLEANER.start(); // Start the TCPReceiverThread, to listen for TCP requests from other Cloud // Nodes. There should be only 1 of these, and it never shuts down. new TCPReceiverThread(NetworkInit._tcpSocket).start(); // Register the default Requests Object x = water.api.RequestServer.class; } // Callbacks to add new Requests & menu items static private volatile boolean _doneRequests; static public void registerGET( String url_pattern, Class hclass, String hmeth, String summary ) { registerGET(url_pattern, hclass, hmeth, null, summary); } static public void registerGET( String url_pattern, Class hclass, String hmeth, String doc_method, String summary ) { if( _doneRequests ) throw new IllegalArgumentException("Cannot add more Requests once the list is finalized"); RequestServer.register(url_pattern,"GET", hclass, hmeth, doc_method, summary); } static public void registerPOST( String url_pattern, Class hclass, String hmeth, String summary ) { if( _doneRequests ) throw new IllegalArgumentException("Cannot add more Requests once the list is finalized"); RequestServer.register(url_pattern,"POST",hclass,hmeth,null,summary); } public static void registerResourceRoot(File f) { JarHash.registerResourceRoot(f); } /** Start the web service; disallow future URL registration. * Blocks until the server is up. 
   */
  static public void finalizeRegistration() {
    if (_doneRequests) return;
    _doneRequests = true;
    water.api.RequestServer.finalizeRegistration();
  }

  // --------------------------------------------------------------------------
  // The Current Cloud.  A list of all the Nodes in the Cloud.  Changes if we
  // decide to change Clouds via atomic Cloud update.
  public static volatile H2O CLOUD = new H2O(new H2ONode[0],0,0);

  // ---
  // A dense array indexing all Cloud members.  Fast reversal from "member#" to
  // Node.  No holes.  Cloud size is _members.length.
  public final H2ONode[] _memary;
  final int _hash;

  // A dense integer identifier that rolls over rarely.  Rollover limits the
  // number of simultaneous nested Clouds we are operating on in-parallel.
  // Really capped to 1 byte, under the assumption we won't have 256 nested
  // Clouds.  Capped at 1 byte so it can be part of an atomically-assigned
  // 'long' holding info specific to this Cloud.
  final char _idx; // no unsigned byte, so unsigned char instead

  // Construct a new H2O Cloud from the member list
  H2O( H2ONode[] h2os, int hash, int idx ) {
    _memary = h2os;                 // Need to clone?
    java.util.Arrays.sort(_memary); // ... sorted!
    _hash = hash;                   // And record hash for cloud rollover
    _idx = (char)(idx&0x0ff);       // Roll-over at 256
  }

  // One-shot atomic setting of the next Cloud, with an empty K/V store.
  // Called single-threaded from Paxos.  Constructs the new H2O Cloud from a
  // member list.
  void set_next_Cloud( H2ONode[] h2os, int hash ) {
    synchronized(this) {
      int idx = _idx+1;       // Unique 1-byte Cloud index
      if( idx == 256 ) idx=1; // wrap, avoiding zero
      CLOUDS[idx] = CLOUD = new H2O(h2os,hash,idx);
    }
    SELF._heartbeat._cloud_size=(char)CLOUD.size();
  }

  // Is nnn larger than old (counting for wrap around)?  Gets confused if we
  // start seeing a mix of more than 128 unique clouds at the same time.  Used
  // to tell the order of Clouds appearing.
  static boolean larger( int nnn, int old ) {
    assert (0 <= nnn && nnn <= 255);
    assert (0 <= old && old <= 255);
    return ((nnn-old)&0xFF) < 64;
  }

  public final int size() { return _memary.length; }
  final H2ONode leader() { return _memary[0]; }

  // Find the node index for this H2ONode, or a negative number on a miss
  int nidx( H2ONode h2o ) { return java.util.Arrays.binarySearch(_memary,h2o); }
  boolean contains( H2ONode h2o ) { return nidx(h2o) >= 0; }
  @Override public String toString() { return java.util.Arrays.toString(_memary); }
  public H2ONode[] members() { return _memary; }

  // Cluster free memory
  public long free_mem() {
    long memsz = 0;
    for( H2ONode h2o : CLOUD._memary )
      memsz += h2o._heartbeat.get_free_mem();
    return memsz;
  }

  // Quick health check; no reason given for bad health
  public boolean healthy() {
    long now = System.currentTimeMillis();
    for( H2ONode h2o : H2O.CLOUD.members() )
      if( now - h2o._last_heard_from >= HeartBeatThread.TIMEOUT )
        return false;
    return true;
  }

  public static void waitForCloudSize(int x, long ms) {
    long start = System.currentTimeMillis();
    while( System.currentTimeMillis() - start < ms ) {
      if( CLOUD.size() >= x && Paxos._commonKnowledge )
        break;
      try { Thread.sleep(100); } catch( InterruptedException ignore ) { }
    }
    if( H2O.CLOUD.size() < x )
      throw new RuntimeException("Cloud size under " + x);
  }

  public static int getCloudSize() {
    if (! Paxos._commonKnowledge) return -1;
    return CLOUD.size();
  }

  // - Wait for at least HeartBeatThread.SLEEP msecs and
  //   try to join others, if any.  Try 2x just in case.
  // - Assume that we get introduced to everybody else
  //   in one Paxos update, if at all (i.e., the rest of
  //   the cloud was already formed and stable by now)
  // - If nobody else is found, not an error.
  public static void joinOthers() {
    long start = System.currentTimeMillis();
    while( System.currentTimeMillis() - start < 2000 ) {
      if( CLOUD.size() > 1 && Paxos._commonKnowledge )
        break;
      try { Thread.sleep(100); } catch( InterruptedException ignore ) { }
    }
  }

  // --------------------------------------------------------------------------
  static void initializePersistence() {
    _PM = new PersistManager(ICE_ROOT);
    if( ARGS.aws_credentials != null ) {
      try {
        water.persist.PersistS3.getClient();
      } catch( IllegalArgumentException e ) { Log.err(e); }
    }
  }

  // --------------------------------------------------------------------------
  // The (local) set of Key/Value mappings.
  public static final NonBlockingHashMap<Key,Value> STORE = new NonBlockingHashMap<>();

  // PutIfMatch
  // - Atomically update the STORE, returning the old Value on success
  // - Kick the persistence engine as needed
  // - Return existing Value on fail, no change.
  //
  // Keys are interned here: I always keep the existing Key, if any.  The
  // existing Key is blind jammed into the Value prior to atomically inserting
  // it into the STORE and interning.
  //
  // Because of the blind jam, there is a narrow unusual race where the Key
  // might exist but be stale (deleted, mapped to a TOMBSTONE), a fresh put()
  // can find it and jam it into the Value, then the Key can be deleted
  // completely (e.g. via an invalidate), the table can resize flushing the
  // stale Key, an unrelated weak-put can re-insert a matching Key (but as a
  // new Java object), and delete it, and then the original thread can do a
  // successful put_if_later over the missing Key and blow the invariant that a
  // stored Value always points to the physically equal Key that maps to it
  // from the STORE.  If this happens, some of the replication management bits
  // in the Key will be set in the wrong Key copy... leading to extra rounds
  // of replication.
  public static Value putIfMatch( Key key, Value val, Value old ) {
    if( old != null ) // Have an old value?
      key = old._key; // Use prior key
    if( val != null ) {
      assert val._key.equals(key);
      if( val._key != key ) val._key = key; // Attempt to uniquify keys
    }

    // Insert into the K/V store
    Value res = STORE.putIfMatchUnlocked(key,val,old);
    if( res != old ) return res; // Return the failure cause

    // Persistence-tickle.
    // If the K/V mapping is going away, remove the old guy.
    // If the K/V mapping is changing, let the store cleaner just overwrite.
  // Remove the value from the store
  public static void raw_remove(Key key) {
    Value v = STORE.remove(key);
    if( v != null ) v.removePersist();
  }
  public static void raw_clear() { STORE.clear(); }
  public static boolean containsKey( Key key ) { return STORE.get(key) != null; }
  static Key getk( Key key ) { return STORE.getk(key); }
  public static Set<Key> localKeySet( ) { return STORE.keySet(); }
  static Collection<Value> values( ) { return STORE.values(); }
  static public int store_size() { return STORE.size(); }

  // Nice local-STORE only debugging summary
  public static String STOREtoString() {
    int[] cnts = new int[1];
    Object[] kvs = H2O.STORE.raw_array();
    // Start the walk at slot 2, because slots 0,1 hold meta-data
    for( int i=2; i < kvs.length; i += 2 ) {
      // In the raw backing array, Keys and Values alternate in slots
      Object ok = kvs[i], ov = kvs[i+1];
      if( !(ok instanceof Key  ) ) continue; // Ignore tombstones and Primes and null's
      if( !(ov instanceof Value) ) continue; // Ignore tombstones and Primes and null's
      int t = ((Value)ov).type();
      while( t >= cnts.length ) cnts = Arrays.copyOf(cnts,cnts.length<<1);
      cnts[t]++;
    }
    StringBuilder sb = new StringBuilder();
    for( int t=0; t < cnts.length; t++ )
      if( cnts[t] != 0 )
        sb.append(String.format("%30s %5d\n",TypeMap.className(t),cnts[t]));
    return sb.toString();
  }

  // --------------------------------------------------------------------------
  // Ask every node in the cloud to run a JVM GC
  public static void gc() {
    class GCTask extends DTask<GCTask> {
      public GCTask() {super(GUI_PRIORITY);}
      @Override public void compute2() {
        Log.info("Calling System.gc() now...");
        System.gc();
        Log.info("System.gc() finished");
        tryComplete();
      }
    }
    for (H2ONode node : H2O.CLOUD._memary) {
      GCTask t = new GCTask();
      new RPC<>(node, t).call().get();
    }
  }
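  // Usage sketch (hypothetical task, same shape as GCTask above): wrapping
  // work in a DTask and pushing it through RPC is the generic recipe for
  // running code on every node:
  //
  //   class HelloTask extends DTask<HelloTask> {
  //     HelloTask() { super(GUI_PRIORITY); }
  //     @Override public void compute2() {
  //       Log.info("hello from " + H2O.SELF);
  //       tryComplete();   // signal completion to the fork/join framework
  //     }
  //   }
  //   for( H2ONode node : H2O.CLOUD.members() )
  //     new RPC<>(node, new HelloTask()).call().get();  // run remotely, block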
  // --------------------------------------------------------------------------
  public static void main( String[] args ) {
    // Record system start-time.
    if( !START_TIME_MILLIS.compareAndSet(0L, System.currentTimeMillis()) )
      return; // Already started

    // Copy all ai.h2o.* system properties to the tail of the command line,
    // effectively overwriting the earlier args.
    ArrayList<String> args2 = new ArrayList<>(Arrays.asList(args));
    for( Object p : System.getProperties().keySet() ) {
      String s = (String)p;
      if( s.startsWith("ai.h2o.") ) {
        args2.add("-" + s.substring(7));
        // hack: Junits expect properties, throw out dummy prop for ga_opt_out
        if (!s.substring(7).equals("ga_opt_out"))
          args2.add(System.getProperty(s));
      }
    }

    // Parse args
    parseArguments(args2.toArray(args));

    // Get ice path before loading Log or Persist class
    String ice = DEFAULT_ICE_ROOT();
    if( ARGS.ice_root != null ) ice = ARGS.ice_root.replace("\\", "/");
    try {
      ICE_ROOT = new URI(ice);
    } catch(URISyntaxException ex) {
      throw new RuntimeException("Invalid ice_root: " + ice + ", " + ex.getMessage());
    }

    // Always print version, whether asked-for or not!
    printAndLogVersion();
    if( ARGS.version ) {
      Log.flushStdout();
      exit(0);
    }

    // Print help & exit
    if( ARGS.help ) {
      printHelp();
      exit(0);
    }

    // Validate arguments
    validateArguments();

    Log.info("X-h2o-cluster-id: " + H2O.CLUSTER_ID);
    Log.info("User name: '" + H2O.ARGS.user_name + "'");

    // Register with GA or not
    List<String> gaidList = JarHash.getResourcesList("gaid");
    if((new File(".h2o_no_collect")).exists()
       || (new File(System.getProperty("user.home")+File.separator+".h2o_no_collect")).exists()
       || ARGS.ga_opt_out
       || gaidList.contains("CRAN")
       || H2O.ABV.projectVersion().split("\\.")[3].equals("99999")) { // dev build has build number 99999
      GA = null;
      Log.info("Opted out of sending usage metrics.");
    } else {
      try {
        GA = new GoogleAnalytics("UA-56665317-1", "H2O", ABV.projectVersion());
        DefaultRequest defReq = GA.getDefaultRequest();
        String gaid = null;
        if (gaidList.size() > 0) {
          if (gaidList.size() > 1) Log.debug("More than one resource seen in gaid dir.");
          for (String str : gaidList) {
            if (str.matches("........-....-....-....-............")
                && !str.equals("XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX")) {
              gaid = str;
              break;
            }
          }
        }
        if (gaid == null) { // No UUID, create one
          gaid = defReq.clientId();
          gaid = gaid.replaceFirst("........-","ANONYMOU-");
        }
        defReq.customDimension(CLIENT_ID_GA_CUST_DIM, gaid);
        GA.setDefaultRequest(defReq);
      } catch(Throwable t) {
        Log.POST(11, t.toString());
        StackTraceElement[] stes = t.getStackTrace();
        for(int i = 0; i < stes.length; i++)
          Log.POST(11, stes[i].toString());
      }
    }

    // Epic Hunt for the correct self InetAddress
    NetworkInit.findInetAddressForSelf();

    // Start the local node. Needed before starting logging.
    startLocalNode();

    try {
      String logDir = Log.getLogDir();
      Log.info("Log dir: '" + logDir + "'");
    } catch (Exception e) {
      Log.info("Log dir: (Log4j configuration inherited)");
    }
    Log.info("Cur dir: '" + System.getProperty("user.dir") + "'");

    // Print extra debug info now that logs are set up
    RuntimeMXBean rtBean = ManagementFactory.getRuntimeMXBean();
    Log.debug("H2O launch parameters: "+ARGS.toString());
    Log.debug("Boot class path: "+ rtBean.getBootClassPath());
    Log.debug("Java class path: "+ rtBean.getClassPath());
    Log.debug("Java library path: "+ rtBean.getLibraryPath());

    // Load up from disk and initialize the persistence layer
    initializePersistence();

    // Initialize NPS
    {
      String flow_dir;
      if (ARGS.flow_dir != null) {
        flow_dir = ARGS.flow_dir;
      } else {
        flow_dir = DEFAULT_FLOW_DIR();
      }
      if (flow_dir != null) {
        flow_dir = flow_dir.replace("\\", "/");
        Log.info("Flow dir: '" + flow_dir + "'");
      } else {
        Log.info("Flow dir is undefined; saving flows not available");
      }
      NPS = new NodePersistentStorage(flow_dir);
    }

    // Start network services, including heartbeats
    startNetworkServices(); // start server services
    Log.trace("Network services started");

    // The "Cloud of size N formed" message printed out by doHeartbeat is the
    // trigger for users of H2O to know that it's OK to start sending REST API
    // requests.
    Paxos.doHeartbeat(SELF);
    assert SELF._heartbeat._cloud_hash != 0 || ARGS.client;

    // Start the heartbeat thread, to publish the Clouds' existence to other
    // Clouds. This will typically trigger a round of Paxos voting so we can
    // join an existing Cloud.
    new HeartBeatThread().start();

    if (GA != null)
      startGAStartupReport();
  }

  // Die horribly
  public static void die(String s) {
    Log.fatal(s);
    H2O.shutdown(-1);
  }
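  // Worked example for the ai.h2o.* property forwarding in main() above:
  // because matching system properties are appended to the tail of the
  // argument list, these two launches are equivalent, and the property form
  // wins when both are given (later args overwrite earlier ones):
  //
  //   java -jar h2o.jar -name mycloud
  //   java -Dai.h2o.name=mycloud -jar h2o.jar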
  public static class GAStartupReportThread extends Thread {
    final private int sleepMillis = 150 * 1000; // 2.5 min

    // Constructor.
    public GAStartupReportThread() {
      super("GAStartupReport"); // Only 9 characters get printed in the log.
      setDaemon(true);
      setPriority(MAX_PRIORITY - 2);
    }

    // Class main thread.
    @Override
    public void run() {
      try {
        Thread.sleep(sleepMillis);
      } catch (Exception ignore) {}
      GAUtils.logStartup();
    }
  }
}
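// Note: GAStartupReportThread is started from main() via startGAStartupReport()
// (defined elsewhere in this class) only when GA is non-null, i.e. when the
// node has not opted out of usage metrics; the ~2.5 minute sleep presumably
// lets the cloud form before GAUtils.logStartup() reports.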




