All downloads are free. The search and download functionality uses the official Maven repository.

water.TestUtil Maven / Gradle / Ivy

The newest version!
package water;

import hex.CreateFrame;
import hex.Model;
import hex.SplitFrame;
import hex.genmodel.*;
import hex.genmodel.easy.RowData;
import org.junit.AfterClass;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
import water.api.StreamingSchema;
import water.fvec.*;
import water.init.NetworkInit;
import water.junit.Priority;
import water.junit.rules.RulesPriorities;
import water.parser.BufferedString;
import water.parser.DefaultParserProviders;
import water.parser.ParseDataset;
import water.parser.ParseSetup;
import water.util.*;
import water.util.Timer;
import water.util.fp.Function;

import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLConnection;
import java.util.*;

import static org.junit.Assert.*;

@Ignore("Support for tests, but no actual tests here")
public class TestUtil extends Iced {
  { // we need assertions to be checked at least when tests are running
    // Instance initializer: force-enable `assert` statements for every class loaded
    // by this class loader, so test assertions fire even if the JVM was started
    // without the -ea flag.
    ClassLoader loader = getClass().getClassLoader();
    loader.setDefaultAssertionStatus(true);
  }

  // True when running under JaCoCo coverage instrumentation; used to scale up the
  // clouding timeout (see getDefaultTimeForClouding()) because instrumentation slows the JVM.
  public final static boolean JACOCO_ENABLED = Boolean.parseBoolean(System.getProperty("test.jacocoEnabled", "false"));
  // Guards the one-time H2O startup performed in stall_till_cloudsize(String[], int, int).
  private static boolean _stall_called_before = false;
  // Test-name prefixes parsed from -Dignore.tests / -Ddoonly.tests (null when not set).
  private static String[] ignoreTestsNames;
  private static String[] doonlyTestsNames;
  // DKV key count recorded right after clouding; checkLeakedKeys() compares against it.
  protected static int _initial_keycnt = 0;
  /**
   * Minimal cloud size to start test.
   */
  public static int MINCLOUDSIZE = Integer.parseInt(System.getProperty("cloudSize", "1"));
  /**
   * Default time in ms to wait for clouding
   */
  protected static int DEFAULT_TIME_FOR_CLOUDING = 60000 /* ms */;

  /** Creates a TestUtil requiring a single-node cloud. */
  public TestUtil() {
    this(1);
  }

  /**
   * @param minCloudSize minimum number of cloud members required before tests start;
   *                     the effective minimum is the max of this and -DcloudSize.
   */
  public TestUtil(int minCloudSize) {
    MINCLOUDSIZE = Math.max(MINCLOUDSIZE, minCloudSize);
    // Duplicated parse/empty-check logic factored into one helper; each list keeps its
    // previous value when the corresponding property is unset (original behavior).
    ignoreTestsNames = parseTestNames(System.getProperty("ignore.tests"), ignoreTestsNames);
    doonlyTestsNames = parseTestNames(System.getProperty("doonly.tests"), doonlyTestsNames);
  }

  /**
   * Splits a comma-separated list of test-name prefixes from a system property.
   *
   * @param spec    raw property value, possibly null
   * @param current value to keep when the property is unset
   * @return parsed names, {@code current} when {@code spec} is null, or null when the
   *         property was set but blank
   */
  private static String[] parseTestNames(String spec, String[] current) {
    if (spec == null) return current;
    String[] names = spec.split(",");
    return (names.length == 1 && names[0].equals("")) ? null : names;
  }

  // ==== Test Setup & Teardown Utilities ====
  // Stall test until we see at least X members of the Cloud
  /**
   * Clouding timeout in ms, scaled 10x when running under JaCoCo instrumentation
   * because coverage-instrumented JVMs start and cloud noticeably slower.
   */
  protected static int getDefaultTimeForClouding() {
    int timeout = DEFAULT_TIME_FOR_CLOUDING;
    if (JACOCO_ENABLED) {
      timeout *= 10;
    }
    return timeout;
  }

  /**
   * Blocks until the cloud has at least {@code x} members, using the default timeout.
   *
   * @param x minimal cloud size
   */
  public static void stall_till_cloudsize(int x) {
    stall_till_cloudsize(x, getDefaultTimeForClouding());
  }

  /**
   * Flattens a 2-D double array into a single 1-D array by concatenating the rows in order.
   * <p>
   * Robustness fix: the result used to be sized as {@code arr.length * arr[0].length},
   * which assumed a rectangular, non-empty input — jagged arrays with longer later rows
   * threw {@link ArrayIndexOutOfBoundsException}, shorter rows left trailing zeros, and an
   * empty array threw. The result is now sized by the sum of the actual row lengths.
   *
   * @param arr 2-D array to flatten; rows may have different lengths, may be empty
   * @return 1-D array containing all rows back to back
   */
  public static double[] changeDouble2SingleArray(double[][] arr) {
    int total = 0;
    for (double[] row : arr)
      total += row.length;
    double[] result = new double[total];
    int offset = 0;
    for (double[] row : arr) {
      System.arraycopy(row, 0, result, offset, row.length);
      offset += row.length;
    }
    return result;
  }

  /**
   * Blocks until the cloud has at least {@code x} members, starting H2O with no extra args.
   *
   * @param x       minimal cloud size
   * @param timeout max time to wait, in ms
   */
  public static void stall_till_cloudsize(int x, int timeout) {
    stall_till_cloudsize(new String[]{}, x, timeout);
  }

  /**
   * Blocks until the cloud has at least {@code x} members, using the default timeout.
   *
   * @param args arguments forwarded to H2O.main on the first invocation only
   * @param x    minimal cloud size
   */
  public static void stall_till_cloudsize(String[] args, int x) {
    stall_till_cloudsize(args, x, getDefaultTimeForClouding());
  }

  /**
   * Boots a local H2O node (once per JVM, guarded by {@code _stall_called_before}) and
   * blocks until the cloud has at least {@code x} members or the timeout expires.
   * Records the initial DKV key count consumed later by {@link #checkLeakedKeys()}.
   *
   * @param args    arguments forwarded to H2O.main on the first call only
   * @param x       minimal cloud size; lower-bounded by MINCLOUDSIZE
   * @param timeout max wait, in ms
   */
  public static void stall_till_cloudsize(String[] args, int x, int timeout) {
    x = Math.max(MINCLOUDSIZE, x);
    if (!_stall_called_before) {
      H2O.main(args);
      // Register resource roots for both source trees so web resources resolve no
      // matter which module the tests are launched from.
      H2O.registerResourceRoot(new File(System.getProperty("user.dir") + File.separator + "h2o-web/src/main/resources/www"));
      H2O.registerResourceRoot(new File(System.getProperty("user.dir") + File.separator + "h2o-core/src/main/resources/www"));
      ExtensionManager.getInstance().registerRestApiExtensions();
      _stall_called_before = true;
    }
    H2O.waitForCloudSize(x, timeout);
    _initial_keycnt = H2O.store_size();
    // Finalize registration of REST API to enable tests which are touching Schemas.
    H2O.startServingRestApi();
  }

  /**
   * JUnit after-class hook: fails the test class when DKV keys leaked during the run
   * (i.e. the store grew beyond the count recorded at cloud start), printing up to the
   * first 10 offenders; then bulk-wipes the store so the next test class starts clean.
   */
  @AfterClass
  public static void checkLeakedKeys() {
    int leaked_keys = H2O.store_size() - _initial_keycnt;
    int cnt = 0;
    if (leaked_keys > 0) {
      int print_max = 10;
      for (Key k : H2O.localKeySet()) {
        Value value = Value.STORE_get(k);
        // Ok to leak VectorGroups and the Jobs list
        if (value == null || value.isVecGroup() || value.isESPCGroup() || k == Job.LIST ||
                // Also leave around all attempted Jobs for the Jobs list
                (value.isJob() && value.get().isStopped())) {
          leaked_keys--;  // system key: excluded from the leak count
        } else {
          System.out.println(k + " -> " + (value.type() != TypeMap.PRIM_B ? value.get() : "byte[]"));
          if (cnt++ < print_max)
            System.err.println("Leaked key: " + k + " = " + TypeMap.className(value.type()));
        }
      }
      if (print_max < leaked_keys) System.err.println("... and " + (leaked_keys - print_max) + " more leaked keys");
    }
    // Passes when either nothing leaked or every "leak" turned out to be a system key.
    assertTrue("Keys leaked: " + leaked_keys + ", cnt = " + cnt, leaked_keys <= 0 || cnt == 0);
    // Bulk brainless key removal.  Completely wipes all Keys without regard.
    new DKVCleaner().doAllNodes();
    _initial_keycnt = H2O.store_size();
  }

  /**
   * Node-local task that removes from the DKV every value that is an instance of one of
   * the configured classes. Infrastructure keys (vector groups, ESPC groups, the jobs
   * list, jobs themselves and raw byte blobs) are always left untouched.
   */
  private static class KeyCleaner extends MRTask {
    private final Class[] objectType;

    private KeyCleaner(Class[] objectType) {
      this.objectType = objectType;
    }

    @Override
    protected void setupLocal() {
      Futures pending = new Futures();
      for (Key key : H2O.localKeySet()) {
        Value val = Value.STORE_get(key);
        // Skip system/infrastructure keys entirely.
        boolean keep = val == null || val.isVecGroup() || val.isESPCGroup()
                || key == Job.LIST || val.isJob() || val.type() == TypeMap.PRIM_B;
        if (keep)
          continue;
        for (Class clazz : objectType) {
          if (clazz.isInstance(val.get())) {
            DKV.remove(key, pending);
            break;
          }
        }
      }
      pending.blockForPending();
    }
  }

  /**
   * Removes, cluster-wide, all DKV values that are instances of any of the given classes.
   *
   * @param objectType classes whose instances should be dropped from the store
   */
  public static void cleanupKeys(Class... objectType) {
    new KeyCleaner(objectType).doAllNodes();
  }

  /**
   * Asserts two double arrays are element-wise equal within a relative tolerance.
   * Pairs where either side is NaN are skipped. The tolerance is scaled by the smaller
   * magnitude of the two values, so a zero on either side demands exact equality.
   * <p>
   * Fix: the loop iterated {@code actual.length} while indexing {@code expected} — a
   * longer {@code actual} threw AIOOBE and a shorter one silently compared a prefix.
   * Lengths are now asserted equal up front.
   *
   * @param expected  expected values
   * @param actual    actual values
   * @param threshold relative tolerance
   */
  public static void checkArrays(double[] expected, double[] actual, double threshold) {
    assertEquals("Array lengths differ.", expected.length, actual.length);
    for (int i = 0; i < actual.length; i++) {
      if (!Double.isNaN(expected[i]) && !Double.isNaN(actual[i])) // only compare when both are not NaN
        assertEquals(expected[i], actual[i], threshold * Math.min(Math.abs(expected[i]), Math.abs(actual[i])));
    }
  }

  /**
   * Asserts two 2-D double arrays have identical shape and that each pair of rows is
   * element-wise equal within the given relative tolerance (see {@link #checkArrays}).
   *
   * @param expected  expected values
   * @param actual    actual values
   * @param threshold relative tolerance
   */
  public static void checkDoubleArrays(double[][] expected, double[][] actual, double threshold) {
    assertEquals(expected.length, actual.length);
    for (int row = 0; row < expected.length; row++) {
      assertEquals(expected[row].length, actual[row].length);
      checkArrays(expected[row], actual[row], threshold);
    }
  }
  
  /**
   * Asserts two 2-D int arrays have identical shape and contents.
   * <p>
   * Bug fix: the original computed {@code Arrays.equals(expected[ind], actual[ind])} and
   * discarded the boolean result, so differing row contents were never reported — only
   * shape mismatches failed. Rows are now compared with {@code assertArrayEquals}.
   *
   * @param expected expected values
   * @param actual   actual values
   */
  public static void checkIntArrays(int[][] expected, int[][] actual) {
    int len1 = expected.length;
    assertEquals(len1, actual.length);

    for (int ind = 0; ind < len1; ind++) {
      assertEquals(expected[ind].length, actual[ind].length);
      assertArrayEquals("Row " + ind + " differs.", expected[ind], actual[ind]);
    }
  }
  

  /**
   * @deprecated use {@link #generateEnumOnly(int, int, int, double)} instead
   * 

* Will be removed at version 3.38.0.1 */ @Deprecated protected static Frame generate_enum_only(int numCols, int numRows, int num_factor, double missingfrac) { return generateEnumOnly(numCols, numRows, num_factor, missingfrac); } /** * generate random frames containing enum columns only * * @param numCols * @param numRows * @param num_factor * @return */ protected static Frame generateEnumOnly(int numCols, int numRows, int num_factor, double missingfrac) { long seed = System.currentTimeMillis(); System.out.println("Createframe parameters: rows: " + numRows + " cols:" + numCols + " seed: " + seed); return generateEnumOnly(numCols, numRows, num_factor, missingfrac, seed); } /** * @deprecated use {@link #generateEnumOnly(int, int, int, double, long)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected static Frame generate_enum_only(int numCols, int numRows, int num_factor, double missingfrac, long seed) { return generateEnumOnly(numCols, numRows, num_factor, missingfrac, seed); } public static Frame generateEnumOnly(int numCols, int numRows, int num_factor, double missingfrac, long seed) { CreateFrame cf = new CreateFrame(); cf.rows = numRows; cf.cols = numCols; cf.factors = num_factor; cf.binary_fraction = 0; cf.integer_fraction = 0; cf.categorical_fraction = 1; cf.has_response = false; cf.missing_fraction = missingfrac; cf.seed = seed; System.out.println("Createframe parameters: rows: " + numRows + " cols:" + numCols + " seed: " + cf.seed); return cf.execImpl().get(); } /** * @deprecated use {@link #generateRealOnly(int, int, double)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected static Frame generate_real_only(int numCols, int numRows, double missingfrac) { return generateRealOnly(numCols, numRows, missingfrac); } protected static Frame generateRealOnly(int numCols, int numRows, double missingfrac) { long seed = System.currentTimeMillis(); System.out.println("Createframe parameters: rows: " + numRows + " cols:" + numCols + " seed: " + seed); return generateRealOnly(numCols, numRows, missingfrac, seed); } /** * @deprecated use {@link #generateRealOnly(int, int, double, long)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected static Frame generate_real_only(int numCols, int numRows, double missingfrac, long seed) { return generateRealOnly(numCols, numRows, missingfrac, seed); } protected static Frame generateRealOnly(int numCols, int numRows, double missingfrac, long seed) { return generateRealWithRangeOnly(numCols, numRows, missingfrac, seed, 100); } protected static Frame generateRealWithRangeOnly(int numCols, int numRows, double missingfrac, long seed, long range) { CreateFrame cf = new CreateFrame(); cf.rows = numRows; cf.cols = numCols; cf.binary_fraction = 0; cf.integer_fraction = 0; cf.categorical_fraction = 0; cf.time_fraction = 0; cf.string_fraction = 0; cf.has_response = false; cf.missing_fraction = missingfrac; cf.real_range = range; cf.seed = seed; System.out.println("Createframe parameters: rows: " + numRows + " cols:" + numCols + " seed: " + cf.seed + " range: "+range); return cf.execImpl().get(); } /** * @deprecated use {@link #generateIntOnly(int, int, int, double)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected static Frame generate_int_only(int numCols, int numRows, int integer_range, double missingfrac) { return generateIntOnly(numCols, numRows, integer_range, missingfrac); } protected static Frame generateIntOnly(int numCols, int numRows, int integer_range, double missingfrac) { long seed = System.currentTimeMillis(); System.out.println("Createframe parameters: rows: " + numRows + " cols:" + numCols + " seed: " + seed); return generateIntOnly(numCols, numRows, integer_range, missingfrac, seed); } /** * @deprecated use {@link #generateIntOnly(int, int, int, double, long)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected static Frame generate_int_only(int numCols, int numRows, int integer_range, double missingfrac, long seed) { return generateIntOnly(numCols, numRows, integer_range, missingfrac, seed); } protected static Frame generateIntOnly(int numCols, int numRows, int integerRange, double missingfrac, long seed) { CreateFrame cf = new CreateFrame(); cf.rows = numRows; cf.cols = numCols; cf.binary_fraction = 0; cf.integer_fraction = 1; cf.categorical_fraction = 0; cf.time_fraction = 0; cf.string_fraction = 0; cf.has_response = false; cf.missing_fraction = missingfrac; cf.integer_range = integerRange; cf.seed = seed; System.out.println("Createframe parameters: rows: " + numRows + " cols:" + numCols + " seed: " + cf.seed); return cf.execImpl().get(); } protected static int[] rangeFun(int numEle, int offset) { int[] ranges = new int[numEle]; for (int index = 0; index < numEle; index++) { ranges[index] = index + offset; } return ranges; } protected static int[] sortDir(int numEle, Random rand) { int[] sortDir = new int[numEle]; int[] dirs = new int[]{-1, 1}; for (int index = 0; index < numEle; index++) { sortDir[index] = dirs[rand.nextInt(2)]; } return sortDir; } public static class DKVCleaner extends MRTask { @Override public void setupLocal() { H2O.raw_clear(); water.fvec.Vec.ESPC.clear(); } } // current running test - assumes no test parallelism just like the rest of this class public static Description CURRENT_TEST_DESCRIPTION; /** * Execute this rule before each test to print test name and test class */ @Rule transient public TestRule logRule = new TestRule() { @Override public Statement apply(Statement base, Description description) { Log.info("###########################################################"); Log.info(" * Test class name: " + description.getClassName()); Log.info(" * Test method name: " + description.getMethodName()); Log.info("###########################################################"); 
CURRENT_TEST_DESCRIPTION = description; return base; } }; /* Ignore tests specified in the ignore.tests system property: applied last, if test is ignored, no other rule with be evaluated */ @Rule transient public TestRule runRule = new @Priority(RulesPriorities.RUN_TEST) TestRule() { @Override public Statement apply(Statement base, Description description) { String testName = description.getClassName() + "#" + description.getMethodName(); boolean ignored = false; if (ignoreTestsNames != null && ignoreTestsNames.length > 0) { for (String tn : ignoreTestsNames) { if (testName.startsWith(tn)) { ignored = true; break; } } } if (doonlyTestsNames != null && doonlyTestsNames.length > 0) { ignored = true; for (String tn : doonlyTestsNames) { if (testName.startsWith(tn)) { ignored = false; break; } } } if (ignored) { // Ignored tests trump do-only tests Log.info("#### TEST " + testName + " IGNORED"); return new Statement() { @Override public void evaluate() throws Throwable { } }; } else { return base; } } }; @Rule transient public TestRule timerRule = new TestRule() { @Override public Statement apply(Statement base, Description description) { return new TimerStatement(base, description.getClassName() + "#" + description.getMethodName()); } class TimerStatement extends Statement { private final Statement _base; private final String _tname; public TimerStatement(Statement base, String tname) { _base = base; _tname = tname; } @Override public void evaluate() throws Throwable { Timer t = new Timer(); try { _base.evaluate(); } finally { Log.info("#### TEST " + _tname + " EXECUTION TIME: " + t); } } } }; // ==== Data Frame Creation Utilities ==== /** * Compare 2 frames * * @param fr1 Frame * @param fr2 Frame * @param epsilon Relative tolerance for floating point numbers */ public static void assertIdenticalUpToRelTolerance(Frame fr1, Frame fr2, double epsilon) { assertIdenticalUpToRelTolerance(fr1, fr2, epsilon, true, ""); } public static void 
assertIdenticalUpToRelTolerance(Frame fr1, Frame fr2, double epsilon, String messagePrefix) { assertIdenticalUpToRelTolerance(fr1, fr2, epsilon, true, messagePrefix); } public static void assertIdenticalUpToRelTolerance(Frame fr1, Frame fr2, double epsilon, boolean expected) { assertIdenticalUpToRelTolerance(fr1, fr2, epsilon, expected, ""); } public static void assertIdenticalUpToRelTolerance(Frame fr1, Frame fr2, double epsilon, boolean expected, String messagePrefix) { if (fr1 == fr2) return; if (expected) { assertEquals("Number of columns differ.", fr1.numCols(), fr2.numCols()); assertEquals("Number of rows differ.", fr1.numRows(), fr2.numRows()); } else if (fr1.numCols() != fr2.numCols() || fr1.numRows() != fr2.numRows()) { return; } Scope.enter(); if (!fr1.isCompatible(fr2)) fr1.makeCompatible(fr2); Cmp1 cmp = new Cmp1(epsilon, messagePrefix).doAll(new Frame(fr1).add(fr2)); Scope.exit(); assertTrue(cmp._message, expected == !cmp._unequal); } /** * Compare 2 frames * * @param fr1 Frame * @param fr2 Frame */ public static void assertBitIdentical(Frame fr1, Frame fr2) { assertIdenticalUpToRelTolerance(fr1, fr2, 0); } static File[] contentsOf(String name, File folder) { try { return FileUtils.contentsOf(folder, name); } catch (IOException ioe) { fail(ioe.getMessage()); return null; } } /** * @deprecated use {@link #parseTestFile(String)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated public static Frame parse_test_file(String fname) { return parseTestFile(fname); } /** * @deprecated use {@link #parseTestFile(String, int[])} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated public static Frame parse_test_file(String fname, int[] skipped_columns) { return parseTestFile(fname, skipped_columns); } /** * Find & parse a CSV file. NPE if file not found. * * @param fname Test filename * @return Frame or NPE */ public static Frame parseTestFile(String fname) { return parseTestFile(Key.make(), fname); } public static Frame parseTestFile(String fname, int[] skipped_columns) { return parseTestFile(Key.make(), fname, skipped_columns); } /** * Find & parse & track in {@link Scope} a CSV file. NPE if file not found. * * @param fname Test filename * @return Frame or NPE */ public static Frame parseAndTrackTestFile(String fname) { return Scope.track(parseTestFile(Key.make(), fname)); } /** * Make sure the given frame is distributed in a way that MRTask reduce operation is called * and spans at least 2 nodes of the cluster (if running on multinode). *

* If a new frame is created - it is automatically tracked in Scope if it is currently active. * * @param frame input frame * @return possibly new Frame rebalanced to a minimum number of chunks */ public static Frame ensureDistributed(Frame frame) { int minChunks = H2O.getCloudSize() * 4; // at least one node will have 4 chunks (MR tree will have at least 2 levels) return ensureDistributed(frame, minChunks); } /** * Make sure the given frame is distributed at least to given minimum number of chunks * and spans at least 2 nodes of the cluster (if running on multinode). *

* If a new frame is created - it is automatically tracked in Scope if it is currently active. * * @param frame input frame * @param minChunks minimum required number of chunks * @return possibly new Frame rebalanced to a minimum number of chunks */ public static Frame ensureDistributed(Frame frame, int minChunks) { if (frame.anyVec().nChunks() < minChunks) { // rebalance first Key k = Key.make(); H2O.submitTask(new RebalanceDataSet(frame, k, minChunks)).join(); frame = trackIfScopeActive(k.get()); } // check frame spans 2+ nodes if (H2O.CLOUD.size() > 1) { Vec v = frame.anyVec(); H2ONode node = null; for (int i = 0; i < v.nChunks(); i++) { H2ONode cNode = v.chunkKey(i).home_node(); if (v.chunkLen(i) == 0) continue; if (node == null) node = cNode; else if (cNode != node) // found proof return frame; } throw new IllegalStateException("Frame is only stored on a sigle node"); } return frame; } static Frame trackIfScopeActive(Frame frame) { if (Scope.isActive()) { // this function can only be called in tests - it is thus safe to auto-track the frame if the test created a Scope Scope.track(frame); } return frame; } public static void assertExists(String fname) { NFSFileVec v = makeNfsFileVec(fname); assertNotNull("File '" + fname + "' was not found", v); v.remove(); } public static NFSFileVec makeNfsFileVec(String fname) { try { File file = FileUtils.locateFile(fname); if ((file == null) && (isCI() || runWithoutLocalFiles())) { long lastModified = downloadTestFileFromS3(fname); if (lastModified != 0 && isCI()) { // in CI fail if the file is missing for more than 30 days if (System.currentTimeMillis() - lastModified > 30 * 24 * 60 * 60 * 1000L) { throw new IllegalStateException( "File '" + fname + "' is still not locally synchronized (more than 30 days). 
Talk to #devops-requests"); } } } return NFSFileVec.make(fname); } catch (IOException ioe) { Log.err(ioe); fail(ioe.getMessage()); return null; } } private static boolean runWithoutLocalFiles() { return Boolean.parseBoolean(System.getenv("H2O_JUNIT_ALLOW_NO_SMALLDATA")); } private static File getLocalSmalldataFile(final String fname) { String projectDir = System.getenv("H2O_PROJECT_DIR"); return projectDir != null ? new File(projectDir, fname) : new File(fname); } protected static long downloadTestFileFromS3(String fname) throws IOException { if (fname.startsWith("./")) fname = fname.substring(2); final File f = getLocalSmalldataFile(fname); if (!f.exists()) { if (f.getParentFile() != null) { boolean dirsCreated = f.getParentFile().mkdirs(); if (! dirsCreated) { Log.warn("Failed to create directory:" + f.getParentFile()); } } File tmpFile = File.createTempFile(f.getName(), "tmp", f.getParentFile()); final URL source = new URL("https://h2o-public-test-data.s3.amazonaws.com/" + fname); final URLConnection connection = source.openConnection(); connection.setConnectTimeout(1000); connection.setReadTimeout(2000); final long lastModified = connection.getLastModified(); try (final InputStream stream = connection.getInputStream()) { org.apache.commons.io.FileUtils.copyInputStreamToFile(stream, tmpFile); } if (tmpFile.renameTo(f)) { return lastModified; } else { Log.warn("Couldn't download " + fname + " from S3."); } } return 0; } /** * @deprecated use {@link #parseTestFile(Key, String, boolean)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(Key outputKey, String fname, boolean guessSetup) { return parseTestFile(outputKey, fname, guessSetup); } protected Frame parseTestFile(Key outputKey, String fname, boolean guessSetup) { return parseTestFile(outputKey, fname, guessSetup, null); } /** * @deprecated use {@link #parseTestFile(Key, String, boolean, int[])} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(Key outputKey, String fname, boolean guessSetup, int[] skippedColumns) { return parseTestFile(outputKey, fname, guessSetup, skippedColumns); } protected Frame parseTestFile(Key outputKey, String fname, boolean guessSetup, int[] skippedColumns) { NFSFileVec nfs = makeNfsFileVec(fname); ParseSetup guessParseSetup = ParseSetup.guessSetup(new Key[]{nfs._key}, false, 1); if (skippedColumns != null) { guessParseSetup.setSkippedColumns(skippedColumns); guessParseSetup.setParseColumnIndices(guessParseSetup.getNumberColumns(), skippedColumns); } return ParseDataset.parse(outputKey, new Key[]{nfs._key}, true, ParseSetup.guessSetup(new Key[]{nfs._key}, false, 1)); } /** * @deprecated use {@link #parseTestFile(Key, String)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(Key outputKey, String fname) { return parseTestFile(outputKey, fname); } public static Frame parseTestFile(Key outputKey, String fname) { return parseTestFile(outputKey, fname, new int[]{}); } /** * @deprecated use {@link #parseTestFile(Key, String, int[])} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(Key outputKey, String fname, int[] skippedColumns) { return parseTestFile(outputKey, fname, skippedColumns); } public static Frame parseTestFile(Key outputKey, String fname, int[] skippedColumns) { return parseTestFile(outputKey, fname, null, skippedColumns); } /** * @deprecated use {@link #parseTestFile(String, ParseSetupTransformer)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(String fname, ParseSetupTransformer transformer) { return parseTestFile(fname, transformer); } public static Frame parseTestFile(String fname, ParseSetupTransformer transformer) { return parseTestFile(Key.make(), fname, transformer); } /** * @deprecated use {@link #parseTestFile(String, ParseSetupTransformer, int[])} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(String fname, ParseSetupTransformer transformer, int[] skippedColumns) { return parseTestFile(fname, transformer, skippedColumns); } public static Frame parseTestFile(String fname, ParseSetupTransformer transformer, int[] skippedColumns) { return parseTestFile(Key.make(), fname, transformer, skippedColumns); } /** * @deprecated use {@link #parseTestFile(Key, String, ParseSetupTransformer)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(Key outputKey, String fname, ParseSetupTransformer transformer) { return parseTestFile(outputKey, fname, transformer); } public static Frame parseTestFile(Key outputKey, String fname, ParseSetupTransformer transformer) { return parseTestFile(outputKey, fname, transformer, null); } /** * @deprecated use {@link #parseTestFile(Key outputKey, String fname, ParseSetupTransformer transformer, int[] skippedColumns)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(Key outputKey, String fname, ParseSetupTransformer transformer, int[] skippedColumns) { return parseTestFile(outputKey, fname, transformer, skippedColumns); } public static Frame parseTestFile(Key outputKey, String fname, ParseSetupTransformer transformer, int[] skippedColumns) { NFSFileVec nfs = makeNfsFileVec(fname); ParseSetup guessedSetup = ParseSetup.guessSetup(new Key[]{nfs._key}, false, ParseSetup.GUESS_HEADER); if (skippedColumns != null) { guessedSetup.setSkippedColumns(skippedColumns); guessedSetup.setParseColumnIndices(guessedSetup.getNumberColumns(), skippedColumns); } if (transformer != null) guessedSetup = transformer.transformSetup(guessedSetup); return ParseDataset.parse(outputKey, new Key[]{nfs._key}, true, guessedSetup); } public static Frame parseTestFile(Key outputKey, String fname, ParseSetupTransformer transformer, int[] skippedColumns, int psetup) { NFSFileVec nfs = makeNfsFileVec(fname); ParseSetup guessedSetup = ParseSetup.guessSetup(new Key[]{nfs._key}, false, psetup); if (skippedColumns != null) { guessedSetup.setSkippedColumns(skippedColumns); guessedSetup.setParseColumnIndices(guessedSetup.getNumberColumns(), skippedColumns); } if (transformer != null) guessedSetup = transformer.transformSetup(guessedSetup); return ParseDataset.parse(outputKey, new Key[]{nfs._key}, true, guessedSetup); } /** * @deprecated use {@link #parseTestFile(String fname, String na_string, int check_header, byte[] column_types)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(String fname, String na_string, int check_header, byte[] column_types) { return parseTestFile(fname, na_string, check_header, column_types); } public static Frame parseTestFile(String fname, String na_string, int check_header, byte[] column_types) { return parseTestFile(fname, na_string, check_header, column_types, null, null); } /** * @deprecated use {@link #parseTestFile(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer) { return parseTestFile(fname, na_string, check_header, column_types, transformer); } public static Frame parseTestFile(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer) { return parseTestFile(fname, na_string, check_header, column_types, transformer, null); } /** * @deprecated use {@link #parseTestFile(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer, int[] skippedColumns)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_file(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer, int[] skippedColumns) { return parseTestFile(fname, na_string, check_header, column_types, transformer, skippedColumns); } public static Frame parseTestFile(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer, int[] skippedColumns) { NFSFileVec nfs = makeNfsFileVec(fname); Key[] res = {nfs._key}; // create new parseSetup in order to store our na_string ParseSetup p = ParseSetup.guessSetup(res, new ParseSetup(DefaultParserProviders.GUESS_INFO, (byte) ',', false, check_header, 0, null, null, null, null, null, null, null)); if (skippedColumns != null) { p.setSkippedColumns(skippedColumns); p.setParseColumnIndices(p.getNumberColumns(), skippedColumns); } // add the na_strings into p. if (na_string != null) { int column_number = p.getColumnTypes().length; int na_length = na_string.length() - 1; String[][] na_strings = new String[column_number][na_length + 1]; for (int index = 0; index < column_number; index++) { na_strings[index][na_length] = na_string; } p.setNAStrings(na_strings); } if (column_types != null) p.setColumnTypes(column_types); if (transformer != null) p = transformer.transformSetup(p); return ParseDataset.parse(Key.make(), res, true, p); } public static Frame parseTestFile(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer, int[] skippedColumns, boolean force_col_types) { NFSFileVec nfs = makeNfsFileVec(fname); Key[] res = {nfs._key}; // create new parseSetup in order to store our na_string ParseSetup p = ParseSetup.guessSetup(res, new ParseSetup(DefaultParserProviders.GUESS_INFO, (byte) ',', false, check_header, 0, null, null, null, null, null, null, null)); if (skippedColumns != null) { p.setSkippedColumns(skippedColumns); 
p.setParseColumnIndices(p.getNumberColumns(), skippedColumns); } if (force_col_types) // only useful for parquet parsers here p.setForceColTypes(true); // add the na_strings into p. if (na_string != null) { int column_number = p.getColumnTypes().length; int na_length = na_string.length() - 1; String[][] na_strings = new String[column_number][na_length + 1]; for (int index = 0; index < column_number; index++) { na_strings[index][na_length] = na_string; } p.setNAStrings(na_strings); } if (column_types != null) p.setColumnTypes(column_types); if (transformer != null) p = transformer.transformSetup(p); return ParseDataset.parse(Key.make(), res, true, p); } /** * @deprecated use {@link #parseTestFolder(String)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_folder(String fname) { return parseTestFolder(fname); } /** * Find & parse a folder of CSV files. NPE if file not found. * * @param fname Test filename * @return Frame or NPE */ protected Frame parseTestFolder(String fname) { return parseTestFolder(fname, null); } /** * @deprecated use {@link #parseTestFolder(String, int[])} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_folder(String fname, int[] skippedColumns) { return parseTestFolder(fname, skippedColumns); } /** * Find & parse a folder of CSV files. NPE if file not found. * * @param fname Test filename * @return Frame or NPE */ protected Frame parseTestFolder(String fname, int[] skippedColumns) { File folder = FileUtils.locateFile(fname); File[] files = contentsOf(fname, folder); Arrays.sort(files); ArrayList keys = new ArrayList<>(); for (File f : files) if (f.isFile()) keys.add(NFSFileVec.make(f)._key); Key[] res = new Key[keys.size()]; keys.toArray(res); return ParseDataset.parse(skippedColumns, Key.make(), res); } /** * @deprecated use {@link #parseTestFolder(String, String, int, byte[], ParseSetupTransformer)} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_folder(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer) { return parseTestFolder(fname, na_string, check_header, column_types, transformer); } /** * Parse a folder with csv files when a single na_string is specified. * * @param fname name of folder * @param na_string string for NA in a column * @return */ protected static Frame parseTestFolder(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer) { return parseTestFolder(fname, na_string, check_header, column_types, transformer, null); } /** * @deprecated use {@link #parseTestFolder(String, String, int, byte[], ParseSetupTransformer, int[])} instead *

* Will be removed at version 3.38.0.1 */ @Deprecated protected Frame parse_test_folder(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer, int[] skipped_columns) { return parseTestFolder(fname, na_string, check_header, column_types, transformer, skipped_columns); } /** * Parse a folder with csv files when a single na_string is specified. * * @param fname name of folder * @param na_string string for NA in a column * @return */ protected static Frame parseTestFolder(String fname, String na_string, int check_header, byte[] column_types, ParseSetupTransformer transformer, int[] skipped_columns) { File folder = FileUtils.locateFile(fname); File[] files = contentsOf(fname, folder); Arrays.sort(files); ArrayList keys = new ArrayList<>(); for (File f : files) if (f.isFile()) keys.add(NFSFileVec.make(f)._key); Key[] res = new Key[keys.size()]; keys.toArray(res); // generated the necessary key here // create new parseSetup in order to store our na_string ParseSetup p = ParseSetup.guessSetup(res, new ParseSetup(DefaultParserProviders.GUESS_INFO, (byte) ',', true, check_header, 0, null, null, null, null, null, null, null)); if (skipped_columns != null) { p.setSkippedColumns(skipped_columns); p.setParseColumnIndices(p.getNumberColumns(), skipped_columns); } // add the na_strings into p. 
if (na_string != null) { int column_number = p.getColumnTypes().length; int na_length = na_string.length() - 1; String[][] na_strings = new String[column_number][na_length + 1]; for (int index = 0; index < column_number; index++) { na_strings[index][na_length] = na_string; } p.setNAStrings(na_strings); } if (column_types != null) p.setColumnTypes(column_types); if (transformer != null) p = transformer.transformSetup(p); return ParseDataset.parse(Key.make(), res, true, p); } public static class Frames { public final Frame train; public final Frame test; public final Frame valid; public Frames(Frame train, Frame test, Frame valid) { this.train = train; this.test = test; this.valid = valid; } } public static Frames split(Frame f) { return split(f, 0.9, 0d); } public static Frames split(Frame f, double testFraction) { return split(f, testFraction, 0); } public static Frames split(Frame f, double testFraction, double validFraction) { double[] fractions; double trainFraction = 1d - testFraction - validFraction; if (validFraction > 0d) { fractions = new double[]{trainFraction, testFraction, validFraction}; } else { fractions = new double[]{trainFraction, testFraction}; } SplitFrame sf = new SplitFrame(f, fractions, null); sf.exec().get(); Key[] splitKeys = sf._destination_frames; Frame trainFrame = Scope.track(splitKeys[0].get()); Frame testFrame = Scope.track(splitKeys[1].get()); Frame validFrame = (validFraction > 0d) ? Scope.track(splitKeys[2].get()) : null; return new Frames(trainFrame, testFrame, validFrame); } /** * A Numeric Vec from an array of ints * * @param rows Data * @return The Vec */ public static Vec vec(int... rows) { return vec(null, rows); } /** * A Categorical/Factor Vec from an array of ints - with categorical/domain mapping * * @param domain Categorical/Factor names, mapped by the data values * @param rows Data * @return The Vec */ public static Vec vec(String[] domain, int... 
rows) { Key k = Vec.VectorGroup.VG_LEN1.addVec(); Futures fs = new Futures(); AppendableVec avec = new AppendableVec(k, Vec.T_NUM); avec.setDomain(domain); NewChunk chunk = new NewChunk(avec, 0); for (int r : rows) chunk.addNum(r); chunk.close(0, fs); Vec vec = avec.layout_and_close(fs); fs.blockForPending(); return vec; } /** * A numeric Vec from an array of ints */ public static Vec ivec(int... rows) { return vec(null, rows); } /** * A categorical Vec from an array of strings */ public static Vec cvec(String... rows) { return cvec(null, rows); } public static Vec cvec(String[] domain, String... rows) { HashMap domainMap = new HashMap<>(10); ArrayList domainList = new ArrayList<>(10); if (domain != null) { int j = 0; for (String s : domain) { domainMap.put(s, j++); domainList.add(s); } } int[] irows = new int[rows.length]; for (int i = 0, j = 0; i < rows.length; i++) { String s = rows[i]; if (!domainMap.containsKey(s)) { domainMap.put(s, j++); domainList.add(s); } irows[i] = domainMap.get(s); } return vec(domainList.toArray(new String[]{}), irows); } /** * A numeric Vec from an array of doubles */ public static Vec dvec(double... rows) { Key k = Vec.VectorGroup.VG_LEN1.addVec(); Futures fs = new Futures(); AppendableVec avec = new AppendableVec(k, Vec.T_NUM); NewChunk chunk = new NewChunk(avec, 0); for (double r : rows) chunk.addNum(r); chunk.close(0, fs); Vec vec = avec.layout_and_close(fs); fs.blockForPending(); return vec; } /** * A time Vec from an array of ints */ public static Vec tvec(int... rows) { Key k = Vec.VectorGroup.VG_LEN1.addVec(); Futures fs = new Futures(); AppendableVec avec = new AppendableVec(k, Vec.T_TIME); NewChunk chunk = new NewChunk(avec, 0); for (int r : rows) chunk.addNum(r); chunk.close(0, fs); Vec vec = avec.layout_and_close(fs); fs.blockForPending(); return vec; } /** * A string Vec from an array of strings */ public static Vec svec(String... 
rows) { Key k = Vec.VectorGroup.VG_LEN1.addVec(); Futures fs = new Futures(); AppendableVec avec = new AppendableVec(k, Vec.T_STR); NewChunk chunk = new NewChunk(avec, 0); for (String r : rows) chunk.addStr(r); chunk.close(0, fs); Vec vec = avec.layout_and_close(fs); fs.blockForPending(); return vec; } /** * A string Vec from an array of strings */ public static Vec uvec(UUID... rows) { Key k = Vec.VectorGroup.VG_LEN1.addVec(); Futures fs = new Futures(); AppendableVec avec = new AppendableVec(k, Vec.T_UUID); NewChunk chunk = new NewChunk(avec, 0); for (UUID r : rows) chunk.addUUID(r); chunk.close(0, fs); Vec vec = avec.layout_and_close(fs); fs.blockForPending(); return vec; } // Shortcuts for initializing constant arrays public static String[] ar(String... a) { return a; } public static String[][] ar(String[]... a) { return a; } public static byte[] ar(byte... a) { return a; } public static long[] ar(long... a) { return a; } public static long[][] ar(long[]... a) { return a; } public static int[] ari(int... a) { return a; } public static int[][] ar(int[]... a) { return a; } public static float[] arf(float... a) { return a; } public static double[] ard(double... a) { return a; } public static double[][] ard(double[]... a) { return a; } public static double[][] ear(double... a) { double[][] r = new double[a.length][1]; for (int i = 0; i < a.length; i++) r[i][0] = a[i]; return r; } // Java7+ @SafeVarargs public static T[] aro(T... a) { return a; } // ==== Comparing Results ==== public static void assertFrameEquals(Frame expected, Frame actual, double absDelta) { assertFrameEquals(expected, actual, absDelta, null); } public static void assertFrameEquals(Frame expected, Frame actual, Double absDelta, Double relativeDelta) { assertEquals("Frames have different number of vecs. 
", expected.vecs().length, actual.vecs().length); for (int i = 0; i < expected.vecs().length; i++) { if (expected.vec(i).isString()) assertStringVecEquals(expected.vec(i), actual.vec(i)); else assertVecEquals(i + "/" + expected._names[i] + " ", expected.vec(i), actual.vec(i), absDelta, relativeDelta); } } public static void assertVecEquals(Vec expecteds, Vec actuals, double delta) { assertVecEquals("", expecteds, actuals, delta); } public static void assertVecEquals(Vec expecteds, Vec actuals, double delta, double relativeDelta) { assertVecEquals("", expecteds, actuals, delta, relativeDelta); } public static void assertVecEquals(String messagePrefix, Vec expecteds, Vec actuals, double delta) { assertVecEquals(messagePrefix, expecteds, actuals, delta, null); } public static void assertVecEquals(String messagePrefix, Vec expecteds, Vec actuals, Double absDelta, Double relativeDelta) { assertEquals(expecteds.length(), actuals.length()); for (int i = 0; i < expecteds.length(); i++) { final String message = messagePrefix + i + ": " + expecteds.at(i) + " != " + actuals.at(i) + ", chunkIds = " + expecteds.elem2ChunkIdx(i) + ", " + actuals.elem2ChunkIdx(i) + ", row in chunks = " + (i - expecteds.chunkForRow(i).start()) + ", " + (i - actuals.chunkForRow(i).start()); double expectedVal = expecteds.at(i); double actualVal = actuals.at(i); assertEquals(message, expectedVal, actualVal, computeAssertionDelta(expectedVal, absDelta, relativeDelta)); } } private static double computeAssertionDelta(double expectedVal, Double absDelta, Double relDelta) { if ((absDelta == null || absDelta.isNaN()) && (relDelta == null || relDelta.isNaN())) { throw new IllegalArgumentException("Either absolute or relative delta has to be non-null and non-NaN"); } else if (relDelta == null || relDelta.isNaN()) { return absDelta; } else { double computedRelativeDelta; double deltaBase = Math.abs(expectedVal); if (deltaBase == 0) { computedRelativeDelta = relDelta; } else { computedRelativeDelta = 
deltaBase * relDelta; } if (absDelta == null || absDelta.isNaN()) { return computedRelativeDelta; } else { // use the bigger delta for the assert return Math.max(computedRelativeDelta, absDelta); } } } public static void assertUUIDVecEquals(Vec expecteds, Vec actuals) { assertEquals(expecteds.length(), actuals.length()); assertEquals("Vec types match", expecteds.get_type_str(), actuals.get_type_str()); for (int i = 0; i < expecteds.length(); i++) { UUID expected = new UUID(expecteds.at16l(i), expecteds.at16h(i)); UUID actual = new UUID(actuals.at16l(i), actuals.at16h(i)); final String message = i + ": " + expected + " != " + actual + ", chunkIds = " + expecteds.elem2ChunkIdx(i) + ", " + actuals.elem2ChunkIdx(i) + ", row in chunks = " + (i - expecteds.chunkForRow(i).start()) + ", " + (i - actuals.chunkForRow(i).start()); assertEquals(message, expected, actual); } } private static String toStr(BufferedString bs) { return bs != null ? bs.toString() : null; } public static void assertStringVecEquals(Vec expecteds, Vec actuals) { assertEquals(expecteds.length(), actuals.length()); assertEquals("Vec types match", expecteds.get_type_str(), actuals.get_type_str()); for (int i = 0; i < expecteds.length(); i++) { String expected = toStr(expecteds.atStr(new BufferedString(), i)); String actual = toStr(actuals.atStr(new BufferedString(), i)); final String message = i + ": " + expected + " != " + actual + ", chunkIds = " + expecteds.elem2ChunkIdx(i) + ", " + actuals.elem2ChunkIdx(i) + ", row in chunks = " + (i - expecteds.chunkForRow(i).start()) + ", " + (i - actuals.chunkForRow(i).start()); assertEquals(message, expected, actual); } } private static String getFactorAsString(Vec v, long row) { return v.isNA(row) ? 
null : v.factor((long) v.at(row)); } public static void assertCatVecEquals(Vec expecteds, Vec actuals) { assertEquals(expecteds.length(), actuals.length()); assertEquals("Vec types match", expecteds.get_type_str(), actuals.get_type_str()); for (int i = 0; i < expecteds.length(); i++) { String expected = getFactorAsString(expecteds, i); String actual = getFactorAsString(actuals, i); final String message = i + ": " + expected + " != " + actual + ", chunkIds = " + expecteds.elem2ChunkIdx(i) + ", " + actuals.elem2ChunkIdx(i) + ", row in chunks = " + (i - expecteds.chunkForRow(i).start()) + ", " + (i - actuals.chunkForRow(i).start()); assertEquals(message, expected, actual); } } public static void assertTwoDimTableEquals(TwoDimTable expected, TwoDimTable actual) { assertEquals("tableHeader different", expected.getTableHeader(), actual.getTableHeader()); assertEquals("tableDescriptionDifferent", expected.getTableDescription(), actual.getTableDescription()); assertArrayEquals("rowHeaders different", expected.getRowHeaders(), actual.getRowHeaders()); assertArrayEquals("colHeaders different", expected.getColHeaders(), actual.getColHeaders()); assertArrayEquals("colTypes different", expected.getColTypes(), actual.getColTypes()); assertArrayEquals("colFormats different", expected.getColFormats(), actual.getColFormats()); assertEquals("colHeaderForRowHeaders different", expected.getColHeaderForRowHeaders(), actual.getColHeaderForRowHeaders()); for (int r = 0; r < expected.getRowDim(); r++) { for (int c = 0; c < expected.getColDim(); c++) { Object ex = expected.get(r, c); Object act = actual.get(r, c); assertEquals("cellValues different at row " + r + ", col " + c, ex, act); } } } public static void checkStddev(double[] expected, double[] actual, double threshold) { for (int i = 0; i < actual.length; i++) assertEquals(expected[i], actual[i], threshold); } public static void checkIcedArrays(IcedWrapper[][] expected, IcedWrapper[][] actual, double threshold) { for (int i = 0; i < 
actual.length; i++) for (int j = 0; j < actual[0].length; j++) assertEquals(expected[i][j].d, actual[i][j].d, threshold); } public static boolean[] checkEigvec(double[][] expected, double[][] actual, double threshold) { int nfeat = actual.length; int ncomp = actual[0].length; boolean[] flipped = new boolean[ncomp]; for (int j = 0; j < ncomp; j++) { // flipped[j] = Math.abs(expected[0][j] - actual[0][j]) > threshold; flipped[j] = Math.abs(expected[0][j] - actual[0][j]) > Math.abs(expected[0][j] + actual[0][j]); for (int i = 0; i < nfeat; i++) { assertEquals(expected[i][j], flipped[j] ? -actual[i][j] : actual[i][j], threshold); } } return flipped; } public static boolean[] checkEigvec(double[][] expected, TwoDimTable actual, double threshold) { int nfeat = actual.getRowDim(); int ncomp = actual.getColDim(); boolean[] flipped = new boolean[ncomp]; for (int j = 0; j < ncomp; j++) { flipped[j] = Math.abs(expected[0][j] - (double) actual.get(0, j)) > threshold; for (int i = 0; i < nfeat; i++) { assertEquals(expected[i][j], flipped[j] ? 
-(double) actual.get(i, j) : (double) actual.get(i, j), threshold); } } return flipped; } public static boolean equalTwoArrays(double[] array1, double[] array2, double tol) { assert array1.length == array2.length : "Arrays have different lengths"; for (int index = 0; index < array1.length; index++) { double diff = Math.abs(array1[index] - array2[index])/Math.max(Math.abs(array1[index]), Math.abs(array2[index])); if (diff > tol) return false; } return true; } public static class StandardizeColumns extends MRTask { int[] _columns2Transform; double[] _colMeans; double[] _oneOStd; public StandardizeColumns(int[] cols, double[] colMeans, double[] oneOSigma, Frame transF) { assert cols.length == colMeans.length; assert colMeans.length == oneOSigma.length; _columns2Transform = cols; _colMeans = colMeans; _oneOStd = oneOSigma; int numCols = transF.numCols(); for (int cindex : cols) { // check to make sure columns are numerical assert transF.vec(cindex).isNumeric(); } } @Override public void map(Chunk[] chks) { int chunkLen = chks[0].len(); int colCount = 0; for (int cindex : _columns2Transform) { for (int rindex = 0; rindex < chunkLen; rindex++) { double temp = (chks[cindex].atd(rindex) - _colMeans[colCount]) * _oneOStd[colCount]; chks[cindex].set(rindex, temp); } colCount += 1; } } } public static boolean equalTwoHashMaps(HashMap coeff1, HashMap coeff2, double tol) { assert coeff1.size() == coeff2.size() : "HashMap sizes are differenbt"; for (String key : coeff1.keySet()) { if (Math.abs(coeff1.get(key) - coeff2.get(key)) > tol) return false; } return true; } public static boolean equalTwoDimTables(TwoDimTable tab1, TwoDimTable tab2, double tol) { boolean same = true; //compare colHeaders same = Arrays.equals(tab1.getColHeaders(), tab2.getColHeaders()) && Arrays.equals(tab1.getColTypes(), tab2.getColTypes()); String[] colTypes = tab2.getColTypes(); IcedWrapper[][] cellValues1 = tab1.getCellValues(); IcedWrapper[][] cellValues2 = tab2.getCellValues(); same = same && 
cellValues1.length == cellValues2.length; if (!same) return false; // compare cell values for (int cindex = 0; cindex < cellValues1.length; cindex++) { same = same && cellValues1[cindex].length == cellValues2[cindex].length; if (!same) return false; for (int index = 0; index < cellValues1[cindex].length; index++) { if (colTypes[index].equals("double")) { same = same && Math.abs(Double.parseDouble(cellValues1[cindex][index].toString()) - Double.parseDouble(cellValues2[cindex][index].toString())) < tol; } else { same = same && cellValues1[cindex][index].toString().equals(cellValues2[cindex][index].toString()); } } } return same; } public static boolean[] checkEigvec(TwoDimTable expected, TwoDimTable actual, double threshold) { int nfeat = actual.getRowDim(); int ncomp = actual.getColDim(); boolean[] flipped = new boolean[ncomp]; // better way to get sign for (int j = 0; j < ncomp; j++) { for (int i = 0; i < nfeat; i++) { if (Math.abs((Double) expected.get(i, j)) > 0.0 && Math.abs((Double) actual.get(i, j)) > 0.0) { // only non zeros flipped[j] = !(Math.signum((Double) expected.get(i, j)) == Math.signum((Double) actual.get(i, j))); break; } } } for (int j = 0; j < ncomp; j++) { for (int i = 0; i < nfeat; i++) { assertEquals((double) expected.get(i, j), flipped[j] ? 
-(double) actual.get(i, j) : (double) actual.get(i, j), threshold); } } return flipped; } public static boolean[] checkProjection(Frame expected, Frame actual, double threshold, boolean[] flipped) { assertEquals("Number of columns", expected.numCols(), actual.numCols()); assertEquals("Number of columns in flipped", expected.numCols(), flipped.length); int nfeat = (int) expected.numRows(); int ncomp = expected.numCols(); for (int j = 0; j < ncomp; j++) { Vec.Reader vexp = expected.vec(j).new Reader(); Vec.Reader vact = actual.vec(j).new Reader(); assertEquals(vexp.length(), vact.length()); for (int i = 0; i < nfeat; i++) { if (vexp.isNA(i) || vact.isNA(i)) { continue; } // only perform comparison when data is not NAN assertEquals(vexp.at8(i), flipped[j] ? -vact.at8(i) : vact.at8(i), threshold); } } return flipped; } // Run tests from cmd-line since testng doesn't seem to be able to it. public static void main(String[] args) { H2O.main(new String[0]); for (String arg : args) { try { System.out.println("=== Starting " + arg); Class clz = Class.forName(arg); Method main = clz.getDeclaredMethod("main"); main.invoke(null); } catch (InvocationTargetException ite) { Throwable e = ite.getCause(); e.printStackTrace(); try { Thread.sleep(100); } catch (Exception ignore) { } } catch (Exception e) { e.printStackTrace(); try { Thread.sleep(100); } catch (Exception ignore) { } } finally { System.out.println("=== Stopping " + arg); } } try { Thread.sleep(100); } catch (Exception ignore) { } if (args.length != 0) UDPRebooted.T.shutdown.send(H2O.SELF); } protected static class Cmp1 extends MRTask { final double _epsilon; final String _messagePrefix; public Cmp1(double epsilon) { _epsilon = epsilon; _messagePrefix = ""; } public Cmp1(double epsilon, String msg) { _epsilon = epsilon; _messagePrefix = msg + " "; } public boolean _unequal; public String _message; @Override public void map(Chunk chks[]) { for (int cols = 0; cols < chks.length >> 1; cols++) { Chunk c0 = chks[cols]; Chunk 
c1 = chks[cols + (chks.length >> 1)]; for (int rows = 0; rows < chks[0]._len; rows++) { String msgBase = _messagePrefix + "At [" + rows + ", " + cols + "]: "; if (c0.isNA(rows) != c1.isNA(rows)) { _unequal = true; _message = msgBase + "c0.isNA " + c0.isNA(rows) + " != c1.isNA " + c1.isNA(rows); return; } else if (!(c0.isNA(rows) && c1.isNA(rows))) { if (c0 instanceof C16Chunk && c1 instanceof C16Chunk) { long lo0 = c0.at16l(rows), lo1 = c1.at16l(rows); long hi0 = c0.at16h(rows), hi1 = c1.at16h(rows); if (lo0 != lo1 || hi0 != hi1) { _unequal = true; _message = msgBase + " lo0 " + lo0 + " != lo1 " + lo1 + " || hi0 " + hi0 + " != hi1 " + hi1; return; } } else if (c0 instanceof CStrChunk && c1 instanceof CStrChunk) { BufferedString s0 = new BufferedString(), s1 = new BufferedString(); c0.atStr(s0, rows); c1.atStr(s1, rows); if (s0.compareTo(s1) != 0) { _unequal = true; _message = msgBase + " s0 " + s0 + " != s1 " + s1; return; } } else if ((c0 instanceof C8Chunk) && (c1 instanceof C8Chunk)) { long d0 = c0.at8(rows), d1 = c1.at8(rows); if (d0 != d1) { _unequal = true; _message = msgBase + " d0 " + d0 + " != d1 " + d1; return; } } else { double d0 = c0.atd(rows), d1 = c1.atd(rows); double cmpValue = ((d0 == 0.0) || (d1 == 0.0)) ? 
1.0 : Math.abs(d0) + Math.abs(d1); if (!(Math.abs(d0 - d1) <= cmpValue * _epsilon)) { _unequal = true; _message = msgBase + " d0 " + d0 + " != d1 " + d1; return; } } } } } } @Override public void reduce(Cmp1 cmp) { if (_unequal) return; if (cmp._unequal) { _unequal = true; _message = cmp._message; } } } public static void assertFrameAssertion(FrameAssertion frameAssertion) { int[] dim = frameAssertion.dim; Frame frame = null; try { frame = frameAssertion.prepare(); assertEquals("Frame has to have expected number of columns", dim[0], frame.numCols()); assertEquals("Frame has to have expected number of rows", dim[1], frame.numRows()); frameAssertion.check(frame); } finally { frameAssertion.done(frame); if (frame != null) frame.delete(); } } public static abstract class FrameAssertion { protected final String file; private final int[] dim; // columns X rows public FrameAssertion(String file, int[] dim) { this.file = file; this.dim = dim; } public Frame prepare() { return parseTestFile(file); } public void done(Frame frame) { } public void check(Frame frame) { } public final int nrows() { return dim[1]; } public final int ncols() { return dim[0]; } } public static abstract class GenFrameAssertion extends FrameAssertion { public GenFrameAssertion(String file, int[] dim) { this(file, dim, null); } public GenFrameAssertion(String file, int[] dim, ParseSetupTransformer psTransformer) { super(file, dim); this.psTransformer = psTransformer; } protected File generatedFile; protected ParseSetupTransformer psTransformer; protected abstract File prepareFile() throws IOException; @Override public Frame prepare() { try { File f = generatedFile = prepareFile(); System.out.println("File generated into: " + f.getCanonicalPath()); if (f.isDirectory()) { return parseTestFolder(f.getCanonicalPath(), null, ParseSetup.HAS_HEADER, null, psTransformer); } else { return parseTestFile(f.getCanonicalPath(), psTransformer); } } catch (IOException e) { throw new RuntimeException("Cannot prepare 
test frame from file: " + file, e); } } @Override public void done(Frame frame) { if (generatedFile != null) { generatedFile.deleteOnExit(); org.apache.commons.io.FileUtils.deleteQuietly(generatedFile); } } } public static class Datasets { public static Frame iris() { return parseTestFile(Key.make("iris.hex"), "smalldata/iris/iris_wheader.csv"); } } /** * Tests can hook into the parse process using this interface and modify some of the guessed parameters. * This simplifies the test workflow as usually most of the guessed parameters are correct and the test really only * needs to modify/add few parameters. */ public interface ParseSetupTransformer { ParseSetup transformSetup(ParseSetup guessedSetup); } /** * @param frame * @param columnName column's name to be factorized * @return Frame with factorized column */ public static Frame asFactor(Frame frame, String columnName) { Vec vec = frame.vec(columnName); frame.replace(frame.find(columnName), vec.toCategoricalVec()); vec.remove(); DKV.put(frame); return frame; } public static void printOutFrameAsTable(Frame fr) { printOutFrameAsTable(fr, false, fr.numRows()); } public static void printOutFrameAsTable(Frame fr, boolean rollups, long limit) { assert limit <= Integer.MAX_VALUE; TwoDimTable twoDimTable = fr.toTwoDimTable(0, (int) limit, rollups); System.out.println(twoDimTable.toString(2, true)); } public void printOutColumnsMetadata(Frame fr) { for (String header : fr.toTwoDimTable().getColHeaders()) { String type = fr.vec(header).get_type_str(); int cardinality = fr.vec(header).cardinality(); System.out.println(header + " - " + type + String.format("; Cardinality = %d", cardinality)); } } protected static RowData toRowData(Frame fr, String[] columns, long row) { RowData rd = new RowData(); for (String col : columns) { Vec v = fr.vec(col); if (!v.isNumeric() && !v.isCategorical()) { throw new UnsupportedOperationException("Unsupported column type for column '" + col + "': " + v.get_type_str()); } if (!v.isNA(row)) { 
Object val; if (v.isCategorical()) { val = v.domain()[(int) v.at8(row)]; } else { val = v.at(row); } rd.put(col, val); } } return rd; } protected static double[] toNumericRow(Frame fr, long row) { double[] result = new double[fr.numCols()]; for (int i = 0; i < result.length; i++) { result[i] = fr.vec(i).at(row); } return result; } /** * Compares two frames. Two frames are equal if and only if they contain the same number of columns, rows, * and values at each cell (coordinate) are the same. Column names are ignored, as well as chunks sizes and all other * aspects besides those explicitly mentioned. * * @param f1 Frame to be compared, not null * @param f2 Frame to be compared, not null * @param delta absolute tolerance * @return True if frames are the same up to tolerance - number of columns, rows & values at each cell. * @throws AssertionError If any inequalities are found * @throws IllegalArgumentException If input frames don't have the same column and row count */ public static boolean compareFrames(final Frame f1, final Frame f2, double delta) { return compareFrames(f1, f2, delta, 0.0); } /** * Compares two frames. Two frames are equal if and only if they contain the same number of columns, rows, * and values at each cell (coordinate) are the same. Column names are ignored, as well as chunks sizes and all other * aspects besides those explicitly mentioned. * * @param f1 Frame to be compared, not null * @param f2 Frame to be compared, not null * @param delta absolute tolerance * @param relativeDelta relative tolerance * @return True if frames are the same up to tolerance - number of columns, rows & values at each cell. 
* @throws AssertionError If any inequalities are found * @throws IllegalArgumentException If input frames don't have the same column and row count */ public static boolean compareFrames(final Frame f1, final Frame f2, double delta, double relativeDelta) { Objects.requireNonNull(f1); Objects.requireNonNull(f2); if (f1.numCols() != f2.numCols()) throw new IllegalArgumentException(String.format("Number of columns is not the same: {%o, %o}", f1.numCols(), f2.numCols())); if (f1.numRows() != f2.numRows()) throw new IllegalArgumentException(String.format("Number of rows is not the same: {%o, %o}", f1.numRows(), f2.numRows())); for (int vecNum = 0; vecNum < f1.numCols(); vecNum++) { final Vec f1Vec = f1.vec(vecNum); final Vec f2Vec = f2.vec(vecNum); assertVecEquals(f1Vec, f2Vec, delta, relativeDelta); } return true; } public static final String[] ignoredColumns(final Frame frame, final String... usedColumns) { Set ignored = new HashSet(Arrays.asList(frame.names())); ignored.removeAll(Arrays.asList(usedColumns)); return ignored.toArray(new String[ignored.size()]); } public static boolean compareFrames(final Frame f1, final Frame f2) throws IllegalStateException { return compareFrames(f1, f2, 0); } /** * Sets a locale cluster-wide. Consider returning it back to the default value. * * @param locale Locale to set to the whole cluster */ public static void setLocale(final Locale locale) { new ChangeLocaleTsk(locale) .doAllNodes(); } private static class ChangeLocaleTsk extends MRTask { private final Locale _locale; public ChangeLocaleTsk(Locale locale) { this._locale = locale; } @Override protected void setupLocal() { Locale.setDefault(_locale); } } /** * Converts a H2OFrame to a csv file for debugging purposes. * * @param fileNameWithPath: String containing filename with path that will contain the H2O Frame * @param h2oframe: H2O Frame to be saved as CSV file. * @param header: boolean to decide if column names should be saved. Set to false if don't care. 
* @param hex_string: boolean to decide if the double values are written in hex. Set to false if don't care. * @throws IOException */ public static void writeFrameToCSV(String fileNameWithPath, Frame h2oframe, boolean header, boolean hex_string) throws IOException { Frame.CSVStreamParams params = new Frame.CSVStreamParams() .setHeaders(header) .setHexString(hex_string); File targetFile = new File(fileNameWithPath); byte[] buffer = new byte[1 << 20]; int bytesRead; try (InputStream frameToStream = h2oframe.toCSV(params); OutputStream outStream = new FileOutputStream(targetFile)) { while ((bytesRead = frameToStream.read(buffer)) > 0) { // for our toCSV stream, return 0 as EOF, not -1 outStream.write(buffer, 0, bytesRead); } } } /** * @param len Length of the resulting vector * @param randomSeed Seed for the random generator (for reproducibility) * @return An instance of {@link Vec} with binary weights (either 0.0D or 1.0D, nothing in between). */ public static Vec createRandomBinaryWeightsVec(final long len, final long randomSeed) { final Vec weightsVec = Vec.makeZero(len, Vec.T_NUM); final Random random = RandomUtils.getRNG(randomSeed); for (int i = 0; i < weightsVec.length(); i++) { weightsVec.set(i, random.nextBoolean() ? 
1.0D : 0D);
  }
  return weightsVec;
}

/**
 * @param len        Length of the resulting vector
 * @param randomSeed Seed for the random generator (for reproducibility)
 * @return An instance of {@link Vec} with random double values
 */
public static Vec createRandomDoubleVec(final long len, final long randomSeed) {
  final Vec vec = Vec.makeZero(len, Vec.T_NUM);
  final Random random = RandomUtils.getRNG(randomSeed);
  for (int i = 0; i < vec.length(); i++) {
    vec.set(i, random.nextDouble());
  }
  return vec;
}

/**
 * @param len        Length of the resulting vector
 * @param randomSeed Seed for the random generator (for reproducibility)
 * @return An instance of {@link Vec} with random categorical values over a 100-level domain
 */
public static Vec createRandomCategoricalVec(final long len, final long randomSeed) {
  String[] domain = new String[100];
  for (int i = 0; i < domain.length; i++)
    domain[i] = "CAT_" + i;
  final Vec vec = Scope.track(Vec.makeZero(len, Vec.T_NUM)).makeZero(domain);
  final Random random = RandomUtils.getRNG(randomSeed);
  for (int i = 0; i < vec.length(); i++) {
    vec.set(i, random.nextInt(domain.length));
  }
  return vec;
}

/**
 * Serializes the model's MOJO to a temporary zip file, reads it back via the genmodel reader
 * backend and returns the resulting GenModel; the temporary file is always cleaned up.
 */
@SuppressWarnings("rawtypes")
public static GenModel toMojo(Model model, String testName, boolean readModelMetaData) {
  final String filename = testName + ".zip";
  StreamingSchema ss = new StreamingSchema(model.getMojo(), filename);
  try (FileOutputStream os = new FileOutputStream(ss.getFilename())) {
    ss.getStreamWriter().writeTo(os);
  } catch (IOException e) {
    throw new IllegalStateException("MOJO writing failed", e);
  }
  try {
    MojoReaderBackend cr = MojoReaderBackendFactory.createReaderBackend(filename);
    return ModelMojoReader.readFrom(cr, readModelMetaData);
  } catch (IOException e) {
    throw new IllegalStateException("MOJO loading failed", e);
  } finally {
    boolean deleted = new File(filename).delete();
    if (!deleted)
      Log.warn("Failed to delete the file");
  }
}

public static boolean isCI() {
  return System.getProperty("user.name").equals("jenkins");
}

/**
 * Asserts that the object is stored in DKV under its own key and that the stored copy has an
 * equal checksum. NOTE(review): the generic signature was mangled in transcription
 * ("public static > void"); restored here as a Keyed-bound type parameter — verify against
 * the original source.
 */
public static <T extends Keyed<T>> void assertInDKV(Key<T> key, T object) {
  assertEquals(key, object._key);
  T dkvObject = DKV.getGet(key);
  assertNotNull(dkvObject);
  assertEquals(object.checksum(true), dkvObject.checksum(true));
}

/** Applies the transform to every non-NA element of the Vec, in place, and returns the same Vec. */
public static Vec transformVec(Vec vec, Function<Double, Double> transform) {
  new MRTask() {
    @Override
    public void map(Chunk c) {
      for (int i = 0; i < c._len; i++) {
        if (c.isNA(i))
          continue;
        c.set(i, transform.apply(c.atd(i)));
      }
    }
  }.doAll(vec);
  return vec;
}

/**
 * Debugging-only function that lets the developer open Flow (or R/Py) during execution of a junit test
 * and inspect the model.
 */
@SuppressWarnings("unused")
@Deprecated // just to make it noticeable in IDE
public static void browser() {
  if (isCI()) {
    throw new IllegalStateException("Never leave browser() calls in committed source code - only for debugging");
  }
  File root = new File(".");
  while (!new File(root, "h2o-core").isDirectory()) {
    root = new File(root, "..");
  }
  H2O.registerResourceRoot(new File(root, "h2o-web/src/main/resources/www"));
  H2O.registerResourceRoot(new File(root, "h2o-core/src/main/resources/www"));
  String message = "Open H2O Flow in your web browser: ";
  System.err.println(message + H2O.getURL(NetworkInit.h2oHttpView.getScheme()));
  while (!H2O.getShutdownRequested()) {
    try {
      Thread.sleep(60 * 1000);
      System.err.println("Still waiting for H2O to shutdown");
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
  }
}
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy