All Downloads are FREE. Search and download functionalities are using the official Maven repository.

water.api.FramesHandler Maven / Gradle / Ivy

package water.api;

import jsr166y.CountedCompleter;

import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import hex.Model;
import water.DKV;
import water.Futures;
import water.H2O;
import water.Iced;
import water.Job;
import water.Key;
import water.KeySnapshot;
import water.Value;
import water.api.ModelsHandler.Models;
import water.exceptions.H2OColumnNotFoundArgumentException;
import water.exceptions.H2OIllegalArgumentException;
import water.exceptions.H2OKeyNotFoundArgumentException;
import water.exceptions.H2OKeyWrongTypeArgumentException;
import water.exceptions.H2OKeysNotFoundArgumentException;
import water.exceptions.H2OParseException;
import water.fvec.Frame;
import water.fvec.Vec;
import water.persist.PersistManager;
import water.util.KeyedVoid;
import water.util.Log;

/*
 * FramesHandler deals with all REST API endpoints that start with /Frames.
 * 

* GET /3/Frames/(?.*)/export/(?.*)/overwrite/(?.*) *

export(): Export a Frame to the given path with optional overwrite. *

* GET /3/Frames/(?.*)/columns/(?.*)/summary *

columnSummary(): Return the summary metrics for a column, e.g. mins, maxes, mean, sigma, percentiles, etc. *

* GET /3/Frames/(?.*)/columns/(?.*)/domain *

columnDomain(): Return the domains for the specified column. \"null\" if the column is not an Enum. *

* GET /3/Frames/(?.*)/columns/(?.*) *

column(): Return the specified column from a Frame. *

* TODO: deleteme? * GET /3/Frames/(?.*)/columns *

columns(): Return all the columns from a Frame. *

* GET /3/Frames/(?.*)/summary *

summary(): Return a Frame, including the histograms, after forcing computation of rollups. *

* GET /3/Frames/(?.*) *

fetch(): Return the specified Frame. *

* GET /3/Frames *

list(): Return all Frames in the H2O distributed K/V store. *

* DELETE /3/Frames/(?.*) *

delete(): Delete the specified Frame from the H2O distributed K/V store. *

* DELETE /3/Frames *

deleteAll(): Delete all Frames from the H2O distributed K/V store. *

*/ class FramesHandler> extends Handler { /** Class which contains the internal representation of the frames list and params. */ protected static final class Frames extends Iced { Key frame_id; long row_offset; int row_count; long column_offset; int column_count; Frame[] frames; String column; public boolean find_compatible_models = false; /** * Fetch all Frames from the KV store. */ protected static Frame[] fetchAll() { // Get all the frames. final Key[] frameKeys = KeySnapshot.globalKeysOfClass(Frame.class); Frame[] frames = new Frame[frameKeys.length]; for (int i = 0; i < frameKeys.length; i++) { Frame frame = getFromDKV("(none)", frameKeys[i]); frames[i] = frame; } return frames; } /** * Fetch all the Models so we can see if they are compatible with our Frame(s). */ static protected Map> fetchModelCols(Model[] all_models) { Map> all_models_cols = new HashMap<>(); for (Model m : all_models) all_models_cols.put(m, new HashSet<>(Arrays.asList(m._output._names))); return all_models_cols; } /** * For a given frame return an array of the compatible models. * * @param frame The frame for which we should fetch the compatible models. * @param all_models An array of all the Models in the DKV. * @return */ private static Model[] findCompatibleModels(Frame frame, Model[] all_models) { Map> all_models_cols = Frames.fetchModelCols(all_models); List compatible_models = new ArrayList(); Set frame_column_names = new HashSet(Arrays.asList(frame._names)); for (Map.Entry> entry : all_models_cols.entrySet()) { Model model = entry.getKey(); Set model_cols = entry.getValue(); if (frame_column_names.containsAll(model_cols)) { // See if adapt throws an exception or not. try { if( model.adaptTestForTrain(new Frame(frame), false, false).length == 0 ) compatible_models.add(model); } catch( IllegalArgumentException e ) { // skip } } } return compatible_models.toArray(new Model[0]); } } /** * Return all the frames. The Frames list will be instances of FrameSynopsisV3, * which only contains a few fields, for performance reasons. * @see FrameSynopsisV3 */ @SuppressWarnings("unused") // called through reflection by RequestServer public FramesV3 list(int version, FramesV3 s) { Frames f = s.createAndFillImpl(); f.frames = Frames.fetchAll(); s.fillFromImplWithSynopsis(f); return s; } // TODO: in /4 return a schema with just a list of column names. @SuppressWarnings("unused") // called through reflection by RequestServer public FramesV3 columns(int version, FramesV3 s) { // TODO: return *only* the columns. . . This may be a different schema. return fetch(version, s); } // TODO: almost identical to ModelsHandler; refactor public static Frame getFromDKV(String param_name, String key_str) { return getFromDKV(param_name, Key.make(key_str)); } // TODO: almost identical to ModelsHandler; refactor public static Frame getFromDKV(String param_name, Key key) { if (null == key) throw new H2OIllegalArgumentException(param_name, "Frames.getFromDKV()", key); Value v = DKV.get(key); if (null == v) throw new H2OKeyNotFoundArgumentException(param_name, key.toString()); Iced ice = v.get(); if( ice instanceof Vec ) return new Frame((Vec)ice); if (! (ice instanceof Frame)) throw new H2OKeyWrongTypeArgumentException(param_name, key.toString(), Frame.class, ice.getClass()); return (Frame)ice; } // TODO: return VecV4 /** Return a single column from the frame. */ @SuppressWarnings("unused") // called through reflection by RequestServer public FramesV3 column(int version, FramesV3 s) { // TODO: should return a Vec schema Frame frame = getFromDKV("key", s.frame_id.key()); Vec vec = frame.vec(s.column); if (null == vec) throw new H2OColumnNotFoundArgumentException("column", s.frame_id.toString(), s.column); Vec[] vecs = { vec }; String[] names = { s.column }; Frame new_frame = new Frame(names, vecs); s.frames = new FrameV3[1]; s.frames[0] = new FrameV3().fillFromImpl(new_frame); ((FrameV3)s.frames[0]).clearBinsField(); return s; } // TODO: return VecDomainV4 @SuppressWarnings("unused") // called through reflection by RequestServer public FramesV3 columnDomain(int version, FramesV3 s) { Frame frame = getFromDKV("key", s.frame_id.key()); Vec vec = frame.vec(s.column); if (vec == null) throw new H2OColumnNotFoundArgumentException("column", s.frame_id.toString(), s.column); s.domain = new String[1][]; s.domain[0] = vec.domain(); return s; } // TODO: return VecSummaryV4 @SuppressWarnings("unused") // called through reflection by RequestServer public FramesV3 columnSummary(int version, FramesV3 s) { Frame frame = getFromDKV("key", s.frame_id.key()); // safe Vec vec = frame.vec(s.column); if (null == vec) throw new H2OColumnNotFoundArgumentException("column", s.frame_id.toString(), s.column); // Compute second pass of rollups: the histograms. if (!vec.isString()) { vec.bins(); } // Cons up our result s.frames = new FrameV3[1]; s.frames[0] = new FrameV3().fillFromImpl(new Frame(new String[]{s.column}, new Vec[]{vec}), s.row_offset, s.row_count, s.column_offset, s.column_count, true); return s; } /** Docs for column summary. */ public StringBuffer columnSummaryDocs(int version, StringBuffer docs) { return null; // doc(this, version, docs, "docs/columnSummary.md"); } // TODO: return everything but the second level of rollups (histograms); currently mins and maxes are missing /** Return a single frame. */ @SuppressWarnings("unused") // called through reflection by RequestServer public FramesV3 fetch(int version, FramesV3 s) { FramesV3 frames = doFetch(version, s, FrameV3.ColV3.NO_SUMMARY); // Summary data is big, and not always there: null it out here. You have to call columnSummary // to force computation of the summary data. for (FrameBase a_frame: frames.frames) { ((FrameV3)a_frame).clearBinsField(); } return frames; } private FramesV3 doFetch(int version, FramesV3 s, boolean force_summary) { Frames f = s.createAndFillImpl(); Frame frame = getFromDKV("key", s.frame_id.key()); // safe s.frames = new FrameV3[1]; s.frames[0] = new FrameV3(frame, s.row_offset, s.row_count).fillFromImpl(frame, s.row_offset, s.row_count, s.column_offset, s.column_count, force_summary); // TODO: Refactor with FrameBase if (s.find_compatible_models) { Model[] compatible = Frames.findCompatibleModels(frame, Models.fetchAll()); s.compatible_models = new ModelSchema[compatible.length]; ((FrameV3)s.frames[0]).compatible_models = new String[compatible.length]; int i = 0; for (Model m : compatible) { s.compatible_models[i] = (ModelSchema)Schema.schema(version, m).fillFromImpl(m); ((FrameV3)s.frames[0]).compatible_models[i] = m._key.toString(); i++; } } return s; } /** Export a single frame to the specified path. */ public FramesV3 export(int version, FramesV3 s) { Frame fr = getFromDKV("key", s.frame_id.key()); Log.info("ExportFiles processing (" + s.path + ")"); s.job = (JobV3) Schema.schema(version, Job.class).fillFromImpl(ExportDatasetJob.export(fr, s.path, s.frame_id.key().toString(),s.force)); return s; } private static class ExportDatasetJob extends Job { private ExportDatasetJob(String path) { super(Key.make(path), "Export frame"); } private static ExportDatasetJob export(Frame fr, String path, String frameName, boolean overwrite) { // Validate input boolean fileExists = H2O.getPM().exists(path); if (overwrite && fileExists) { Log.warn("File " + path + " exists, but will be overwritten!"); } else if (!overwrite && fileExists) { throw new H2OIllegalArgumentException(path, "exportFrame", "File " + path + " already exists!"); } InputStream is = (fr).toCSV(true, false); ExportDatasetJob job = new ExportDatasetJob(path); ExportTask t = new ExportTask(is, path, frameName, overwrite, job); job.start(t, fr.anyVec().nChunks(), true); return job; } private static class ExportTask extends H2O.H2OCountedCompleter { final InputStream _csv; final String _path; final String _frameName; final boolean _overwrite; final Job _j; ExportTask(InputStream csv, String path, String frameName, boolean overwrite, Job j) { _csv = csv; _path = path; _frameName = frameName; _overwrite = overwrite; _j = j; } private void copyStream(OutputStream os, final int buffer_size) { int curIdx = 0; try { byte[] bytes = new byte[buffer_size]; for (; ; ) { int count = _csv.read(bytes, 0, buffer_size); if (count <= 0) { break; } os.write(bytes, 0, count); int workDone = ((Frame.CSVStream) _csv)._curChkIdx; if (curIdx != workDone) { _j.update(workDone - curIdx); curIdx = workDone; } } } catch (Exception ex) { throw new RuntimeException(ex); } } @Override public void compute2() { PersistManager pm = H2O.getPM(); OutputStream os = null; try { os = pm.create(_path, _overwrite); copyStream(os, 4 * 1024 * 1024); } finally { if (os != null) { try { os.close(); Log.info("Key '" + _frameName + "' was written to " + _path + "."); } catch (Exception e) { Log.err(e); } } } tryComplete(); } // Took a crash/NPE somewhere in the parser. Attempt cleanup. @Override public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) { if (_j != null) { _j.cancel(); if (ex instanceof H2OParseException) { throw (H2OParseException) ex; } else { _j.failed(ex); } } return true; } @Override public void onCompletion(CountedCompleter caller) { _j.done(); } } } @SuppressWarnings("unused") // called through reflection by RequestServer // TODO: return list of FrameSummaryV3 that has histograms et al. public FramesV3 summary(int version, FramesV3 s) { Frame frame = getFromDKV("key", s.frame_id.key()); // safe if (null != frame) { Futures fs = new Futures(); Vec[] vecArr = frame.vecs(); for (Vec v : vecArr) { if (! v.isString()) { v.startRollupStats(fs, Vec.DO_HISTOGRAMS); } } fs.blockForPending(); } return doFetch(version, s, FrameV3.ColV3.FORCE_SUMMARY); } /** Remove an unlocked frame. Fails if frame is in-use. */ @SuppressWarnings("unused") // called through reflection by RequestServer public FramesV3 delete(int version, FramesV3 frames) { Frame frame = getFromDKV("key", frames.frame_id.key()); // safe frame.delete(); // lock & remove return frames; } /** * Remove ALL an unlocked frames. Throws IAE for all deletes that failed * (perhaps because the Frames were locked & in-use). */ @SuppressWarnings("unused") // called through reflection by RequestServer public FramesV3 deleteAll(int version, FramesV3 frames) { final Key[] keys = KeySnapshot.globalKeysOfClass(Frame.class); ArrayList missing = new ArrayList<>(); Futures fs = new Futures(); for( int i = 0; i < keys.length; i++ ) { try { getFromDKV("(none)", keys[i]).delete(null, fs); } catch( IllegalArgumentException iae ) { missing.add(keys[i].toString()); } } fs.blockForPending(); if( missing.size() != 0 ) throw new H2OKeysNotFoundArgumentException("(none)", missing.toArray(new String[missing.size()])); return frames; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy