All downloads are free. The search and download functionality uses the official Maven repository.

water.serial.ObjectTreeBinarySerializer Maven / Gradle / Ivy

package water.serial;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

import water.H2O;
import water.Key;
import water.Keyed;
import water.persist.Persist;
import water.util.FileUtils;
import water.util.Log;

/**
 * Object tree serializer.
 *
 * It dumps a given list of keys (representing object graph in breadth-first order)
 * into a folder referenced by an URI.
 * It creates a metafile holding list of files which express load order
 * of given files and their deserialization.
 *
 * If save destination exists, the save will fail.
 * If load source does not or is not readable, the load will fail.
 * If list of keys to save contains null-Key, then loaded list will contain null-key as well.
 *
 * The caller is responsible for cleanup of destination directory in case of failure.
 *
 */
public class ObjectTreeBinarySerializer implements Serializer, URI> {
  /** Name of metafile containing information about exported object tree. */
  public static final String METAFILE = "__h2o_bin.mbin";
  /** Extension string used for exported objects */
  public static final String EXTENSION = ".bin";
  /** Null marker */
  public static final String NULL_PLACEHOLDER = "";

  /** Do DKV after load put on loaded object if it has defined key. */
  final boolean dkvPutAfterLoad;
  /** Do DKV put after load even the object already exists in DKV */
  final boolean overrideInDkv;
  /* During save override the destination file. */
  final boolean overrideFile;

  public ObjectTreeBinarySerializer() {
    this(true, true, true);
  }
  public ObjectTreeBinarySerializer(boolean overrideFile) {
    this(true, true, overrideFile);
  }
  public ObjectTreeBinarySerializer(boolean dkvPutAfterLoad, boolean overrideInDkv, boolean overrideFile) {
    this.dkvPutAfterLoad = dkvPutAfterLoad;
    this.overrideInDkv = overrideInDkv;
    this.overrideFile = overrideFile;
  }

  @Override
  public void save(List objectTree, URI outputDir) throws IOException {
    assert outputDir.getQuery() == null : "Query parameters are not allowed in URI.";
    // Get persist manager for given output URI
    Persist persist = H2O.getPM().getPersistForURI(outputDir);
    // Create the destination folder
    if (!persist.mkdirs(outputDir.toString())) {
      boolean targetExists = persist.exists(outputDir.toString());
      if (overrideFile && targetExists) {
        Log.warn("Directory " + outputDir + " already exists.");
      } else if (overrideFile && !targetExists || !overrideFile && !targetExists) {
        throw new IllegalArgumentException(
            "Directory " + outputDir + " cannot be created! Check you access privileges!");
      } else {
        throw new IllegalArgumentException("Directory " + outputDir + " already exists but "
                                           + "the flag for force overwrite is `false`.");
      }
    }
    // Step-by-step saves all files into folder
    List savedFilenames = new ArrayList<>(objectTree.size());
    BinarySerializer serial = getKeyedSerializer();
    // Serialize full object tree
    // FIXME: this should be in future distributed operation saving results to distributed FS in parallel.
    for(Key k : objectTree) {
      if (k != null) {
        String filename = FileUtils.keyToFileName(k) + EXTENSION;
        URI fileUri = URI.create(outputDir + "/" + filename); // We have URI
        // NOTE: this will fetch remote object to the caller node!
        serial.save(k.get(), fileUri);
        savedFilenames.add(filename);
      } else {
        savedFilenames.add(NULL_PLACEHOLDER);
      }
    }
    // last step dump a metafile with saved keys
    dumpMetaFile(persist, savedFilenames, outputDir);
  }

  @Override
  public List load(List l, URI inputDir) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public List load(URI inputDir) throws IOException {
    // Get persist manager for given output URI
    Persist persist = H2O.getPM().getPersistForURI(inputDir);
    // Look for metafile and load it, it returns list of filenames in the load order
    List filenames = loadMetaFile(persist, inputDir);
    List objectTree = new ArrayList<>(filenames.size());
    // Load given object (side effect of load is save into DKV)
    BinarySerializer serial = getKeyedSerializer();
    for (String filename : filenames) {
      if (!NULL_PLACEHOLDER.equals(filename)) {
        URI fileUri = URI.create(inputDir + "/" + filename);
        Keyed ok = serial.load(fileUri);
        objectTree.add(ok._key);
      } else {
        objectTree.add(null);
      }
    }
    Collections.reverse(objectTree);
    return objectTree;
  }

  /** Dump a metafile with a list of save files. */
  protected void dumpMetaFile(Persist persist, List filenames, URI outputDir) throws IOException {
    URI metafileUri = URI.create(outputDir + "/" + METAFILE);
    PrintStream os = new PrintStream(persist.create(metafileUri.toString(), true));
    Collections.reverse(filenames);
    try {
      for (String fname : filenames)
        os.println(fname);
    } finally {
      FileUtils.close(os);
    }
  }

  protected List loadMetaFile(Persist persist, URI inputDir) throws IOException {
    URI metafileUri = URI.create(inputDir + "/" + METAFILE);
    BufferedReader reader = new BufferedReader(new InputStreamReader(persist.open(metafileUri.toString())));
    List filenames = new LinkedList<>();
    try {
      String fname = null;
      while ((fname = reader.readLine()) != null) {
        filenames.add(fname);
      }
    } finally {
      FileUtils.close(reader);
    }
    return filenames;
  }

  protected BinarySerializer getKeyedSerializer() {
    return new KeyedBinarySerializer(dkvPutAfterLoad, overrideInDkv, overrideFile);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy