All Downloads are FREE. Search and download functionalities are using the official Maven repository.

water.fvec.NFSFileVec Maven / Gradle / Ivy

There is a newer version: 3.8.2.9
Show newest version
package water.fvec;

import java.io.File;

import water.*;
import water.persist.PersistNFS;

/** A NFS distributed file-backed Vector
 *  

* Vec will be lazily loaded from the NFS file on-demand. Each machine is * expected to have the same filesystem view onto a file with the same * byte contents. Each machine will lazily load only the sections of the file * that are assigned to that machine. Basically, the file starts striped * across some globally visible file system (e.g. NFS, or just replicated on * local disk) and is loaded into memory - again striped across the machines - * without any network traffic or data-motion. *

* Useful to "memory map" into RAM large datafiles, often pure text files. */ public class NFSFileVec extends FileVec { /** Make a new NFSFileVec key which holds the filename implicitly. This name * is used by the Chunks to load data on-demand. Blocking * @return A NFSFileVec mapped to this file. */ public static NFSFileVec make(File f) { Futures fs = new Futures(); NFSFileVec nfs = make(f, fs); fs.blockForPending(); return nfs; } /** Make a new NFSFileVec key which holds the filename implicitly. This name * is used by the Chunks to load data on-demand. * @return A NFSFileVec mapped to this file. */ public static NFSFileVec make(File f, Futures fs) { if( !f.exists() ) throw new IllegalArgumentException("File not found: "+f.toString()); long size = f.length(); Key k = Vec.newKey(PersistNFS.decodeFile(f)); // Insert the top-level FileVec key into the store NFSFileVec nfs = new NFSFileVec(k,size); DKV.put(k,nfs,fs); return nfs; } private NFSFileVec(Key key, long len) {super(key,len,Value.NFS);} @Override public int setChunkSize(Frame fr, int chunkSize) { // Clear cached chunks first // Peeking into a file before the chunkSize has been set // will load chunks of the file in DFLT_CHUNK_SIZE amounts. // If this side-effect is not reversed when _chunkSize differs // from the default value, parsing will either double read // sections (_chunkSize < DFLT_CHUNK_SIZE) or skip data // (_chunkSize > DFLT_CHUNK_SIZE). This reverses this side-effect. Futures fs = new Futures(); Keyed.remove(_key, fs); fs.blockForPending(); return super.setChunkSize(fr, chunkSize); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy