
water.rapids.ASTColSlice Maven / Gradle / Ivy
package water.rapids;
import jsr166y.CountedCompleter;
import water.*;
import water.fvec.*;
import water.parser.BufferedString;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
/** Column slice; allows R-like syntax.
* Non-negative column numbers past the largest column are an error.
* Negative numbers, and lists of negative numbers, are allowed and represent
* an *exclusion* list; exclusions that fall outside the column range are ignored. */
class ASTColSlice extends ASTPrim {
/** Returns the argument names for this primitive: {@code "ary"} then {@code "cols"}. */
@Override public String[] args() {
  final String[] argNames = {"ary", "cols"};
  return argNames;
}
/** Returns the token count of the form {@code (cols src [col_list])}. */
@Override int nargs() {
  // The "cols" token itself plus its two operands.
  return 3;
}
/** Returns the textual name of this primitive, {@code "cols"}. */
@Override public String str() {
  return "cols";
}
/**
 * Executes {@code asts[1]} and slices the result by the column selector in
 * {@code asts[2]}.
 *
 * A {@code ValRow} result is sliced directly, with the selector resolved
 * against the row's own names. Any other result is read as a Frame; the
 * selector goes through {@code col_select} and the chosen columns are added
 * to a new Frame. The source's Vec objects are added as-is; no copy is made
 * in this method.
 *
 * @param env  execution environment handed to {@code asts[1].exec}
 * @param stk  stack helper used to track the evaluated value
 * @param asts operator token, source expression and column selector
 * @return the sliced row, or a {@code ValFrame} wrapping the new Frame
 */
@Override
public Val apply(Env env, Env.StackHelp stk, AST asts[]) {
  final Val evaluated = stk.track(asts[1].exec(env));

  // Row values carry their own names and know how to slice themselves.
  if( evaluated instanceof ValRow ) {
    final ValRow row = (ValRow)evaluated;
    final int[] rowCols = asts[2].columns(row._names);
    return row.slice(rowCols);
  }

  // Frame case: resolve the selector, then assemble the result column by column.
  final Frame src = evaluated.getFrame();
  final int[] picked = col_select(src.names(), asts[2]);
  final Frame dst = new Frame();
  final Vec[] srcVecs = src.vecs();
  for( int i = 0; i < picked.length; i++ ) {
    final int c = picked[i];
    dst.add(src._names[c], srcVecs[c]);
  }
  return new ValFrame(dst);
}
// Complex column selector; by list of names or list of numbers or single
// name or number. Numbers can be ranges or negative.
static int[] col_select( String[] names, AST col_selector ) {
int[] cols = col_selector.columns(names);
if( cols.length==0 ) return cols; // Empty inclusion list?
if( cols[0] >= 0 ) { // Positive (inclusion) list
if( cols[cols.length-1] >= names.length )
throw new IllegalArgumentException("Column must be an integer from 0 to "+(names.length-1));
return cols;
}
// Negative (exclusion) list; convert to positive inclusion list
int[] pos = new int[names.length];
for( int col : cols ) // more or less a radix sort, filtering down to cols to ignore
if( 0 <= -col-1 && -col-1 < names.length )
pos[-col-1] = -1;
int j=0;
for( int i=0; i= fr.numCols()) )
throw new IllegalArgumentException("Column must be an integer from 0 to "+(fr.numCols()-1));
for( int col : cols ) // For all included columns
if( col >= 0 && col < fr.numCols() ) // Ignoring out-of-range ones
fr2.add(fr.names()[col],fr.vecs()[col]);
return new ValFrame(fr2);
}
}
/** Row Slice */
class ASTRowSlice extends ASTPrim {
/** Returns the argument names for this primitive: {@code "ary"} then {@code "rows"}. */
@Override public String[] args() {
  final String[] argNames = {"ary", "rows"};
  return argNames;
}
/** Returns the token count of the form {@code (rows src [row_list])}. */
@Override int nargs() {
  // The "rows" token itself plus its two operands.
  return 3;
}
/** Returns the textual name of this primitive, {@code "rows"}. */
@Override public String str() {
  return "rows";
}
@Override
public Val apply(Env env, Env.StackHelp stk, AST asts[]) {
Frame fr = stk.track(asts[1].exec(env)).getFrame();
Frame returningFrame;
long nrows = fr.numRows();
if( asts[2] instanceof ASTNumList ) {
final ASTNumList nums = (ASTNumList)asts[2];
if( !nums._isSort && !nums.isEmpty() && nums._bases[0] >= 0)
throw new IllegalArgumentException("H2O does not currently reorder rows, please sort your row selection first");
long[] rows = (nums._isList || nums.min()<0) ? nums.expand8Sort() : null;
if( rows!=null ) {
if (rows.length == 0) { // Empty inclusion list?
} else if (rows[0] >= 0) { // Positive (inclusion) list
if (rows[rows.length - 1] > nrows)
throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1));
} else { // Negative (exclusion) list
if (rows[rows.length - 1] >= 0)
throw new IllegalArgumentException("Cannot mix negative and postive row selection");
// Invert the list to make a positive list, ignoring out-of-bounds values
BitSet bs = new BitSet((int) nrows);
for (int i = 0; i < rows.length; i++) {
int idx = (int) (-rows[i] - 1); // The positive index
if (idx >= 0 && idx < nrows)
bs.set(idx); // Set column to EXCLUDE
}
rows = new long[(int) nrows - bs.cardinality()];
for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1))
rows[j++] = i;
}
}
final long[] ls = rows;
returningFrame = new MRTask(){
@Override public void map(Chunk[] cs, NewChunk[] ncs) {
if( nums.cnt()==0 ) return;
if( ls != null && ls.length == 0 ) return;
long start = cs[0].start();
long end = start + cs[0]._len;
long min = ls==null?(long)nums.min():ls[0], max = ls==null?(long)nums.max()-1:ls[ls.length-1]; // exclusive max to inclusive max when stride == 1
// [ start, ..., end ] the chunk
//1 [] nums out left: nums.max() < start
//2 [] nums out rite: nums.min() > end
//3 [ nums ] nums run left: nums.min() < start && nums.max() <= end
//4 [ nums ] nums run in : start <= nums.min() && nums.max() <= end
//5 [ nums ] nums run rite: start <= nums.min() && end < nums.max()
if( !(maxend) ) { // not situation 1 or 2 above
long startOffset = (min > start ? min : start); // situation 4 and 5 => min > start;
for( int i=(int)(startOffset-start); i= 0 )) {
for(int c=0;c idxs = new ArrayList<>();
for( double i=0; i[] dmap = new HashMap[types.length];
String[][] domains = new String[types.length][];
int[][][] cmaps = new int[types.length][][];
for(int k=0;k();
int c = 0;
byte t = types[k];
if( t == Vec.T_CAT ) {
int[][] maps = new int[frs.length][];
for(int i=1; i < frs.length; i++) {
maps[i] = new int[frs[i].vec(k).domain().length];
for(int j=0; j < maps[i].length; j++ ) {
String s = frs[i].vec(k).domain()[j];
if( !dmap[k].containsKey(s)) dmap[k].put(s, maps[i][j]=c++);
else maps[i][j] = dmap[k].get(s);
}
}
cmaps[k] = maps;
} else {
cmaps[k] = new int[frs.length][];
}
domains[k] = c==0?null:new String[c];
for( Map.Entry e : dmap[k].entrySet())
domains[k][e.getValue()] = e.getKey();
}
// Now make Keys for the new Vecs
Key[] keys = fr.anyVec().group().addVecs(fr.numCols());
Vec[] vecs = new Vec[fr.numCols()];
int rowLayout = Vec.ESPC.rowLayout(keys[0],espc);
for( int i=0; i {
final Vec[] _vecs; // Input vecs to be row-bound
final Vec _v; // Result vec
final long[] _espc; // Result layout
int[][] _cmaps; // categorical mapping array
/**
 * Stores the inputs for binding one result column.
 *
 * @param cc    passed straight to the superclass constructor
 * @param vecs  input vecs to be row-bound
 * @param v     result vec
 * @param espc  result layout
 * @param cmaps categorical mapping array, indexed in parallel with {@code vecs}
 */
RbindTask(H2O.H2OCountedCompleter cc, Vec[] vecs, Vec v, long[] espc, int[][] cmaps) {
  super(cc);
  _vecs = vecs;
  _v = v;
  _espc = espc;
  _cmaps = cmaps;
}
/**
 * Forks one {@code RbindMRTask} for every input vec from index 1 onward;
 * index 0 is not processed here.
 *
 * Each child receives its matching entry of {@code _cmaps}, the result vec,
 * and a chunk offset equal to the total chunk count of the inputs forked
 * before it. The pending count is raised by (number of forked children - 1)
 * before any child is started.
 * NOTE(review): this relies on each child signalling this task when it
 * finishes, since {@code this} is handed to the child's constructor — confirm
 * against MRTask.
 */
@Override
public void compute2() {
  addToPendingCount(_vecs.length - 2);
  int chunkBase = 0;
  int i = 1;
  while( i < _vecs.length ) {
    final Vec input = _vecs[i];
    final RbindMRTask child = new RbindMRTask(this, _cmaps[i], _v, chunkBase);
    child.dfork(input);
    // The next input's chunks land right after this one's.
    chunkBase += _vecs[i].nChunks();
    i++;
  }
}
/**
 * Completion hook: stores the result vec {@code _v} in the DKV.
 *
 * @param cc the completer passed in by the framework; not used here
 */
@Override
public void onCompletion(CountedCompleter cc) {
  DKV.put(_v);
}
}
/**
 * Per-chunk worker that writes one input chunk into the result vec.
 *
 * The destination chunk index is the input chunk's own index shifted by
 * {@code _chunkOffset}. With a category map, every non-NA row value is
 * translated through the map into a fresh {@code NewChunk}; without one, the
 * input chunk is deep-copied and stored under the destination chunk key.
 */
private static class RbindMRTask extends MRTask {
  private final int[] _cmap;       // old-level -> new-level translation, or null for a plain copy
  private final int _chunkOffset;  // added to the input chunk index to get the result chunk index
  private final Vec _v;            // result vec

  /**
   * @param hc     passed straight to the superclass constructor
   * @param cmap   category translation table, or null when no remapping is needed
   * @param v      result vec
   * @param offset chunk index offset for this input within the result vec
   */
  RbindMRTask(H2O.H2OCountedCompleter hc, int[] cmap, Vec v, int offset) {
    super(hc);
    _cmap = cmap;
    _v = v;
    _chunkOffset = offset;
  }

  /** Copies or remaps the single input chunk {@code cs} into the result vec. */
  @Override
  public void map(Chunk cs) {
    final int destIdx = _chunkOffset + cs.cidx();
    final Key destKey = Vec.chunkKey(_v._key, destIdx);

    // No translation table: store a deep copy of the chunk under the destination key.
    if (_cmap == null) {
      DKV.put(destKey, cs.deepCopy(), _fs, true);
      return;
    }

    assert !cs.hasFloat(): "Input chunk ("+cs.getClass()+") has float, but is expected to be categorical";
    final NewChunk remapped = new NewChunk(_v, destIdx);
    // Walk the rows, translating each level into the merged domain; NAs pass through.
    for (int row = 0; row < cs._len; row++) {
      if (cs.isNA(row)) {
        remapped.addNA();
      } else {
        final int oldLevel = (int) cs.at8(row);
        remapped.addNum(_cmap[oldLevel], 0);
      }
    }
    remapped.close(_fs);
  }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy