All downloads are free. The search and download functionality uses the official Maven repository.

water.rapids.ASTColSlice Maven / Gradle / Ivy

There is a newer version: 3.8.2.9
Show newest version
package water.rapids;

import jsr166y.CountedCompleter;
import water.*;
import water.fvec.*;
import water.parser.BufferedString;

import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;

/** Column slice; allows R-like syntax.
 *  Numbers past the largest column are an error.
 *  Negative numbers and number lists are allowed, and represent an *exclusion* list */
class ASTColSlice extends ASTPrim {
  /** Names of the two arguments this primitive takes, in call order. */
  @Override public String[] args() {
    final String[] argNames = {"ary", "cols"};
    return argNames;
  }
  /** Slot count for the call form {@code (cols src [col_list])}: the operator plus its two arguments. */
  @Override int nargs() {
    return 3; // (cols src [col_list])
  }
  /** Textual name of this primitive. */
  @Override public String str() {
    final String opName = "cols";
    return opName;
  }
  /** Evaluate the source expression ({@code asts[1]}) and keep only the
   *  columns chosen by the selector ({@code asts[2]}).
   *  A {@code ValRow} source is sliced directly via {@code ValRow.slice};
   *  anything else is read as a Frame and a new Frame is assembled from the
   *  selected name/Vec pairs of the source.
   *  @param env  execution environment handed to the source expression
   *  @param stk  stack helper used to track the evaluated source value
   *  @param asts operator followed by the source and the column selector
   *  @return the sliced row, or a {@code ValFrame} wrapping the new Frame */
  @Override
  public Val apply(Env env, Env.StackHelp stk, AST asts[]) {
    final Val source = stk.track(asts[1].exec(env));

    // Row input: resolve the selector against the row's own names.
    if( source instanceof ValRow ) {
      final ValRow row = (ValRow)source;
      final int[] picked = asts[2].columns(row._names);
      return row.slice(picked);
    }

    // Frame input: resolve the selector, then copy over each chosen column.
    final Frame src = source.getFrame();
    final int[] cols = col_select(src.names(),asts[2]);
    final Frame dst = new Frame();
    final Vec[] vecs = src.vecs();
    for( int i=0; i<cols.length; i++ ) {
      final int col = cols[i];
      dst.add(src._names[col],vecs[col]);
    }
    return new ValFrame(dst);
  }

  // Complex column selector; by list of names or list of numbers or single
  // name or number.  Numbers can be ranges or negative.
  static int[] col_select( String[] names, AST col_selector ) {
    int[] cols = col_selector.columns(names);
    if( cols.length==0 ) return cols; // Empty inclusion list?
    if( cols[0] >= 0 ) { // Positive (inclusion) list
      if( cols[cols.length-1] >= names.length )
        throw new IllegalArgumentException("Column must be an integer from 0 to "+(names.length-1));
      return cols;
    }

    // Negative (exclusion) list; convert to positive inclusion list
    int[] pos = new int[names.length];
    for( int col : cols ) // more or less a radix sort, filtering down to cols to ignore
      if( 0 <= -col-1 && -col-1 < names.length ) 
        pos[-col-1] = -1;
    int j=0;
    for( int i=0; i= fr.numCols()) )
      throw new IllegalArgumentException("Column must be an integer from 0 to "+(fr.numCols()-1));
    for( int col : cols )       // For all included columns
      if( col >= 0 && col < fr.numCols() ) // Ignoring out-of-range ones
        fr2.add(fr.names()[col],fr.vecs()[col]);
    return new ValFrame(fr2);
  }
}

/** Row Slice */
class ASTRowSlice extends ASTPrim {
  /** Names of the two arguments this primitive takes, in call order. */
  @Override public String[] args() {
    final String[] argNames = {"ary", "rows"};
    return argNames;
  }
  /** Slot count for the call form {@code (rows src [row_list])}: the operator plus its two arguments. */
  @Override int nargs() {
    return 3; // (rows src [row_list])
  }
  /** Textual name of this primitive. */
  @Override public String str() {
    final String opName = "rows";
    return opName;
  }
  @Override
  public Val apply(Env env, Env.StackHelp stk, AST asts[]) {
    Frame fr = stk.track(asts[1].exec(env)).getFrame();
    Frame returningFrame;
    long nrows = fr.numRows();
    if( asts[2] instanceof ASTNumList ) {
      final ASTNumList nums = (ASTNumList)asts[2];

      if( !nums._isSort && !nums.isEmpty() && nums._bases[0] >= 0)
        throw new IllegalArgumentException("H2O does not currently reorder rows, please sort your row selection first");

      long[] rows = (nums._isList || nums.min()<0) ? nums.expand8Sort() : null;
      if( rows!=null ) {
        if (rows.length == 0) {      // Empty inclusion list?
        } else if (rows[0] >= 0) { // Positive (inclusion) list
          if (rows[rows.length - 1] > nrows)
            throw new IllegalArgumentException("Row must be an integer from 0 to " + (nrows - 1));
        } else {                  // Negative (exclusion) list
          if (rows[rows.length - 1] >= 0)
            throw new IllegalArgumentException("Cannot mix negative and postive row selection");
          // Invert the list to make a positive list, ignoring out-of-bounds values
          BitSet bs = new BitSet((int) nrows);
          for (int i = 0; i < rows.length; i++) {
            int idx = (int) (-rows[i] - 1); // The positive index
            if (idx >= 0 && idx < nrows)
              bs.set(idx);        // Set column to EXCLUDE
          }
          rows = new long[(int) nrows - bs.cardinality()];
          for (int i = bs.nextClearBit(0), j = 0; i < nrows; i = bs.nextClearBit(i + 1))
            rows[j++] = i;
        }
      }
      final long[] ls = rows;

      returningFrame = new MRTask(){
        @Override public void map(Chunk[] cs, NewChunk[] ncs) {
          if( nums.cnt()==0 ) return;
          if( ls != null && ls.length == 0 ) return;
          long start = cs[0].start();
          long end   = start + cs[0]._len;
          long min = ls==null?(long)nums.min():ls[0], max = ls==null?(long)nums.max()-1:ls[ls.length-1]; // exclusive max to inclusive max when stride == 1
          //     [ start, ...,  end ]     the chunk
          //1 []                          nums out left:  nums.max() < start
          //2                         []  nums out rite:  nums.min() > end
          //3 [ nums ]                    nums run left:  nums.min() < start && nums.max() <= end
          //4          [ nums ]           nums run in  :  start <= nums.min() && nums.max() <= end
          //5                   [ nums ]  nums run rite:  start <= nums.min() && end < nums.max()
          if( !(maxend) ) {   // not situation 1 or 2 above
            long startOffset = (min > start ? min : start);  // situation 4 and 5 => min > start;
            for( int i=(int)(startOffset-start); i= 0 )) {
                for(int c=0;c idxs = new ArrayList<>();
    for( double i=0; i[] dmap = new HashMap[types.length];
    String[][] domains = new String[types.length][];
    int[][][] cmaps = new int[types.length][][];
    for(int k=0;k();
      int c = 0;
      byte t = types[k];
      if( t == Vec.T_CAT ) {
        int[][] maps = new int[frs.length][];
        for(int i=1; i < frs.length; i++) {
          maps[i] = new int[frs[i].vec(k).domain().length];
          for(int j=0; j < maps[i].length; j++ ) {
            String s = frs[i].vec(k).domain()[j];
            if( !dmap[k].containsKey(s)) dmap[k].put(s, maps[i][j]=c++);
            else                         maps[i][j] = dmap[k].get(s);
          }
        }
        cmaps[k] = maps;
      } else {
        cmaps[k] = new int[frs.length][];
      }
      domains[k] = c==0?null:new String[c];
      for( Map.Entry e : dmap[k].entrySet())
        domains[k][e.getValue()] = e.getKey();
    }

    // Now make Keys for the new Vecs
    Key[] keys = fr.anyVec().group().addVecs(fr.numCols());
    Vec[] vecs = new Vec[fr.numCols()];
    int rowLayout = Vec.ESPC.rowLayout(keys[0],espc);
    for( int i=0; i {
    final Vec[] _vecs;          // Input vecs to be row-bound
    final Vec _v;               // Result vec
    final long[] _espc;         // Result layout
    int[][] _cmaps;             // categorical mapping array

    /** Capture everything needed to row-bind one output column.
     *  @param cc    completer passed straight to the superclass constructor
     *  @param vecs  input vecs to be row-bound
     *  @param v     result vec
     *  @param espc  result layout
     *  @param cmaps categorical mapping array, indexed like {@code vecs} */
    RbindTask(H2O.H2OCountedCompleter cc, Vec[] vecs, Vec v, long[] espc, int[][] cmaps) {
      super(cc);
      _vecs = vecs;
      _v = v;
      _espc = espc;
      _cmaps = cmaps;
    }
    /** Fork one {@code RbindMRTask} per input vec, starting at index 1
     *  (index 0 is never visited here).  Each task is given the running
     *  total of chunks belonging to the inputs before it, so it knows where
     *  its chunks land in the result vec.  The pending count is raised by
     *  {@code _vecs.length-2}, i.e. one less than the number of tasks forked. */
    @Override
    public void compute2() {
      final int nInputs = _vecs.length;
      addToPendingCount(nInputs-2);
      int chunkBase = 0;        // chunks contributed by the inputs handled so far
      int i = 1;
      while( i < nInputs ) {
        final Vec input = _vecs[i];
        final RbindMRTask copier = new RbindMRTask(this, _cmaps[i], _v, chunkBase);
        copier.dfork(input);
        chunkBase += input.nChunks();
        i++;
      }
    }
    /** Completion hook: stores the result vec with {@code DKV.put}.
     *  @param cc the completer reported by the framework; not used here */
    @Override
    public void onCompletion(CountedCompleter cc) {
      final Vec result = _v;
      DKV.put(result);
    }
  }

  /** Per-chunk worker that writes one input chunk into the result vec.
   *  The destination chunk index is the input chunk's own index shifted by
   *  the offset given at construction.  With a categorical map the values are
   *  rewritten through that map into a fresh chunk; without one the input
   *  chunk is deep-copied and stored under the destination chunk key. */
  private static class RbindMRTask extends MRTask {
    private final int[] _cmap;        // old level -> new level; null means copy the chunk as-is
    private final int _chunkOffset;   // added to the input chunk index to get the output chunk index
    private final Vec _v;             // result vec being filled

    RbindMRTask(H2O.H2OCountedCompleter hc, int[] cmap, Vec v, int offset) {
      super(hc);
      _cmap = cmap;
      _v = v;
      _chunkOffset = offset;
    }

    @Override public void map(Chunk cs) {
      final int destIdx = _chunkOffset+cs.cidx();
      final Key destKey = Vec.chunkKey(_v._key, destIdx);

      // No mapping supplied: store a deep copy of the chunk under the destination key.
      if (_cmap == null) {
        DKV.put(destKey, cs.deepCopy(), _fs, true);
        return;
      }

      assert !cs.hasFloat(): "Input chunk ("+cs.getClass()+") has float, but is expected to be categorical";
      final NewChunk remapped = new NewChunk(_v, destIdx);
      // Walk the rows, translating each stored integer through the map; NAs stay NA.
      int row = 0;
      while (row < cs._len) {
        if (cs.isNA(row)) {
          remapped.addNA();
        } else {
          final int oldLevel = (int)cs.at8(row);
          remapped.addNum(_cmap[oldLevel], 0);
        }
        row++;
      }
      remapped.close(_fs);
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy