
// water.rapids.ASTMerge Maven / Gradle / Ivy
package water.rapids;
import water.*;
import water.fvec.*;
import water.nbhm.*;
import java.util.Arrays;
/** plyr's merge: Join by any other name.
* Sample AST: (merge $leftFrame $rightFrame allLeftFlag allRightFlag)
*
* Joins two frames; all columns with the same names will be the join key. If
* you want to join on a subset of identical names, rename the columns first
* (otherwise the same column name would appear twice in the result).
*
* If allLeftFlag is true, all rows in the leftFrame will be included, even if
* there is no matching row in the rightFrame, and vice-versa for
* allRightFlag. Missing data will appear as NAs. Both flags can be true.
*/
public class ASTMerge extends ASTOp {
static final String VARS[] = new String[]{ "ary", "leftary", "rightary", "allleft", "allright"};
boolean _allLeft, _allRite;
/** Creates the op, handing the VARS argument names to the superclass constructor. */
public ASTMerge() {
  super(VARS);
}
/** @return the operator string for this op, {@code "merge"} */
@Override
String opStr() {
  return "merge";
}
/** @return a newly constructed ASTMerge */
@Override
ASTOp make() {
  return new ASTMerge();
}
/**
 * Reads the four merge arguments from the parser, in order: the left frame,
 * the right frame, the allLeft flag and the allRite flag, then consumes the
 * end of the expression.
 *
 * A flag given as an ASTId is first looked up in the parser's environment.
 * The flag is true only when the resulting ASTNum's value equals 1.
 *
 * @param E the parser state the arguments are read from
 * @return a clone of this op whose _asts holds the left and right frame ASTs
 * @throws IllegalArgumentException if either flag is not an ASTNum after lookup
 */
@Override ASTMerge parse_impl(Exec E) {
  // The two frame expressions come first; they are stored as ASTs below.
  final AST leftFrame = E.parse();
  final AST riteFrame = E.parse();

  // allLeft flag: resolve an identifier, then insist on a number.
  AST flag = E.parse();
  if( flag instanceof ASTId ) {
    flag = E._env.lookup((ASTId)flag);
  }
  if( !(flag instanceof ASTNum) ) {
    throw new IllegalArgumentException("Argument `allLeft` expected to be a boolean.");
  }
  _allLeft = ((ASTNum)flag)._d == 1;

  // allRite flag: same treatment as allLeft.
  flag = E.parse();
  if( flag instanceof ASTId ) {
    flag = E._env.lookup((ASTId)flag);
  }
  if( !(flag instanceof ASTNum) ) {
    throw new IllegalArgumentException("Argument `allRite` expected to be a boolean.");
  }
  _allRite = ((ASTNum)flag)._d == 1;

  E.eatEnd();

  // Both flags were assigned on this instance above, before the clone is taken.
  final ASTMerge merged = (ASTMerge) clone();
  merged._asts = new AST[]{ leftFrame, riteFrame };
  return merged;
}
/** Unconditionally fails via H2O.fail(); neither argument is read. */
@Override
void exec(Env e, AST[] args) {
  throw H2O.fail();
}
@Override void apply(Env env) {
Frame _l = env.popAry();
Frame _r = env.popAry();
Frame l = new Frame(_l.names().clone(),_l.vecs().clone());
Frame r = new Frame(_r.names().clone(),_r.vecs().clone());
// Look for the set of columns in common; resort left & right to make the
// leading prefix of column names match. Bail out if we find any weird
// column types.
int ncols=0; // Number of columns in common
for( int i=0; i>32));
return this;
}
/** @return the value of the _hash field */
@Override
public int hashCode() {
  return _hash;
}
@Override public boolean equals( Object o ) {
assert o instanceof Row;
Row r = (Row)o;
if( _hash != r._hash ) return false;
if( _chks == r._chks && _row == r._row ) return true;
// Now must check field contents
int len = _enum_maps.length;
for( int c=0; c {
// All active Merges have a per-Node hashset of one of the datasets.
// The map is keyed by each MergeSet's _uniq Key. It is parameterized so that
// a lookup such as MERGE_SETS.get(key)._rows resolves to a MergeSet; with a
// raw map, get() returns Object and the field access does not compile.
static NonBlockingHashMap<Key,MergeSet> MERGE_SETS = new NonBlockingHashMap<>();
final Key _uniq; // Key to allow sharing of this MergeSet on each Node
final int _ncols; // Number of leading columns for the Hash Key
final int[][] _id_maps;
final Frame _fr; // Frame to hash-all-rows locally per-node
// Created in setupLocal(). NOTE(review): the element type is presumably Row;
// confirm against the code that adds to this set before parameterizing it.
transient NonBlockingHashSet _rows;
/**
 * Stores the merge parameters and gives this set a fresh Key from Key.make().
 *
 * @param ncols   number of leading columns for the hash key
 * @param id_maps stored in _id_maps
 * @param fr      frame whose rows are hashed locally per node
 */
MergeSet( int ncols, int[][] id_maps, Frame fr ) {
  _uniq = Key.make();
  _ncols = ncols;
  _id_maps = id_maps;
  _fr = fr;
}
/**
 * Per-node, hash the entire _fr dataset: registers this MergeSet in
 * MERGE_SETS under _uniq, creates an empty _rows set, then runs a MakeHash
 * task over _fr.
 */
@Override
public void setupLocal() {
  MERGE_SETS.put(_uniq, this);
  _rows = new NonBlockingHashSet<>();
  // The registration and the empty set are in place before the task starts.
  final MakeHash hasher = new MakeHash(this);
  hasher.doAll(_fr, true/*run locally*/);
}
// Executed locally only, build a local HashSet over the entire given dataset
private static class MakeHash extends MRTask {
transient final MergeSet _ms;
/** @param ms the MergeSet this task is bound to; stored in _ms */
MakeHash( MergeSet ms ) {
  _ms = ms;
}
@Override public void map( Chunk chks[] ) {
int len = chks[0]._len;
for( int i=0; i {
private final int _ncols; // Number of merge columns
private final Key _uniq; // Which mergeset being merged
private final int[][] _enum_maps; // Mapping enum domains
// Copied from the allLeft constructor argument.
// NOTE(review): presumably the merge op's allLeft flag — confirm at the call site.
private final boolean _allLeft;
/**
 * Stores the join parameters on the task.
 *
 * @param ncols     number of merge columns
 * @param uniq      key identifying which mergeset is being merged
 * @param enum_maps mapping of enum domains
 * @param allLeft   stored in _allLeft
 */
DoJoin( int ncols, Key uniq, int[][] enum_maps, boolean allLeft ) {
  _ncols = ncols;
  _uniq = uniq;
  _enum_maps = enum_maps;
  _allLeft = allLeft;
}
@Override public void map( Chunk chks[], NewChunk nchks[] ) {
// Shared common hash map
NonBlockingHashSet rows = MergeSet.MERGE_SETS.get(_uniq)._rows;
int len = chks[0]._len;
Row row = new Row(chks); // Recycled Row object on the bigger dataset
for( int i=0; i
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy