org.apache.mahout.h2obindings.ops.Par Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.h2obindings.ops;
import water.MRTask;
import water.fvec.Frame;
import water.fvec.Vec;
import water.fvec.Chunk;
import water.fvec.NewChunk;
import water.parser.ValueString;
import org.apache.mahout.h2obindings.H2OHelper;
import org.apache.mahout.h2obindings.drm.H2ODrm;
/**
* Parallelize operator.
*/
public class Par {
/**
* (re)Parallelize DRM data according to new partitioning hints.
*
* @param drmA Input DRM containing data.
* @param min Hint of minimum number of partitions to parallelize, if not -1.
* @param exact Hint of exact number of partitions to parallelize, if not -1.
* @return new DRM holding the same data but parallelized according to new hints.
*/
public static H2ODrm exec(H2ODrm drmA, int min, int exact) {
final Frame frin = drmA.frame;
final Vec vin = drmA.keys;
// First create a new empty Frame with the required partitioning
Frame frout = H2OHelper.emptyFrame(frin.numRows(), frin.numCols(), min, exact);
Vec vout = null;
if (vin != null) {
// If String keyed, then run an MRTask on the new frame, and also
// creat yet another 1-column newer frame for the re-orged String keys.
// The new String Vec will therefore be similarly partitioned as the
// new Frame.
//
// vout is finally collected by calling anyVec() on outputFrame(),
// as it is the only column in the output frame.
vout = new MRTask() {
public void map(Chunk chks[], NewChunk nc) {
int chunkSize = chks[0].len();
Vec vins[] = frin.vecs();
long start = chks[0].start();
ValueString vstr = new ValueString();
for (int r = 0; r < chunkSize; r++) {
for (int c = 0; c < chks.length; c++) {
chks[c].set(r, vins[c].at(start + r));
}
nc.addStr(vin.atStr(vstr, start + r));
}
}
}.doAll(1, frout).outputFrame(null, null).anyVec();
} else {
// If not String keyed, then run and MRTask on the new frame, and
// just pull in right elements from frin
new MRTask() {
public void map(Chunk chks[]) {
int chunkSize = chks[0].len();
Vec vins[] = frin.vecs();
long start = chks[0].start();
for (int r = 0; r < chunkSize; r++) {
for (int c = 0; c < chks.length; c++) {
chks[c].set(r, vins[c].at(start + r));
}
}
}
}.doAll(frout);
}
return new H2ODrm(frout, vout);
}
}