
org.apache.mahout.h2obindings.ops.RowRange Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.h2obindings.ops;
import scala.collection.immutable.Range;
import water.MRTask;
import water.fvec.Frame;
import water.fvec.Vec;
import water.fvec.Chunk;
import water.fvec.NewChunk;
import water.parser.ValueString;
import org.apache.mahout.h2obindings.drm.H2ODrm;
/**
 * Row-range filter operation: builds a new DRM containing only the rows of
 * an input DRM whose row numbers are members of a given Range.
 */
public class RowRange {
  /**
   * Filter rows from the input DRM, keeping only the rows whose indices are
   * contained in R.
   *
   * @param drmA Input DRM.
   * @param R Range object specifying the row numbers to keep.
   * @return new DRM with just the filtered rows. Its key Vec is null when the
   *         input DRM has no key Vec.
   */
  public static H2ODrm exec(H2ODrm drmA, final Range R) {
    Frame A = drmA.frame;
    Vec keys = drmA.keys;

    // Run a filtering MRTask on A. A row makes it into the output only if
    // its global row number is a member of R.
    Frame Arr = new MRTask() {
        public void map(Chunk chks[], NewChunk ncs[]) {
          int chunkSize = chks[0].len();
          long chunkStart = chks[0].start();

          // First check if the entire chunk even overlaps with R
          if (!mayOverlap(R, chunkStart, chunkSize)) {
            return;
          }

          // This chunk overlaps, filter out just the overlapping rows
          for (int r = 0; r < chunkSize; r++) {
            if (!isSelected(R, chunkStart + r)) {
              continue;
            }
            for (int c = 0; c < chks.length; c++) {
              ncs[c].addNum(chks[c].atd(r));
            }
          }
        }
      }.doAll(A.numCols(), A).outputFrame(null, null);

    Vec Vrr = (keys == null) ? null : new MRTask() {
        // This is a String keyed DRM. Do the same thing as above,
        // but this time just one column of Strings.
        public void map(Chunk chk, NewChunk nc) {
          int chunkSize = chk.len();
          long chunkStart = chk.start();

          if (!mayOverlap(R, chunkStart, chunkSize)) {
            return;
          }

          // Allocated only once we know the chunk contributes rows; reused
          // as the scratch buffer for every string read from this chunk.
          ValueString vstr = new ValueString();
          for (int r = 0; r < chunkSize; r++) {
            if (!isSelected(R, chunkStart + r)) {
              continue;
            }
            nc.addStr(chk.atStr(vstr, r));
          }
        }
      }.doAll(1, keys).outputFrame(null, null).anyVec();

    return new H2ODrm(Arr, Vrr);
  }

  /**
   * Cheap pre-check: can the chunk covering rows
   * [chunkStart, chunkStart + chunkSize) hold any row of R?
   *
   * The bounds are taken as the min/max of R.start() and R.end() so the
   * check stays conservative regardless of the direction of the range or of
   * whether its end is inclusive; exact membership is decided per row by
   * {@link #isSelected(Range, long)}.
   */
  private static boolean mayOverlap(Range R, long chunkStart, int chunkSize) {
    long lowest = Math.min(R.start(), R.end());
    long highest = Math.max(R.start(), R.end());
    // chunkStart + chunkSize is one past the last row of the chunk.
    return chunkStart <= highest && chunkStart + chunkSize > lowest;
  }

  /**
   * Tests whether a global row number is a member of R.
   *
   * Range elements are ints, so a row number that does not fit in an int can
   * never be a member. Narrowing explicitly lets the call bind to the
   * int-typed contains; a long argument cannot match that overload and would
   * instead be boxed and passed to the generic contains(Object), which may
   * scan the range element by element.
   */
  private static boolean isSelected(Range R, long row) {
    return row == (int) row && R.contains((int) row);
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy