org.apache.sysml.udf.lib.RemoveEmptyRows Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.udf.lib;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.udf.FunctionParameter;
import org.apache.sysml.udf.Matrix;
import org.apache.sysml.udf.PackageFunction;
import org.apache.sysml.udf.Matrix.ValueType;
/**
 * External (package) function that compacts the row indices of a matrix
 * stored as a text file with one cell per line.
 *
 * <p>Each input line is expected to hold three space-separated tokens:
 * {@code <row> <col> <value>}. Every distinct row index that occurs in the
 * input is assigned a new consecutive row index (starting at 1, in order of
 * first appearance); column indices and values are written through
 * unchanged. Rows that never occur in the input therefore disappear from the
 * output. Note that a row is kept as soon as any cell for it is present,
 * regardless of that cell's value.
 *
 * <p>The single function output is a {@link Matrix} that points to the newly
 * written file, with as many rows as distinct input rows were seen and the
 * same number of columns as the input matrix.
 */
@Deprecated
public class RemoveEmptyRows extends PackageFunction
{
	private static final long serialVersionUID = 1L;

	/** Name handed to {@code createOutputFilePathAndName} for the result file. */
	private static final String OUTPUT_FILE = "TMP";

	/** Result matrix; populated by {@link #execute()}. */
	private Matrix _ret;

	/**
	 * @return the number of outputs of this function, always 1
	 */
	@Override
	public int getNumFunctionOutputs()
	{
		return 1;
	}

	/**
	 * @param pos output position (ignored, there is only one output)
	 * @return the result matrix, or {@code null} if {@link #execute()} has
	 *         not completed successfully
	 */
	@Override
	public FunctionParameter getFunctionOutput(int pos)
	{
		return _ret;
	}

	/**
	 * Reads the input matrix file cell by cell, renumbers the row indices
	 * densely and writes the result to a new file.
	 *
	 * @throws RuntimeException wrapping any failure (missing input file,
	 *         malformed cell line, I/O error)
	 */
	@Override
	public void execute()
	{
		Matrix mat = (Matrix) this.getFunctionInput(0);
		String fnameOld = mat.getFilePath();

		//old row index -> new (compacted) row index
		HashMap<Long,Long> keyMap = new HashMap<Long,Long>();

		try
		{
			//prepare input
			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
			Path path = new Path(fnameOld);
			FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
			if( !fs.exists(path) )
				throw new IOException("File "+fnameOld+" does not exist on HDFS.");
			FileInputFormat.addInputPath(job, path);
			TextInputFormat informat = new TextInputFormat();
			informat.configure(job);

			//prepare output
			String fnameNew = createOutputFilePathAndName( OUTPUT_FILE );
			DataOutputStream ostream = MapReduceTool.getHDFSDataOutputStream( fnameNew, true );

			//read and write if necessary
			InputSplit[] splits = informat.getSplits(job, 1);
			LongWritable key = new LongWritable();
			Text value = new Text();
			long ID = 1; //next unused new row index

			try
			{
				//for obj reuse and preventing repeated buffer re-allocations
				StringBuilder sb = new StringBuilder();

				for( InputSplit split : splits )
				{
					RecordReader<LongWritable,Text> reader =
						informat.getRecordReader(split, job, Reporter.NULL);
					try
					{
						while( reader.next(key, value) )
						{
							String cellStr = value.toString().trim();
							//tolerate blank lines (e.g., a trailing newline)
							//instead of failing on the first nextToken()
							if( cellStr.isEmpty() )
								continue;

							StringTokenizer st = new StringTokenizer(cellStr, " ");
							//indices are parsed as long: the variables are long
							//and large matrices can exceed the int range
							long row = Long.parseLong( st.nextToken() );
							long col = Long.parseLong( st.nextToken() );
							double lvalue = Double.parseDouble( st.nextToken() );

							//single lookup; assign the next new index on first sight
							Long mapped = keyMap.get( row );
							if( mapped == null ) {
								mapped = ID++;
								keyMap.put(row, mapped);
							}
							long rowNew = mapped.longValue();

							sb.append(rowNew);
							sb.append(' ');
							sb.append(col);
							sb.append(' ');
							sb.append(lvalue);
							sb.append('\n');
							ostream.writeBytes( sb.toString() );
							sb.setLength(0);
						}
					}
					finally
					{
						reader.close();
					}
				}
			}
			finally
			{
				ostream.close();
			}

			//publish the result only after the output file has been closed
			_ret = new Matrix(fnameNew, keyMap.size(), mat.getNumCols(), ValueType.Double);
		}
		catch(Exception ex)
		{
			throw new RuntimeException( "Unable to execute external function.", ex );
		}
	}
}