All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.sysml.runtime.matrix.mapred.CSVWriteReducer Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.matrix.mapred;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Iterator;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

import org.apache.sysml.runtime.instructions.mr.CSVWriteInstruction;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.IJV;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.SparseRowsIterator;
import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;
import org.apache.sysml.runtime.matrix.mapred.CSVWriteReducer.RowBlockForTextOutput;
import org.apache.sysml.runtime.matrix.mapred.CSVWriteReducer.RowBlockForTextOutput.Situation;
import org.apache.sysml.runtime.util.MapReduceTool;

public class CSVWriteReducer extends ReduceBase implements Reducer
{
	
	private NullWritable nullKey = NullWritable.get();
	private RowBlockForTextOutput outValue = new RowBlockForTextOutput();
	private RowBlockForTextOutput zeroBlock = new RowBlockForTextOutput();
	
	private long[] rowIndexes=null;
	private long[] minRowIndexes=null;
	private long[] maxRowIndexes=null;
	private long[] colIndexes=null;
	private long[] numColBlocks=null;
	private int[] colsPerBlock=null;
	private int[] lastBlockNCols=null;
	private String[] delims=null;
	private boolean[] sparses=null;
	private int[] tagToResultIndex=null;
	
	private void addEndingMissingValues(byte tag, Reporter reporter) 
		throws IOException
	{
		long col=colIndexes[tag]+1;
		for(;col inValue,
			OutputCollector out, Reporter reporter)
			throws IOException 
	{
		long begin = System.currentTimeMillis();
		
		cachedReporter = reporter;

		byte tag = inkey.getTag();
		zeroBlock.setFormatParameters(delims[tag], sparses[tag]);
		outValue.setFormatParameters(delims[tag], sparses[tag]);
		
		Situation sit = Situation.MIDDLE;
		if(rowIndexes[tag]==minRowIndexes[tag])
			sit=Situation.START;
		else if(rowIndexes[tag]!=inkey.getFirstIndex())
			sit=Situation.NEWLINE;
		
		//check whether need to fill in missing values in previous rows
		if(sit==Situation.NEWLINE)
		{
			//if the previous row has not finished
			addEndingMissingValues(tag, reporter);
		}
		
		if(sit==Situation.NEWLINE||sit==Situation.START)
		{	
			//if a row is completely missing
			sit=addMissingRows(tag, inkey.getFirstIndex(), sit, reporter);
		}
		
		//add missing value at the beginning of this row
		for(long col=colIndexes[tag]+1; col out2Ins=new HashMap();
		try {
			CSVWriteInstruction[] ins = MRJobConfiguration.getCSVWriteInstructions(job);
			for(CSVWriteInstruction in: ins)
			{
				out2Ins.put(in.output, in);
				if(in.output>maxIndex)
					maxIndex=in.output;
			}
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
		
		int numParitions=job.getNumReduceTasks();
		int taskID=MapReduceTool.getUniqueTaskId(job);
		//LOG.info("## taks id: "+taskID);
		//for efficiency only, the arrays may have missing values
		rowIndexes=new long[maxIndex+1];
		colIndexes=new long[maxIndex+1];
		maxRowIndexes=new long[maxIndex+1];
		minRowIndexes=new long[maxIndex+1];
		numColBlocks=new long[maxIndex+1];
		lastBlockNCols=new int[maxIndex+1];
		colsPerBlock=new int[maxIndex+1];
		delims=new String[maxIndex+1];
		sparses=new boolean[maxIndex+1];
		tagToResultIndex=new int[maxIndex+1];
		
		for(int i=0; i 0 ) 
			{
				if( _data.isEmptyBlock(false) ) //EMPTY BLOCK
				{
					appendZero(_buffer, sparse, delim, false, _numCols);
				}
				else if( _data.isInSparseFormat() ) //SPARSE BLOCK
				{
					SparseRowsIterator iter = _data.getSparseRowsIterator();
					int j = -1;
					while( iter.hasNext() )
					{
						IJV cell = iter.next();
						appendZero(_buffer, sparse, delim, true, cell.j-j-1);
						
						j = cell.j; //current col
						if( cell.v != 0 ) //for nnz
							_buffer.append(cell.v);
						else if( !sparse ) 
							_buffer.append('0');
						if( j < _numCols-1 )
							_buffer.append(delim);
					}
					appendZero(_buffer, sparse, delim, false, _numCols-j-1);
				}
				else //DENSE BLOCK
				{
					for(int j=0; j<_numCols; j++)
					{
						double val = _data.getValueDenseUnsafe(0, j);
						if( val!=0 ) //for nnz
							_buffer.append(val);
						else if( !sparse ) 
							_buffer.append('0');
							
						if( j < _numCols-1 )
							_buffer.append(delim);
					}	
				}
			}
			
			ByteBuffer bytes = Text.encode(_buffer.toString());
			int length = bytes.limit();
		    out.write(bytes.array(), 0, length);
		}
		
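		/**
		 * Intended to append the text for {@code len} consecutive zero-valued cells
		 * to {@code buffer}; returns immediately when {@code len <= 0}.
		 * NOTE(review): the loop body is cut off in this copy of the file, so the
		 * parameter notes below are inferred from the signature and the call
		 * sites -- confirm against the full source.
		 * 
		 * @param buffer string builder holding the row text being assembled
		 * @param sparse presumably whether zeros are left empty instead of written as '0'
		 * @param delim column delimiter string
		 * @param alwaysDelim presumably whether a delimiter also follows the last appended zero
		 * @param len number of zero cells to append; non-positive values are a no-op
		 */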
		private static void appendZero( StringBuilder buffer, boolean sparse, String delim, boolean alwaysDelim, int len )
		{
			if( len <= 0 )
				return;
			
			for( int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy