All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.sysml.runtime.matrix.data.MatrixBlock Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.matrix.data;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;

import org.apache.commons.math3.random.Well1024a;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.hops.Hop.OpOp2;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.lops.MMTSJ.MMTSJType;
import org.apache.sysml.lops.MapMultChain.ChainType;
import org.apache.sysml.lops.PartialAggregate.CorrectionLocationType;
import org.apache.sysml.parser.DMLTranslator;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.DMLUnsupportedOperationException;
import org.apache.sysml.runtime.functionobjects.Builtin;
import org.apache.sysml.runtime.functionobjects.CM;
import org.apache.sysml.runtime.functionobjects.CTable;
import org.apache.sysml.runtime.functionobjects.DiagIndex;
import org.apache.sysml.runtime.functionobjects.Divide;
import org.apache.sysml.runtime.functionobjects.KahanFunction;
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
import org.apache.sysml.runtime.functionobjects.Multiply;
import org.apache.sysml.runtime.functionobjects.Plus;
import org.apache.sysml.runtime.functionobjects.ReduceAll;
import org.apache.sysml.runtime.functionobjects.ReduceCol;
import org.apache.sysml.runtime.functionobjects.ReduceRow;
import org.apache.sysml.runtime.functionobjects.RevIndex;
import org.apache.sysml.runtime.functionobjects.SortIndex;
import org.apache.sysml.runtime.functionobjects.SwapIndex;
import org.apache.sysml.runtime.instructions.cp.CM_COV_Object;
import org.apache.sysml.runtime.instructions.cp.DoubleObject;
import org.apache.sysml.runtime.instructions.cp.KahanObject;
import org.apache.sysml.runtime.instructions.cp.ScalarObject;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.LibMatrixBincell.BinaryAccessType;
import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator;
import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
import org.apache.sysml.runtime.matrix.operators.BinaryOperator;
import org.apache.sysml.runtime.matrix.operators.CMOperator;
import org.apache.sysml.runtime.matrix.operators.CMOperator.AggregateOperationTypes;
import org.apache.sysml.runtime.matrix.operators.COVOperator;
import org.apache.sysml.runtime.matrix.operators.Operator;
import org.apache.sysml.runtime.matrix.operators.QuaternaryOperator;
import org.apache.sysml.runtime.matrix.operators.ReorgOperator;
import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
import org.apache.sysml.runtime.matrix.operators.UnaryOperator;
import org.apache.sysml.runtime.util.DataConverter;
import org.apache.sysml.runtime.util.FastBufferedDataInputStream;
import org.apache.sysml.runtime.util.FastBufferedDataOutputStream;
import org.apache.sysml.runtime.util.IndexRange;
import org.apache.sysml.runtime.util.UtilFunctions;


public class MatrixBlock extends MatrixValue implements Externalizable
{

	private static final long serialVersionUID = 7319972089143154056L;
	
	//sparsity nnz threshold, based on practical experiments on space consumption and performance
	public static final double SPARSITY_TURN_POINT = 0.4;
	//sparsity threshold for ultra-sparse matrix operations (40nnz in a 1kx1k block)
	public static final double ULTRA_SPARSITY_TURN_POINT = 0.00004; 
	//basic header (int rlen, int clen, byte type)
	public static final int HEADER_SIZE = 9;
	
	public enum BlockType{
		EMPTY_BLOCK,  
		ULTRA_SPARSE_BLOCK, //ultra sparse representation, in-mem same as sparse
		SPARSE_BLOCK, //sparse representation, see sparseRows 
		DENSE_BLOCK, //dense representation, see denseBlock			
	}
	
	//matrix meta data
	protected int rlen       = -1;
	protected int clen       = -1;
	protected boolean sparse = true;
	protected long nonZeros   = 0;
	
	//matrix data (sparse or dense)
	protected double[] denseBlock    = null;
	protected SparseRow[] sparseRows = null;
		
	//sparse-block-specific attributes (allocation only)
	protected int estimatedNNzsPerRow = -1; 
		
	//ctable-specific attributes
	protected int maxrow = -1;
	protected int maxcolumn = -1;
	
	//grpaggregate-specific attributes (optional)
	protected int numGroups = -1;
	
	//diag-specific attributes (optional)
	protected boolean diag = false;
	
	
	////////
	// Matrix Constructors
	//
	
	public MatrixBlock()
	{
		rlen = 0;
		clen = 0;
		sparse = true;
		nonZeros = 0;
		maxrow = 0;
		maxcolumn = 0;
	}
	public MatrixBlock(int rl, int cl, boolean sp)
	{
		rlen = rl;
		clen = cl;
		sparse = sp;
		nonZeros = 0;
		maxrow = 0;
		maxcolumn = 0;
	}
	
	public MatrixBlock(int rl, int cl, long estnnzs) 
	{
		this(rl, cl, false, estnnzs);
		
		// Adjust sparsity based on estimated non-zeros
		double denseSize = estimateSizeDenseInMemory(rl, cl);
		double sparseSize = estimateSizeSparseInMemory(rl, cl, (double)estnnzs/(rl*cl));
		this.sparse = (denseSize < sparseSize ? false : true);
		
	}
	
	public MatrixBlock(int rl, int cl, boolean sp, long estnnzs)
	{
		this(rl, cl, sp);
		estimatedNNzsPerRow=(int)Math.ceil((double)estnnzs/(double)rl);	
	}
	
	public MatrixBlock(MatrixBlock that)
	{
		this.copy(that);
	}
	
	////////
	// Initialization methods
	// (reset, init, allocate, etc)
	
	public void reset()
	{
		reset(-rlen);
	}
	
	public void reset(long estnnzs)
	{
		estimatedNNzsPerRow=(int)Math.ceil((double)estnnzs/(double)rlen);
		if(sparse)
		{
			resetSparse();
		}
		else
		{
			if(denseBlock!=null)
			{
				if(denseBlock.length rlen*clen )
			throw new DMLRuntimeException("MatrixBlockDSM.init() invoked with too large dimensions ("+r+","+c+") vs ("+rlen+","+clen+")");
		
		//allocate or resize dense block
		allocateDenseBlock();
		
		//copy and compute nnz
		for(int i=0, ix=0; i < r; i++, ix+=clen) 
			System.arraycopy(arr[i], 0, denseBlock, ix, arr[i].length);
		recomputeNonZeros();
		
		maxrow = r;
		maxcolumn = c;
	}
	
	/**
	 * NOTE: This method is designed only for dense representation.
	 * 
	 * @param arr
	 * @param r
	 * @param c
	 * @throws DMLRuntimeException
	 */
	public void init(double[] arr, int r, int c) 
		throws DMLRuntimeException 
	{	
		//input checks 
		if ( sparse )
			throw new DMLRuntimeException("MatrixBlockDSM.init() can be invoked only on matrices with dense representation.");
		if( r*c > rlen*clen )
			throw new DMLRuntimeException("MatrixBlockDSM.init() invoked with too large dimensions ("+r+","+c+") vs ("+rlen+","+clen+")");
		
		//allocate or resize dense block
		allocateDenseBlock();
		
		//copy and compute nnz 
		System.arraycopy(arr, 0, denseBlock, 0, arr.length);
		recomputeNonZeros();
		
		maxrow = r;
		maxcolumn = c;
	}
	
	/**
	 * 
	 * @param val
	 * @param r
	 * @param c
	 * @throws DMLRuntimeException
	 */
	public void init(double val, int r, int c)
		throws DMLRuntimeException 
	{	
		//input checks 
		if ( sparse )
			throw new DMLRuntimeException("MatrixBlockDSM.init() can be invoked only on matrices with dense representation.");
		if( r*c > rlen*clen )
			throw new DMLRuntimeException("MatrixBlockDSM.init() invoked with too large dimensions ("+r+","+c+") vs ("+rlen+","+clen+")");
		
		if( val != 0 ) {
			//allocate or resize dense block
			allocateDenseBlock();
			
			if( r*c == rlen*clen ) { //FULL MATRIX INIT
				//memset value  
				Arrays.fill(denseBlock, val);
			}
			else { //PARTIAL MATRIX INIT
				//rowwise memset value 
				for(int i=0, ix=0; i < r; i++, ix+=clen) 
					Arrays.fill(denseBlock, ix, ix+c, val);
			}
			
			//set non zeros to input dims
			nonZeros = r*c;
		}
		
		maxrow = r;
		maxcolumn = c;
	}
	
	/**
	 * 
	 * @return
	 */
	public boolean isAllocated()
	{
		if( sparse )
			return (sparseRows!=null);
		else
			return (denseBlock!=null);
	}
	
	/**
	 * @throws DMLRuntimeException 
	 * 
	 */
	public void allocateDenseBlock() 
		throws RuntimeException 
	{
		allocateDenseBlock( true );
	}
	
	/**
	 * 
	 */
	public void allocateDenseOrSparseBlock()
	{
		if( sparse )
			allocateSparseRowsBlock();
		else
			allocateDenseBlock();
	}
	
	/**
	 * 
	 * @param clearNNZ
	 * @throws DMLRuntimeException
	 */
	public void allocateDenseBlock(boolean clearNNZ) 
		throws RuntimeException 
	{
		long limit = (long)rlen * clen;
		
		//check max size constraint (16GB dense), since java arrays are limited to 2^(32-1) elements)
		if( limit > Integer.MAX_VALUE ) {
			throw new RuntimeException("Dense in-memory matrix block ("+rlen+"x"+clen+") exceeds supported size of "+Integer.MAX_VALUE+" elements (16GB). " +
					                   "Please, reduce the JVM heapsize to execute this in MR.");
		}
		
		//allocate block if non-existing or too small (guaranteed to be 0-initialized),
		if(denseBlock == null || denseBlock.length < limit ) {
			denseBlock = new double[(int)limit];
		}
		
		//clear nnz if necessary
		if( clearNNZ ) {
			nonZeros = 0;
		}
	}
	
	/**
	 * 
	 */
	public void allocateSparseRowsBlock()
	{
		allocateSparseRowsBlock(true);
	}
	
	/**
	 * 
	 * @param clearNNZ
	 */
	public void allocateSparseRowsBlock(boolean clearNNZ)
	{	
		//allocate block if non-existing or too small (guaranteed to be 0-initialized),
		if( sparseRows == null ) {
			sparseRows=new SparseRow[rlen];
		}
		else if( sparseRows.length < rlen ) {
			SparseRow[] oldSparseRows=sparseRows;
			sparseRows = new SparseRow[rlen];
			for(int i=0; irlen || c > clen )
			throw new RuntimeException("indexes ("+r+","+c+") out of range ("+rlen+","+clen+")");
		
		return quickGetValue(r, c);
	}
	
	@Override
	public void setValue(int r, int c, double v) 
	{
		//matrix bounds check 
		if( r>rlen || c > clen )
			throw new RuntimeException("indexes ("+r+","+c+") out of range ("+rlen+","+clen+")");

		quickSetValue(r, c, v);
	}
	
	/**
	 * 
	 * @param r
	 * @param c
	 * @return
	 */
	public double quickGetValue(int r, int c) 
	{
		if(sparse)
		{
			if( sparseRows==null || sparseRows.length<=r || sparseRows[r]==null )
				return 0;
			return sparseRows[r].get(c);
		}
		else
		{
			if( denseBlock==null )
				return 0;
			return denseBlock[r*clen+c]; 
		}
	}
	
	/**
	 * 
	 * @param r
	 * @param c
	 * @param v
	 */
	public void quickSetValue(int r, int c, double v) 
	{
		if(sparse)
		{
			//early abort
			if( (sparseRows==null || sparseRows.length<=r || sparseRows[r]==null) && v==0 )
				return;
			
			//allocation on demand
			allocateSparseRowsBlock(false);
			if( sparseRows[r]==null )
				sparseRows[r] = new SparseRow(estimatedNNzsPerRow, clen);
			
			//set value and maintain nnz
			if( sparseRows[r].set(c, v) )
				nonZeros += (v!=0) ? 1 : -1;
		}
		else
		{
			//early abort
			if( denseBlock==null && v==0 )
				return;		
			
			//allocate and init dense block (w/o overwriting nnz)
			allocateDenseBlock(false);
			
			//set value and maintain nnz
			int index=r*clen+c;
			if( denseBlock[index]==0 )
				nonZeros++;
			denseBlock[index] = v;
			if( v==0 )
				nonZeros--;
		}
	}
	
	public double getValueDenseUnsafe(int r, int c) 
	{
		if(denseBlock==null)
			return 0;
		return denseBlock[r*clen+c]; 
	}
	

	/**
	 * This can be only called when you know you have properly allocated spaces for a dense representation
	 * and r and c are in the the range of the dimension
	 * Note: this function won't keep track of the nozeros
	 */	
	public void setValueDenseUnsafe(int r, int c, double v) 
	{
		denseBlock[r*clen+c]=v;		
	}
	
	public double getValueSparseUnsafe(int r, int c) 
	{
		if(sparseRows==null || sparseRows.length<=r || sparseRows[r]==null)
			return 0;
		return sparseRows[r].get(c);	
	}
	
	/**
	 * Append value is only used when values are appended at the end of each row for the sparse representation
	 * This can only be called, when the caller knows the access pattern of the block
	 * 	 
	 * @param r
	 * @param c
	 * @param v
	 */
	public void appendValue(int r, int c, double v)
	{
		//early abort (append guarantees no overwrite)
		if( v == 0 ) 
			return;

		if( !sparse ) //DENSE 
		{
			//allocate on demand (w/o overwriting nnz)
			allocateDenseBlock(false);
			
			//set value and maintain nnz
			denseBlock[r*clen+c] = v;
			nonZeros++;
		}
		else //SPARSE
		{
			//allocation on demand (w/o overwriting nnz)
			allocateSparseRowsBlock(false);
			if(sparseRows[r]==null)
				sparseRows[r]=new SparseRow(estimatedNNzsPerRow, clen);
			
			//set value and maintain nnz
			sparseRows[r].append(c, v);
			nonZeros++;
		}
	}
	
	public void appendRow(int r, SparseRow values)
	{
		if(values==null)
			return;
		if(sparse)
		{
			//allocation on demand
			allocateSparseRowsBlock(false);
			if(sparseRows[r]==null)
				sparseRows[r]=new SparseRow(values);
			else
				sparseRows[r].copy(values);
			nonZeros+=values.size();
			
		}
		else
		{
			int[] cols=values.getIndexContainer();
			double[] vals=values.getValueContainer();
			for(int i=0; i1 )
				arow.sort();
	}
	
	/**
	 * Utility function for computing the min non-zero value. 
	 * 
	 * @return
	 * @throws DMLRuntimeException
	 */
	public double minNonZero() 
		throws DMLRuntimeException
	{
		//check for empty block and return immediately
		if( isEmptyBlock() )
			return -1;
		
		//NOTE: usually this method is only applied on dense vectors and hence not really tuned yet.
		double min = Double.MAX_VALUE;
		for( int i=0; i (long)rlen*clen )
			throw new DMLRuntimeException("Number of non-zeros mismatch on merge disjoint (target="+rlen+"x"+clen+", nnz target="+nonZeros+", nnz source="+that.nonZeros+")");
		
		//check for empty target (copy in full)
		if( isEmptyBlock(false) ) {
			copy(that);
			return;
		}
		
		//core matrix block merge (guaranteed non-empty source/target, nnz maintenance not required)
		long nnz = nonZeros + that.nonZeros;
		if( sparse )
			mergeIntoSparse(that, appendOnly);
		else
			mergeIntoDense(that);	
		
		//maintain number of nonzeros
		nonZeros = nnz;
	}
	
	/**
	 * 
	 * @param that
	 */
	private void mergeIntoDense(MatrixBlock that)
	{
		if( that.sparse ) //DENSE <- SPARSE
		{
			double[] a = denseBlock;
			SparseRow[] b = that.sparseRows;
			int m = rlen;
			int n = clen;
			
			for( int i=0, aix=0; i=BlockType.values().length )
			throw new IOException("invalid format: '"+bformat+"' (need to be 0-"+BlockType.values().length+").");
			
		BlockType format=BlockType.values()[bformat];
		try 
		{
			switch(format)
			{
				case ULTRA_SPARSE_BLOCK:
					nonZeros = readNnzInfo( in, true );
					sparse = evalSparseFormatInMemory(rlen, clen, nonZeros);
					cleanupBlock(true, true); //clean all
					if( sparse )
						readUltraSparseBlock(in);
					else
						readUltraSparseToDense(in);
					break;
				case SPARSE_BLOCK:
					nonZeros = readNnzInfo( in, false );
					sparse = evalSparseFormatInMemory(rlen, clen, nonZeros);
					cleanupBlock(sparse, !sparse); 
					if( sparse )
						readSparseBlock(in);
					else
						readSparseToDense(in);
					break;
				case DENSE_BLOCK:
					sparse = false;
					cleanupBlock(false, true); //reuse dense
					readDenseBlock(in); //always dense in-mem if dense on disk
					break;
				case EMPTY_BLOCK:
					sparse = true;
					cleanupBlock(true, true); //clean all
					nonZeros = 0;
					break;
			}
		}
		catch(DMLRuntimeException ex)
		{
			throw new IOException("Error reading block of type '"+format.toString()+"'.", ex);
		}
	}

	/**
	 * 
	 * @param in
	 * @throws IOException
	 * @throws DMLRuntimeException 
	 */
	private void readDenseBlock(DataInput in) 
		throws IOException, DMLRuntimeException 
	{
		allocateDenseBlock(true); //allocate block, clear nnz
		
		int limit = rlen*clen;
		
		if( in instanceof MatrixBlockDataInput ) //fast deserialize
		{
			MatrixBlockDataInput mbin = (MatrixBlockDataInput)in;
			nonZeros = mbin.readDoubleArray(limit, denseBlock);
		}
		else if( in instanceof DataInputBuffer && MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION ) 
		{
			//workaround because sequencefile.reader.next(key, value) does not yet support serialization framework
			DataInputBuffer din = (DataInputBuffer)in;
			MatrixBlockDataInput mbin = new FastBufferedDataInputStream(din);
			nonZeros = mbin.readDoubleArray(limit, denseBlock);			
			((FastBufferedDataInputStream)mbin).close();
		}
		else //default deserialize
		{
			for( int i=0; i 1 ) //ULTRA-SPARSE BLOCK
		{ 
			//block: read ijv-triples
			for(long i=0; i 1 ) //ULTRA-SPARSE BLOCK
		{ 
			//block: read ijv-triples
			for(long i=0; i 1 ) //ULTRA-SPARSE BLOCK
		{
			//block: write ijv-triples
			for(int r=0;r 1 ) //ULTRA-SPARSE BLOCK
		{
			//block: write ijv-triples
			for(int r=0, ix=0; r Integer.MAX_VALUE  && !ultrasparse) {
			nonZeros = in.readLong();
		}
		else {
			nonZeros = in.readInt(); 
		}
		
		return nonZeros;
	}
	
	/**
	 * 
	 * @param out
	 * @throws IOException
	 */
	private void writeNnzInfo( DataOutput out, boolean ultrasparse ) 
		throws IOException
	{
		//note: if ultrasparse, int always sufficient because nnz Integer.MAX_VALUE && !ultrasparse) {
			out.writeLong( nonZeros ); 
		}
		else {
			out.writeInt( (int)nonZeros );
		}
	}
	
	/**
	 * Redirects the default java serialization via externalizable to our default 
	 * hadoop writable serialization for efficient broadcast/rdd deserialization. 
	 * 
	 * @param is
	 * @throws IOException
	 */
	public void readExternal(ObjectInput is) 
		throws IOException
	{
		if( is instanceof ObjectInputStream )
		{
			//fast deserialize of dense/sparse blocks
			ObjectInputStream ois = (ObjectInputStream)is;
			FastBufferedDataInputStream fis = new FastBufferedDataInputStream(ois);
			readFields(fis);
		}
		else {
			//default deserialize (general case)
			readFields(is);
		}
	}
	
	/**
	 * Redirects the default java serialization via externalizable to our default 
	 * hadoop writable serialization for efficient broadcast/rdd serialization. 
	 * 
	 * @param is
	 * @throws IOException
	 */
	public void writeExternal(ObjectOutput os) 
		throws IOException
	{
		if( os instanceof ObjectOutputStream ) {
			//fast serialize of dense/sparse blocks
			ObjectOutputStream oos = (ObjectOutputStream)os;
			FastBufferedDataOutputStream fos = new FastBufferedDataOutputStream(oos);
			write(fos);
			fos.flush();
		}
		else {
			//default serialize (general case)
			write(os);	
		}
	}
	
	/**
	 * NOTE: The used estimates must be kept consistent with the respective write functions. 
	 * 
	 * @return
	 */
	public long getExactSizeOnDisk()
	{
		//determine format
		boolean sparseSrc = sparse;
		boolean sparseDst = evalSparseFormatOnDisk();
		
		long lrlen = (long) rlen;
		long lclen = (long) clen;
		long lnonZeros = (long) nonZeros;
		
		//ensure exact size estimates for write
		if( lnonZeros <= 0 )
		{
			recomputeNonZeros();
			lnonZeros = (long) nonZeros;
		}
				
		//get exact size estimate (see write for the corresponding meaning)
		if( sparseSrc )
		{
			//write sparse to *
			if(sparseRows==null || lnonZeros==0)
				return HEADER_SIZE; //empty block
			else if( lnonZeros Integer.MAX_VALUE) ? 8 : 4;
		//data: (int num per row, int-double pair per non-zero value)
		size += nrows * 4 + nnz * 12;	

		return size;
	}
	
	/**
	 * 
	 * @param nrows
	 * @param ncols
	 * @param nnz
	 * @return
	 */
	private static long estimateSizeUltraSparseOnDisk( long nrows, long ncols, long nnz )
	{
		//basic header (int rlen, int clen, byte type) 
		long size = HEADER_SIZE;
		//extended header (int nnz, guaranteed by rlen 1 ) //block: ijv-triples 
			size += nnz * 16; 	
		else //column: iv-pairs
			size += nnz * 12; 
		
		return size;
	}
	
	/**
	 * 
	 * @param m1
	 * @param m2
	 * @param op
	 * @return
	 */
	public static SparsityEstimate estimateSparsityOnAggBinary(MatrixBlock m1, MatrixBlock m2, AggregateBinaryOperator op)
	{
		//Since MatrixMultLib always uses a dense output (except for ultra-sparse mm)
		//with subsequent check for sparsity, we should always return a dense estimate.
		//Once, we support more aggregate binary operations, we need to change this.
		
		//WARNING: KEEP CONSISTENT WITH LIBMATRIXMULT
		//Note that it is crucial to report the right output representation because
		//in case of block reuse (e.g., mmcj) the output 'reset' refers to either
		//dense or sparse representation and hence would produce incorrect results
		//if we report the wrong representation (i.e., missing reset on ultrasparse mm). 
		
		boolean ultrasparse = (m1.isUltraSparse() || m2.isUltraSparse());
		return new SparsityEstimate(ultrasparse, m1.getNumRows()*m2.getNumRows());
	}
	
	/**
	 * 
	 * @param m1
	 * @param m2
	 * @param op
	 * @return
	 */
	private static SparsityEstimate estimateSparsityOnBinary(MatrixBlock m1, MatrixBlock m2, BinaryOperator op)
	{
		SparsityEstimate est = new SparsityEstimate();
		
		//estimate dense output for all sparse-unsafe operations, except DIV (because it commonly behaves like
		//sparse-safe but is not due to 0/0->NaN, this is consistent with the current hop sparsity estimate)
		//see also, special sparse-safe case for DIV in LibMatrixBincell 
		if( !op.sparseSafe && !(op.fn instanceof Divide) ) {
			est.sparse = false;
			return est;
		}
		
		BinaryAccessType atype = LibMatrixBincell.getBinaryAccessType(m1, m2);
		boolean outer = (atype == BinaryAccessType.OUTER_VECTOR_VECTOR);
		long m = m1.getNumRows();
		long n = outer ? m2.getNumColumns() : m1.getNumColumns();
		long nz1 = m1.getNonZeros();
		long nz2 = m2.getNonZeros();
		
		//account for matrix vector and vector/vector
		long estnnz = 0;
		if( atype == BinaryAccessType.OUTER_VECTOR_VECTOR )
		{
			//for outer vector operations the sparsity estimate is exactly known
			estnnz = nz1 * nz2; 
		}
		else //DEFAULT CASE
		{		
			if( atype == BinaryAccessType.MATRIX_COL_VECTOR )
				nz2 = nz2 * n;
			else if( atype == BinaryAccessType.MATRIX_ROW_VECTOR )
				nz2 = nz2 * m;
		
			//compute output sparsity consistent w/ the hop compiler
			OpOp2 bop = op.getBinaryOperatorOpOp2();
			double sp1 = OptimizerUtils.getSparsity(m, n, nz1);
			double sp2 = OptimizerUtils.getSparsity(m, n, nz2);
			double spout = OptimizerUtils.getBinaryOpSparsity(sp1, sp2, bop, true);
			estnnz = UtilFunctions.toLong(spout * m * n);
		}
		
		est.sparse = evalSparseFormatInMemory(m, n, estnnz);
		est.estimatedNonZeros = estnnz;
		
		return est;
	}
	
	private boolean estimateSparsityOnSlice(int selectRlen, int selectClen, int finalRlen, int finalClen)
	{
		long ennz = (long)((double)nonZeros/rlen/clen*selectRlen*selectClen);
		return evalSparseFormatInMemory(finalRlen, finalClen, ennz); 
	}
	
	private boolean estimateSparsityOnLeftIndexing(long rlenm1, long clenm1, long nnzm1, long rlenm2, long clenm2, long nnzm2)
	{
		//min bound: nnzm1 - rlenm2*clenm2 + nnzm2
		//max bound: min(rlenm1*rlenm2, nnzm1+nnzm2)
		
		long ennz = Math.min(rlenm1*clenm1, nnzm1+nnzm2);
		return evalSparseFormatInMemory(rlenm1, clenm1, ennz);
	}
	
	private boolean estimateSparsityOnGroupedAgg( long rlen, long groups )
	{
		long ennz = Math.min(groups, rlen);
		return evalSparseFormatInMemory(groups, 1, ennz);
	}
	
	////////
	// Core block operations (called from instructions)
	
	/**
	 * 
	 */
	@Override
	public MatrixValue scalarOperations(ScalarOperator op, MatrixValue result) 
		throws DMLUnsupportedOperationException, DMLRuntimeException
	{
		MatrixBlock ret = checkType(result);
		
		// estimate the sparsity structure of result matrix
		boolean sp = this.sparse; // by default, we guess result.sparsity=input.sparsity
		if (!op.sparseSafe)
			sp = false; // if the operation is not sparse safe, then result will be in dense format
		
		//allocate the output matrix block
		if( ret==null )
			ret = new MatrixBlock(rlen, clen, sp, this.nonZeros);
		else
			ret.reset(rlen, clen, sp, this.nonZeros);
		
		//core scalar operations
		LibMatrixBincell.bincellOp(this, ret, op);
		
		return ret;
	}
	
	/**
	 * 
	 */
	@Override
	public MatrixValue unaryOperations(UnaryOperator op, MatrixValue result) 
		throws DMLUnsupportedOperationException, DMLRuntimeException
	{
		MatrixBlock ret = checkType(result);
		
		// estimate the sparsity structure of result matrix
		boolean sp = this.sparse; // by default, we guess result.sparsity=input.sparsity
		if (!op.sparseSafe)
			sp = false; // if the operation is not sparse safe, then result will be in dense format
		
		//allocate output
		if( ret == null )
			ret = new MatrixBlock(rlen, clen, sp, this.nonZeros);
		else 
			ret.reset(rlen, clen, sp);
		
		//core execute
		if( LibMatrixAgg.isSupportedUnaryOperator(op) ) 
		{
			//e.g., cumsum/cumprod/cummin/cumax
			LibMatrixAgg.aggregateUnaryMatrix(this, ret, op);
		}
		else
		{
			//default execute unary operations
			if(op.sparseSafe)
				sparseUnaryOperations(op, ret);
			else
				denseUnaryOperations(op, ret);
		}
		
		//ensure empty results sparse representation 
		//(no additional memory requirements)
		if( ret.isEmptyBlock(false) )
			ret.examSparsity();
		
		return ret;
	}

	/**
	 * 
	 * @param op
	 * @throws DMLUnsupportedOperationException
	 * @throws DMLRuntimeException
	 */
	private void sparseUnaryOperations(UnaryOperator op, MatrixBlock ret) 
		throws DMLUnsupportedOperationException, DMLRuntimeException
	{
		//early abort possible since sparse-safe
		if( isEmptyBlock(false) )
			return;
		
		final int m = rlen;
		final int n = clen;
		
		if( sparse ) //SPARSE <- SPARSE
		{
			SparseRow[] a = sparseRows;
			
			for(int i=0; i both values the same, break ties
							// in favor of higher index.
							long curMaxIndex = (long) quickGetValue(r,0);
							quickSetValue(r, 0, Math.max(curMaxIndex, newMaxIndex));
						} else if(1.0 == update){
							// Return value of 1 ==> new value is better; use its index
							quickSetValue(r, 0, newMaxIndex);
							cor.quickSetValue(r, 0, newMaxValue);
						} else {
							// Other return value ==> current answer is best
						}
					}
					// *** END HACK ***
				}else{
					for(int r=0; r both values the same, break ties
						// in favor of higher index.
						long curMaxIndex = (long) quickGetValue(r,0);
						quickSetValue(r, 0, Math.max(curMaxIndex, newMaxIndex));
					} else if(1.0 == update){
						// Return value of 1 ==> new value is better; use its index
						quickSetValue(r, 0, newMaxIndex);
						quickSetValue(r, 1, newMaxValue);
					} else {
						// Other return value ==> current answer is best
					}
				}
				// *** END HACK ***
			}
			else
			{
				if(aggOp.increOp.fn instanceof KahanPlus)
				{
					LibMatrixAgg.aggregateBinaryMatrix(newWithCor, this, aggOp);
				}
				else
				{
					for(int r=0; r 1 )
			LibMatrixMult.matrixMultTransposeSelf(this, out, leftTranspose, k);
		else
			LibMatrixMult.matrixMultTransposeSelf(this, out, leftTranspose);
		
		return out;
	}
	
	/**
	 * 
	 * @param v
	 * @param w
	 * @param out
	 * @param ctype
	 * @return
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public MatrixBlock chainMatrixMultOperations( MatrixBlock v, MatrixBlock w, MatrixBlock out, ChainType ctype ) 
		throws DMLRuntimeException, DMLUnsupportedOperationException 	
	{
		return chainMatrixMultOperations(v, w, out, ctype, 1);
	}
	
	/**
	 * 
	 * @param v
	 * @param w
	 * @param out
	 * @param ctype
	 * @return
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public MatrixBlock chainMatrixMultOperations( MatrixBlock v, MatrixBlock w, MatrixBlock out, ChainType ctype, int k ) 	
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{
		//check for transpose type
		if( !(ctype == ChainType.XtXv || ctype == ChainType.XtwXv || ctype == ChainType.XtXvy) )
			throw new DMLRuntimeException("Invalid mmchain type '"+ctype.toString()+"'.");
		
		//check for matching dimensions
		if( this.getNumColumns() != v.getNumRows() )
			throw new DMLRuntimeException("Dimensions mismatch on mmchain operation ("+this.getNumColumns()+" != "+v.getNumRows()+")");
		if( v!=null && v.getNumColumns() != 1 )
			throw new DMLRuntimeException("Invalid input vector (column vector expected, but ncol="+v.getNumColumns()+")");
		if( w!=null && w.getNumColumns() != 1 )
			throw new DMLRuntimeException("Invalid weight vector (column vector expected, but ncol="+w.getNumColumns()+")");
		
		//prepare result
		if( out != null )
			out.reset(clen, 1, false);
		else 
			out = new MatrixBlock(clen, 1, false);
		
		//compute matrix mult
		if( k > 1 )
			LibMatrixMult.matrixMultChain(this, v, w, out, ctype, k);
		else
			LibMatrixMult.matrixMultChain(this, v, w, out, ctype);
		
		return out;
	}
	
	/**
	 * 
	 * @param m1Val
	 * @param m2Val
	 * @param out1Val
	 * @param out2Val
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public void permutationMatrixMultOperations( MatrixValue m2Val, MatrixValue out1Val, MatrixValue out2Val ) 	
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{
		permutationMatrixMultOperations(m2Val, out1Val, out2Val, 1);
	}
	
	/**
	 * 
	 * @param m1Val
	 * @param m2Val
	 * @param out1Val
	 * @param out2Val
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public void permutationMatrixMultOperations( MatrixValue m2Val, MatrixValue out1Val, MatrixValue out2Val, int k ) 	
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{
		//check input types and dimensions
		MatrixBlock m2 = checkType(m2Val);
		MatrixBlock ret1 = checkType(out1Val);
		MatrixBlock ret2 = checkType(out2Val);
		
		if(this.rlen!=m2.rlen)
			throw new RuntimeException("Dimensions do not match for permutation matrix multiplication ("+this.rlen+"!="+m2.rlen+").");

		//compute permutation matrix multiplication
		if (k > 1)
			LibMatrixMult.matrixMultPermute(this, m2, ret1, ret2, k);
		else
			LibMatrixMult.matrixMultPermute(this, m2, ret1, ret2);

	}
	
	/**
	 * Method to perform leftIndexing operation for a given lower and upper bounds in row and column dimensions.
	 * Updated matrix is returned as the output.
	 * 
	 * Operations to be performed: 
	 *   1) result=this; 
	 *   2) result[rowLower:rowUpper, colLower:colUpper] = rhsMatrix;
	 *  
	 * @throws DMLRuntimeException 
	 * @throws DMLUnsupportedOperationException 
	 */
	public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, int rl, int ru, 
			int cl, int cu, MatrixBlock ret, boolean inplace) 
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{	
		// Check the validity of bounds
		if ( rl < 0 || rl >= getNumRows() || ru < rl || ru >= getNumRows()
				|| cl < 0 || cu >= getNumColumns() || cu < cl || cu >= getNumColumns() ) {
			throw new DMLRuntimeException("Invalid values for matrix indexing: ["+(rl+1)+":"+(ru+1)+"," + (cl+1)+":"+(cu+1)+"] " +
							"must be within matrix dimensions ["+getNumRows()+","+getNumColumns()+"].");
		}
		
		if ( (ru-rl+1) < rhsMatrix.getNumRows() || (cu-cl+1) < rhsMatrix.getNumColumns()) {
			throw new DMLRuntimeException("Invalid values for matrix indexing: " +
					"dimensions of the source matrix ["+rhsMatrix.getNumRows()+"x" + rhsMatrix.getNumColumns() + "] " +
					"do not match the shape of the matrix specified by indices [" +
					(rl+1) +":" + (ru+1) + ", " + (cl+1) + ":" + (cu+1) + "].");
		}
		
		MatrixBlock result = ret;
		
		boolean sp = estimateSparsityOnLeftIndexing(rlen, clen, nonZeros, 
				     rhsMatrix.getNumRows(), rhsMatrix.getNumColumns(), rhsMatrix.getNonZeros());
		
		if( !inplace ) //general case
		{
			if(result==null)
				result=new MatrixBlock(rlen, clen, sp);
			else
				result.reset(rlen, clen, sp);
			result.copy(this, sp);
		}
		else //update in-place
		{
			//use current block as in-place result
			result = this;
			
			//ensure that the current block adheres to the sparsity estimate
			//and thus implicitly the memory budget used by the compiler
			if( result.sparse && !sp )
				result.sparseToDense();
			else if( !result.sparse && sp )
				result.denseToSparse();	
		}
		
		//NOTE conceptually we could directly use a zeroout and copy(..., false) but
		//     since this was factors slower, we still use a full copy and subsequently
		//     copy(..., true) - however, this can be changed in the future once we 
		//     improved the performance of zeroout.
		//result = (MatrixBlockDSM) zeroOutOperations(result, new IndexRange(rowLower,rowUpper, colLower, colUpper ), false);
		
		MatrixBlock src = (MatrixBlock)rhsMatrix;
		

		if(rl==ru && cl==cu) //specific case: cell update			
		{
			//copy single value and update nnz
			result.quickSetValue(rl, cl, src.quickGetValue(0, 0));
		}		
		else //general case
		{
			//copy submatrix into result
			result.copy(rl, ru, cl, cu, src, true);
		}

		return result;
	}
	
	/**
	 * Explicitly allow left indexing for scalars. Note: This operation is now 0-based.
	 * 
	 * * Operations to be performed: 
	 *   1) result=this; 
	 *   2) result[row,column] = scalar.getDoubleValue();
	 * 
	 * @param scalar
	 * @param row
	 * @param col
	 * @param ret
	 * @return
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public MatrixBlock leftIndexingOperations(ScalarObject scalar, int rl, int cl, MatrixBlock ret, boolean inplace) 
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{
		double inVal = scalar.getDoubleValue();
		boolean sp = estimateSparsityOnLeftIndexing(rlen, clen, nonZeros, 1, 1, (inVal!=0)?1:0);
		
		if( !inplace ) //general case
		{
			if(ret==null)
				ret=new MatrixBlock(rlen, clen, sp);
			else
				ret.reset(rlen, clen, sp);
			ret.copy(this, sp);
			
		}
		else //update in-place
			ret = this;
		
		ret.quickSetValue(rl, cl, inVal);
		return ret;
	}
	
	/**
	 * Method to perform rangeReIndex operation for a given lower and upper bounds in row and column dimensions.
	 * Extracted submatrix is returned as "result". Note: This operation is now 0-based.
	 * 
	 * @throws DMLRuntimeException 
	 * @throws DMLUnsupportedOperationException 
	 */
	public MatrixBlock sliceOperations(int rl, int ru, int cl, int cu, MatrixBlock ret) 
		throws DMLRuntimeException 
	{	
		// check the validity of bounds
		if ( rl < 0 || rl >= getNumRows() || ru < rl || ru >= getNumRows()
				|| cl < 0 || cu >= getNumColumns() || cu < cl || cu >= getNumColumns() ) {
			throw new DMLRuntimeException("Invalid values for matrix indexing: ["+(rl+1)+":"+(ru+1)+"," + (cl+1)+":"+(cu+1)+"] " +
							"must be within matrix dimensions ["+getNumRows()+","+getNumColumns()+"]");
		}
		
		// Output matrix will have the same sparsity as that of the input matrix.
		// (assuming a uniform distribution of non-zeros in the input)
		MatrixBlock result=checkType(ret);
		long estnnz= (long) ((double)this.nonZeros/rlen/clen*(ru-rl+1)*(cu-cl+1));
		boolean result_sparsity = this.sparse && MatrixBlock.evalSparseFormatInMemory(ru-rl+1, cu-cl+1, estnnz);
		if(result==null)
			result=new MatrixBlock(ru-rl+1, cu-cl+1, result_sparsity, estnnz);
		else
			result.reset(ru-rl+1, cu-cl+1, result_sparsity, estnnz);
		
		// actual slice operation
		if( rl==0 && ru==rlen-1 && cl==0 && cu==clen-1 ) {
			// copy if entire matrix required
			result.copy( this );
		}
		else //general case
		{
			//core slicing operation (nnz maintained internally)
			if (sparse) 
				sliceSparse(rl, ru, cl, cu, result);
			else 
				sliceDense(rl, ru, cl, cu, result);
		}
		
		return result;
	}

	
	/**
	 * 
	 * @param rl
	 * @param ru
	 * @param cl
	 * @param cu
	 * @param dest
	 * @throws DMLRuntimeException 
	 */
	private void sliceSparse(int rl, int ru, int cl, int cu, MatrixBlock dest) 
		throws DMLRuntimeException
	{
		//check for early abort
		if( isEmptyBlock(false) ) 
			return;
		
		if( cl==cu ) //COLUMN VECTOR 
		{
			//note: always dense dest
			dest.allocateDenseBlock();
			for( int i=rl; i<=ru; i++ ) {
				SparseRow arow = sparseRows[i];
				if( arow != null && !arow.isEmpty() ) {
					double val = arow.get(cl);
					if( val != 0 ) {
						dest.denseBlock[i-rl] = val;
						dest.nonZeros++;
					}
				}
			}
		}
		else if( rl==ru && cl==0 && cu==clen-1 ) //ROW VECTOR 
		{
			//note: always sparse dest, but also works for dense
			dest.appendRow(0, sparseRows[rl]);
		}
		else //general case (sparse/dense dest)
		{
			for(int i=rl; i <= ru; i++) 
				if(sparseRows[i] != null && !sparseRows[i].isEmpty()) 
				{
					SparseRow arow = sparseRows[i];
					int alen = arow.size();
					int[] aix = arow.getIndexContainer();
					double[] avals = arow.getValueContainer();
					int astart = (cl>0)?arow.searchIndexesFirstGTE(cl):0;
					if( astart != -1 )
						for( int j=astart; j vector
			{
				System.arraycopy(denseBlock, rl, dest.denseBlock, 0, ru-rl+1);
			}
			else //matrix -> vector
			{
				//IBM JVM bug (JDK7) causes crash for certain cl/cu values (e.g., divide by zero for 4) 
				//for( int i=rl*clen+cl, ix=0; i<=ru*clen+cu; i+=clen, ix++ )
				//	dest.denseBlock[ix] = denseBlock[i];
				int len = clen;
				for( int i=rl*len+cl, ix=0; i<=ru*len+cu; i+=len, ix++ )
					dest.denseBlock[ix] = denseBlock[i];
			}
		}
		else // GENERAL RANGE INDEXING
		{
			//IBM JVM bug (JDK7) causes crash for certain cl/cu values (e.g., divide by zero for 4) 
			//for(int i = rl, ix1 = rl*clen+cl, ix2=0; i <= ru; i++, ix1+=clen, ix2+=dest.clen) 
			//	System.arraycopy(denseBlock, ix1, dest.denseBlock, ix2, dest.clen);
			int len1 = clen;
			int len2 = dest.clen;
			for(int i = rl, ix1 = rl*len1+cl, ix2=0; i <= ru; i++, ix1+=len1, ix2+=len2) 
				System.arraycopy(denseBlock, ix1, dest.denseBlock, ix2, len2);
		}
		
		//compute nnz of output (not maintained due to native calls)
		dest.recomputeNonZeros();
	}
	
	public void sliceOperations(ArrayList outlist, IndexRange range, int rowCut, int colCut, 
			int normalBlockRowFactor, int normalBlockColFactor, int boundaryRlen, int boundaryClen)
	{
		MatrixBlock topleft=null, topright=null, bottomleft=null, bottomright=null;
		Iterator p=outlist.iterator();
		int blockRowFactor=normalBlockRowFactor, blockColFactor=normalBlockColFactor;
		if(rowCut>range.rowEnd)
			blockRowFactor=boundaryRlen;
		if(colCut>range.colEnd)
			blockColFactor=boundaryClen;
		
		int minrowcut=(int)Math.min(rowCut,range.rowEnd);
		int mincolcut=(int)Math.min(colCut, range.colEnd);
		int maxrowcut=(int)Math.max(rowCut, range.rowStart);
		int maxcolcut=(int)Math.max(colCut, range.colStart);
		
		if(range.rowStart=colCut)
		{
			topright=(MatrixBlock) p.next().getValue();
			topright.reset(blockRowFactor, boundaryClen, 
					estimateSparsityOnSlice(minrowcut-(int)range.rowStart, (int)range.colEnd-maxcolcut+1, blockRowFactor, boundaryClen));
		}
		if(range.rowEnd>=rowCut && range.colStart=rowCut && range.colEnd>=colCut)
		{
			bottomright=(MatrixBlock) p.next().getValue();
			bottomright.reset(boundaryRlen, boundaryClen, 
					estimateSparsityOnSlice((int)range.rowEnd-maxrowcut+1, (int)range.colEnd-maxcolcut+1, boundaryRlen, boundaryClen));
		}
		
		if(sparse)
		{
			if(sparseRows!=null)
			{
				int r=(int)range.rowStart;
				for(; rend) 
			return;
		
		//actual slice operation
		for(int i=start; i<=end; i++) {
			if(cols[i] outlist, int blockRowFactor,
			int blockColFactor, boolean cbind, boolean m2IsLast, int nextNCol)
			throws DMLUnsupportedOperationException, DMLRuntimeException 
	{	
		MatrixBlock m2 = (MatrixBlock)v2;
		
		//case 1: copy lhs and rhs to output
		if( cbind && clen==blockColFactor 
			|| !cbind && rlen==blockRowFactor )
		{
			((MatrixBlock) outlist.get(0).getValue()).copy(this);
			((MatrixBlock) outlist.get(1).getValue()).copy(m2);
		}
		//case 2: append part of rhs to lhs, append to 2nd output if necessary
		else
		{
			//single output block (via plain append operation)
			if( cbind && clen + m2.clen < blockColFactor
				|| !cbind && rlen + m2.rlen < blockRowFactor )
			{
				appendOperations(m2, (MatrixBlock) outlist.get(0).getValue(), cbind);
			}
			//two output blocks (via slice and append)
			else
			{
				//prepare output block 1
				MatrixBlock ret1 = (MatrixBlock) outlist.get(0).getValue();
				int lrlen1 = cbind ? rlen-1 : blockRowFactor-rlen-1;
				int lclen1 = cbind ? blockColFactor-clen-1 : clen-1;
				MatrixBlock tmp1 = m2.sliceOperations(0, lrlen1, 0, lclen1, new MatrixBlock());
				appendOperations(tmp1, ret1, cbind);
	
				//prepare output block 2
				MatrixBlock ret2 = (MatrixBlock) outlist.get(1).getValue();
				if( cbind )
					m2.sliceOperations(0, rlen-1, lclen1+1, m2.clen-1, ret2);
				else
					m2.sliceOperations(lrlen1+1, m2.rlen-1, 0, clen-1, ret2);
			}
		}
	}
	
	/**
	 * 
	 */
	public MatrixValue zeroOutOperations(MatrixValue result, IndexRange range, boolean complementary)
			throws DMLUnsupportedOperationException, DMLRuntimeException 
	{
		checkType(result);
		double currentSparsity=(double)nonZeros/(double)rlen/(double)clen;
		double estimatedSps=currentSparsity*(double)(range.rowEnd-range.rowStart+1)
		                    *(double)(range.colEnd-range.colStart+1)/(double)rlen/(double)clen;
		if(!complementary)
			estimatedSps=currentSparsity-estimatedSps;
		
		boolean lsparse = evalSparseFormatInMemory(rlen, clen, (long)(estimatedSps*rlen*clen));
		
		if(result==null)
			result=new MatrixBlock(rlen, clen, lsparse, (int)(estimatedSps*rlen*clen));
		else
			result.reset(rlen, clen, lsparse, (int)(estimatedSps*rlen*clen));
		
		
		if(sparse)
		{
			if(sparseRows!=null)
			{
				if(!complementary)//if zero out
				{
					for(int r=0; rend) 
							continue;
						
						for(int i=start; i 1 )
				LibMatrixAgg.aggregateUnaryMatrix(this, ret, op, op.getNumThreads());
			else
				LibMatrixAgg.aggregateUnaryMatrix(this, ret, op);
			LibMatrixAgg.recomputeIndexes(ret, op, blockingFactorRow, blockingFactorCol, indexesIn);
		}
		else if(op.sparseSafe)
			sparseAggregateUnaryHelp(op, ret, blockingFactorRow, blockingFactorCol, indexesIn);
		else
			denseAggregateUnaryHelp(op, ret, blockingFactorRow, blockingFactorCol, indexesIn);
		
		if(op.aggOp.correctionExists && inCP)
			((MatrixBlock)result).dropLastRowsOrColums(op.aggOp.correctionLocation);
		
		return ret;
	}
	
	private void sparseAggregateUnaryHelp(AggregateUnaryOperator op, MatrixBlock result,
			int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn) throws DMLRuntimeException
	{
		//initialize result
		if(op.aggOp.initialValue!=0)
			result.resetDenseWithValue(result.rlen, result.clen, op.aggOp.initialValue);
		
		CellIndex tempCellIndex = new CellIndex(-1,-1);
		KahanObject buffer=new KahanObject(0,0);
		int r = 0, c = 0;
		
		if(sparse)
		{
			if(sparseRows!=null)
			{
				for(r=0; r=0)
							{
								this.nonZeros-=sparseRows[r].size()-newSize;
								sparseRows[r].truncate(newSize);
							}
						}
				}
			}
			else //DENSE
			{
				if(this.denseBlock!=null)
				{
					//the first row doesn't need to be copied
					int targetIndex=clen-step;
					int sourceOffset=clen;
					this.nonZeros=0;
					for(int i=0; i 0 ) 
			index = 0;
		else
			index = 1;
		
		// keep scanning the weights, until we hit the required position fromPos
		while ( count < fromPos ) {
			count += quickGetValue(index,1);
			++index;
		}
		
		double runningSum; 
		double val;
		int wt, selectedCount;
		
		runningSum = (count-fromPos) * quickGetValue(index-1, 0);
		selectedCount = (count-fromPos);
		
		while(count <= toPos ) {
			val = quickGetValue(index,0);
			wt = (int) quickGetValue(index,1);
			
			runningSum += (val * Math.min(wt, selectRange-selectedCount));
			selectedCount += Math.min(wt, selectRange-selectedCount);
			count += wt;
			++index;
		}
		
		//System.out.println(fromPos + ", " + toPos + ": " + count + ", "+ runningSum + ", " + selectedCount);
		
		return runningSum/selectedCount;
	}
	
	public MatrixValue pickValues(MatrixValue quantiles, MatrixValue ret) 
		throws DMLUnsupportedOperationException, DMLRuntimeException {
	
		MatrixBlock qs=checkType(quantiles);
		
		if ( qs.clen != 1 ) {
			throw new DMLRuntimeException("Multiple quantiles can only be computed on a 1D matrix");
		}
		
		MatrixBlock output = checkType(ret);

		if(output==null)
			output=new MatrixBlock(qs.rlen, qs.clen, false); // resulting matrix is mostly likely be dense
		else
			output.reset(qs.rlen, qs.clen, false);
		
		for ( int i=0; i < qs.rlen; i++ ) {
			output.quickSetValue(i, 0, this.pickValue(qs.quickGetValue(i,0)) );
		}
		
		return output;
	}
	
	public double median() throws DMLRuntimeException {
		double sum_wt = sumWeightForQuantile();
		return pickValue(0.5, sum_wt%2==0);
	}
	
	public double pickValue(double quantile) throws DMLRuntimeException{
		return pickValue(quantile, false);
	}
	
	public double pickValue(double quantile, boolean average) 
		throws DMLRuntimeException 
	{
		double sum_wt = sumWeightForQuantile();
		
		// do averaging only if it is asked for; and sum_wt is even
		average = average && (sum_wt%2 == 0);
		
		int pos = (int) Math.ceil(quantile*sum_wt);
		
		int t = 0, i=-1;
		do {
			i++;
			t += quickGetValue(i,1);
		} while(t 1 )
			LibMatrixMult.matrixMult(m1, m2, ret, op.getNumThreads());
		else
			LibMatrixMult.matrixMult(m1, m2, ret);
		
		return ret;
	}
	
	/**
	 * 
	 * @param m1
	 * @param m2
	 * @param m3
	 * @param op
	 * @return
	 * @throws DMLUnsupportedOperationException
	 * @throws DMLRuntimeException
	 */
	public ScalarObject aggregateTernaryOperations(MatrixBlock m1, MatrixBlock m2, MatrixBlock m3, AggregateBinaryOperator op) 
		throws DMLUnsupportedOperationException, DMLRuntimeException
	{
		//check input dimensions and operators
		if( m1.rlen!=m2.rlen || m1.clen!=m2.clen || (m3!=null && (m2.rlen!=m3.rlen || m2.clen!=m3.clen)) )
			throw new DMLRuntimeException("Invalid dimensions for aggregate tertiary ("+m1.rlen+"x"+m1.clen+", "+m2.rlen+"x"+m2.clen+", "+m3.rlen+"x"+m3.clen+").");
		if( !( op.aggOp.increOp.fn instanceof KahanPlus && op.binaryFn instanceof Multiply) )
			throw new DMLRuntimeException("Unsupported operator for aggregate tertiary operations.");
		
		//execute ternary aggregate function
		double val = -1;
		if( op.getNumThreads() > 1 )
			val = LibMatrixAgg.aggregateTernary(m1, m2, m3, op.getNumThreads());
		else
			val = LibMatrixAgg.aggregateTernary(m1, m2, m3);
		
		//create output
		return new DoubleObject(val);
	}
	
	
	/**
	 * 
	 * @param mbLeft
	 * @param mbRight
	 * @param mbOut
	 * @param bOp
	 * @oaram uaggOp
	 * @return
	 * @throws DMLUnsupportedOperationException
	 * @throws DMLRuntimeException
	 */
	public MatrixBlock  uaggouterchainOperations(MatrixBlock mbLeft, MatrixBlock mbRight, MatrixBlock mbOut, BinaryOperator bOp, AggregateUnaryOperator uaggOp) 
		throws DMLRuntimeException, DMLUnsupportedOperationException
	{
		double bv[] = DataConverter.convertToDoubleVector(mbRight);
		int bvi[] = null;
		
		//process instruction
		if (LibMatrixOuterAgg.isSupportedUaggOp(uaggOp, bOp))
		{
			if((LibMatrixOuterAgg.isRowIndexMax(uaggOp)) || (LibMatrixOuterAgg.isRowIndexMin(uaggOp))) 
			{
				bvi = LibMatrixOuterAgg.prepareRowIndices(bv.length, bv, bOp, uaggOp);
			} else {
				Arrays.sort(bv);
			}

			int iRows = (uaggOp.indexFn instanceof ReduceCol ? mbLeft.getNumRows(): 2); 
			int iCols = (uaggOp.indexFn instanceof ReduceRow ? mbLeft.getNumColumns(): 2); 
			if(mbOut==null)
				mbOut=new MatrixBlock(iRows, iCols, false);  // Output matrix will be dense matrix most of the time.
			else
				mbOut.reset(iRows, iCols, false);

			LibMatrixOuterAgg.aggregateMatrix(mbLeft, mbOut, bv, bvi, bOp, uaggOp);
		} else
			throw new DMLRuntimeException("Unsupported operator for unary aggregate operations.");
		
		return mbOut;
	}
	
		
	/**
	 * Invocation from CP instructions. The aggregate is computed on the groups object
	 * against target and weights. 
	 * 
	 * Notes:
	 * * The computed number of groups is reused for multiple invocations with different target.
	 * * This implementation supports that the target is passed as column or row vector,
	 *   in case of row vectors we also use sparse-safe implementations for sparse safe
	 *   aggregation operators.
	 * 
	 * @param tgt
	 * @param wghts
	 * @param ret
	 * @param op
	 * @return
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public MatrixBlock groupedAggOperations(MatrixValue tgt, MatrixValue wghts, MatrixValue ret, int ngroups, Operator op) 
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{
		//single-threaded grouped aggregate 
		return groupedAggOperations(tgt, wghts, ret, ngroups, op, 1);
	}
	
	/**
	 * 
	 * @param tgt
	 * @param wghts
	 * @param ret
	 * @param ngroups
	 * @param op
	 * @param k
	 * @return
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public MatrixBlock groupedAggOperations(MatrixValue tgt, MatrixValue wghts, MatrixValue ret, int ngroups, Operator op, int k) 
		throws DMLRuntimeException, DMLUnsupportedOperationException 		
	{
		//setup input matrices
		MatrixBlock target = checkType(tgt);
		MatrixBlock weights = checkType(wghts);
		
		//check valid dimensions
		boolean validMatrixOp = (weights == null && ngroups>=1);
		if( this.getNumColumns() != 1 || (weights!=null && weights.getNumColumns()!=1) )
			throw new DMLRuntimeException("groupedAggregate can only operate on 1-dimensional column matrices for groups and weights.");
		if( target.getNumColumns() != 1 && op instanceof CMOperator && !validMatrixOp )
			throw new DMLRuntimeException("groupedAggregate can only operate on 1-dimensional column matrices for target (for this aggregation function).");
		if( target.getNumColumns() != 1 && target.getNumRows()!=1 && !validMatrixOp )
			throw new DMLRuntimeException("groupedAggregate can only operate on 1-dimensional column or row matrix for target.");
		if( this.getNumRows() != target.getNumRows() && this.getNumRows() !=Math.max(target.getNumRows(),target.getNumColumns()) || (weights != null && this.getNumRows() != weights.getNumRows()) ) 
			throw new DMLRuntimeException("groupedAggregate can only operate on matrices with equal dimensions.");
		
		// obtain numGroups from instruction, if provided
		if (ngroups > 0)
			numGroups = ngroups;
		
		// Determine the number of groups
		if( numGroups <= 0 ) //reuse if available
		{
			double min = this.min();
			double max = this.max();
			
			if ( min <= 0 )
				throw new DMLRuntimeException("Invalid value (" + min + ") encountered in 'groups' while computing groupedAggregate");
			if ( max <= 0 )
				throw new DMLRuntimeException("Invalid value (" + max + ") encountered in 'groups' while computing groupedAggregate.");
		
			numGroups = (int) max;
		}
	
		// Allocate result matrix
		boolean rowVector = (target.getNumRows()==1 && target.getNumColumns()>1);
		MatrixBlock result = checkType(ret);
		boolean result_sparsity = estimateSparsityOnGroupedAgg(rlen, numGroups);
		if(result==null)
			result=new MatrixBlock(numGroups, rowVector?1:target.getNumColumns(), result_sparsity);
		else
			result.reset(numGroups, rowVector?1:target.getNumColumns(), result_sparsity);

		//execute grouped aggregate operation
		if( k > 1 )
			LibMatrixAgg.groupedAggregate(this, target, weights, result, numGroups, op, k);
		else
			LibMatrixAgg.groupedAggregate(this, target, weights, result, numGroups, op);
		
		return result;
	}
	
	
	/**
	 * 
	 * @param ret
	 * @param rows
	 * @param select
	 * @return
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public MatrixBlock removeEmptyOperations( MatrixBlock ret, boolean rows, MatrixBlock select )
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{	
		MatrixBlock result = checkType(ret);
		return LibMatrixReorg.rmempty(this, result, rows, select);
	}
	
	/**
	 * 
	 * @param ret
	 * @param rows
	 * @return
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public MatrixBlock removeEmptyOperations( MatrixBlock ret, boolean rows)
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{
		return removeEmptyOperations(ret, rows, null);
	}
	
	/**
	 * 
	 * @param ret
	 * @param max
	 * @param rows
	 * @param cast
	 * @param ignore
	 * @return
	 * @throws DMLRuntimeException
	 * @throws DMLUnsupportedOperationException
	 */
	public MatrixBlock rexpandOperations( MatrixBlock ret, double max, boolean rows, boolean cast, boolean ignore )
		throws DMLRuntimeException, DMLUnsupportedOperationException 
	{	
		MatrixBlock result = checkType(ret);
		return LibMatrixReorg.rexpand(this, result, max, rows, cast, ignore);
	}
	
	
	@Override
	public MatrixValue replaceOperations(MatrixValue result, double pattern, double replacement) 
		throws DMLUnsupportedOperationException, DMLRuntimeException
	{
		MatrixBlock ret = checkType(result);
		examSparsity(); //ensure its in the right format
		ret.reset(rlen, clen, sparse);
		if( nonZeros == 0 && pattern != 0  )
			return ret; //early abort
		boolean NaNpattern = Double.isNaN(pattern);
		
		if( sparse ) //SPARSE
		{
			if( pattern != 0d ) //SPARSE <- SPARSE (sparse-safe)
			{
				ret.allocateSparseRowsBlock();
				SparseRow[] a = sparseRows;
				SparseRow[] c = ret.sparseRows;
				
				for( int i=0; i[min,max].
	 * 
	 * When pdf="normal", cell values are drawn from standard normal
	 * distribution N(0,1). The range of generated values will always be
	 * (-Inf,+Inf).
	 * 
	 * @param rgen
	 * @param nnzInBlock
	 * @param bigrand
	 * @param bSeed
	 * @return
	 * @throws DMLRuntimeException
	 */
	public MatrixBlock randOperationsInPlace(
								RandomMatrixGenerator rgen, long[] nnzInBlock, 
								Well1024a bigrand, long bSeed ) 
		throws DMLRuntimeException
	{
		LibMatrixDatagen.generateRandomMatrix( this, rgen, nnzInBlock, bigrand, bSeed );
		return this;
	}
	
	/**
	 * Function to generate a matrix of random numbers. This is invoked both
	 * from CP as well as from MR. In case of CP, it generates an entire matrix
	 * block-by-block. A bigrand is passed so that block-level
	 * seeds are generated internally. In case of MR, it generates a single
	 * block for given block-level seed bSeed.
	 * 
	 * When pdf="uniform", cell values are drawn from uniform distribution in
	 * range [min,max].
	 * 
	 * When pdf="normal", cell values are drawn from standard normal
	 * distribution N(0,1). The range of generated values will always be
	 * (-Inf,+Inf).
	 * 
	 * @param rgen
	 * @param nnzInBlock
	 * @param bigrand
	 * @param bSeed
	 * @param k
	 * @return
	 * @throws DMLRuntimeException
	 */
	public MatrixBlock randOperationsInPlace(RandomMatrixGenerator rgen, 
			long[] nnzInBlock, Well1024a bigrand, long bSeed, int k) 
		throws DMLRuntimeException
	{
		LibMatrixDatagen.generateRandomMatrix( this, rgen, nnzInBlock, 
				bigrand, bSeed, k );
		return this;
	}
	
	/**
	 * Method to generate a sequence according to the given parameters. The
	 * generated sequence is always in dense format.
	 * 
	 * Both end points specified from and to must be
	 * included in the generated sequence i.e., [from,to] both inclusive. Note
	 * that, to is included only if (to-from) is perfectly
	 * divisible by incr.
	 * 
	 * For example, seq(0,1,0.5) generates (0.0 0.5 1.0) 
	 *      whereas seq(0,1,0.6) generates (0.0 0.6) but not (0.0 0.6 1.0)
	 * 
	 * @param from
	 * @param to
	 * @param incr
	 * @return
	 * @throws DMLRuntimeException
	 */
	public static MatrixBlock seqOperations(double from, double to, double incr) 
		throws DMLRuntimeException 
	{
		MatrixBlock out = new MatrixBlock();
		LibMatrixDatagen.generateSequence( out, from, to, incr );
		
		return out;
	}
	
	/**
	 * 
	 * @param from
	 * @param to
	 * @param incr
	 * @return
	 * @throws DMLRuntimeException
	 */
	public MatrixBlock seqOperationsInPlace(double from, double to, double incr) 
		throws DMLRuntimeException 
	{
		LibMatrixDatagen.generateSequence( this, from, to, incr );
		
		return this;
	}
	
	/**
	 * 
	 * @param range
	 * @param size
	 * @param replace
	 * @return
	 * @throws DMLRuntimeException
	 */
	public static MatrixBlock sampleOperations(long range, int size, boolean replace, long seed) 
		throws DMLRuntimeException 
	{
		MatrixBlock out = new MatrixBlock();
		LibMatrixDatagen.generateSample( out, range, size, replace, seed );
		
		return out;
	}
	
	////////
	// Misc methods
	
	private static MatrixBlock checkType(MatrixValue block) 
		throws RuntimeException
	{
		if( block!=null && !(block instanceof MatrixBlock))
			throw new RuntimeException("Unsupported matrix value: "+block.getClass().getSimpleName());
		return (MatrixBlock) block;
	}
	
	public void print()
	{
		System.out.println("sparse = "+sparse);
		if(!sparse)
			System.out.println("nonzeros = "+nonZeros);
		for(int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy