All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.sysml.runtime.compress.CompressedMatrixBlock Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.compress;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.PriorityQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.math3.random.Well1024a;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.lops.MMTSJ.MMTSJType;
import org.apache.sysml.lops.MapMultChain.ChainType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.compress.ColGroup.CompressionType;
import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator;
import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo;
import org.apache.sysml.runtime.compress.estim.SizeEstimatorFactory;
import org.apache.sysml.runtime.compress.utils.ConverterUtils;
import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
import org.apache.sysml.runtime.controlprogram.caching.CacheBlock;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType;
import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing;
import org.apache.sysml.runtime.functionobjects.Builtin;
import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode;
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
import org.apache.sysml.runtime.functionobjects.Multiply;
import org.apache.sysml.runtime.functionobjects.ReduceAll;
import org.apache.sysml.runtime.functionobjects.ReduceCol;
import org.apache.sysml.runtime.instructions.cp.CM_COV_Object;
import org.apache.sysml.runtime.instructions.cp.ScalarObject;
import org.apache.sysml.runtime.matrix.data.CTableMap;
import org.apache.sysml.runtime.matrix.data.LibMatrixBincell;
import org.apache.sysml.runtime.matrix.data.LibMatrixReorg;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.MatrixValue;
import org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator;
import org.apache.sysml.runtime.matrix.data.SparseBlock;
import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;
import org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator;
import org.apache.sysml.runtime.matrix.operators.AggregateOperator;
import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
import org.apache.sysml.runtime.matrix.operators.BinaryOperator;
import org.apache.sysml.runtime.matrix.operators.CMOperator;
import org.apache.sysml.runtime.matrix.operators.COVOperator;
import org.apache.sysml.runtime.matrix.operators.Operator;
import org.apache.sysml.runtime.matrix.operators.QuaternaryOperator;
import org.apache.sysml.runtime.matrix.operators.ReorgOperator;
import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
import org.apache.sysml.runtime.matrix.operators.UnaryOperator;
import org.apache.sysml.runtime.util.IndexRange;

/**
 * Experimental version of MatrixBlock that allows a compressed internal
 * representation.
 */
public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
{
	private static final long serialVersionUID = 7319972089143154057L;
	
	//internal configuration
	public static final boolean TRANSPOSE_INPUT = true;
	public static final boolean MATERIALIZE_ZEROS = false;
	public static final long MIN_PAR_AGG_THRESHOLD = 16*1024*1024; //16MB
	public static final boolean INVESTIGATE_ESTIMATES = false;
	private static final boolean LDEBUG = false; //local debug flag
	
	private static final Log LOG = LogFactory.getLog(CompressedMatrixBlock.class.getName());
	
	static {
		// Developer aid only: when the compile-time LDEBUG flag is switched on,
		// the log4j level of the compression package logger is set to DEBUG.
		if( LDEBUG ) {
			Logger compressLogger = Logger.getLogger("org.apache.sysml.runtime.compress");
			compressLogger.setLevel(Level.DEBUG);
		}
	}
	
	//column groups of the compressed representation; null as long as the
	//block has not been compressed (see isCompressed())
	//NOTE(review): raw type - the for-each loops over this list iterate
	//ColGroup elements, so the type argument was presumably lost; confirm
	protected ArrayList _colGroups = null;
	//statistics of the compression run, created at the start of compress(int)
	protected CompressionStatistics _stats = null;
	
	/**
	 * Default constructor; delegates to the {@code MatrixBlock} constructor
	 * with the arguments {@code (-1, -1, true)}. A public no-argument
	 * constructor is required for {@link Externalizable} classes.
	 */
	public CompressedMatrixBlock() {
		super(-1, -1, true);
	}
	
	/**
	 * Main constructor for building a block from scratch. All arguments are
	 * passed through unchanged to the {@code MatrixBlock} constructor; no
	 * column groups are created, i.e., the new block is not compressed.
	 * 
	 * @param rl
	 *            number of rows in the block
	 * @param cl
	 *            number of columns in the block
	 * @param sparse
	 *            true if the UNCOMPRESSED representation of the block should be
	 *            sparse
	 */
	public CompressedMatrixBlock(int rl, int cl, boolean sparse) {
		super(rl, cl, sparse);
	}

	/**
	 * "Copy" constructor to populate this compressed block with the
	 * uncompressed contents of a conventional block. Does not compress
	 * the block.
	 * 
	 * @param mb matrix block
	 */
	public CompressedMatrixBlock(MatrixBlock mb) {
		super(mb.getNumRows(), mb.getNumColumns(), mb.isInSparseFormat());
		
		//shallow copy (deep copy on compression, prevents unnecessary copy) 
		if( isInSparseFormat() )
			sparseBlock = mb.getSparseBlock();
		else
			denseBlock = mb.getDenseBlock();
		nonZeros = mb.getNonZeros();
	}

	/**
	 * Obtain the column groups.
	 * 
	 * @return the internal list of column groups (no copy is made), or null
	 *         if this block has not been compressed
	 */
	public ArrayList getColGroups() {
		return _colGroups;
	}

	/**
	 * Indicates whether this block currently holds a compressed
	 * representation, which is the case exactly if a column group list exists.
	 * 
	 * @return true if the column group list has been allocated; false if the
	 *         block has not yet been compressed
	 */
	public boolean isCompressed() {
		return _colGroups != null;
	}

	/**
	 * Indicates whether the compressed representation consists of exactly one
	 * column group and this group is a {@code ColGroupUncompressed}.
	 * 
	 * @return true if there is exactly one column group and it is uncompressed;
	 *         false otherwise (including blocks that are not compressed)
	 */
	public boolean isSingleUncompressedGroup() {
		//not compressed, or not exactly one group
		if( _colGroups == null || _colGroups.size() != 1 )
			return false;
		
		//exactly one group: decide on its type
		return _colGroups.get(0) instanceof ColGroupUncompressed;
	}

	/**
	 * Replaces the column group list by a new, empty list. As a consequence,
	 * isCompressed() returns true after this call.
	 */
	private void allocateColGroupList() {
		_colGroups = new ArrayList();
	}
	
	/**
	 * Emptiness check. Compressed blocks are empty if their number of
	 * non-zeros is 0; uncompressed blocks use the inherited check.
	 * 
	 * @param safe passed through to the inherited check for uncompressed blocks
	 * @return true if the block is considered empty
	 */
	@Override
	public boolean isEmptyBlock(boolean safe) {
		//compressed: decide on the non-zero count
		if( isCompressed() )
			return _colGroups == null || getNonZeros() == 0;
		
		//uncompressed: default matrix block behavior
		return super.isEmptyBlock(safe);
	}
	
	/**
	 * Compress the contents of this matrix block with a single thread; this
	 * is equivalent to calling {@code compress(1)}. After compression, the
	 * uncompressed data is discarded. Attempts to update this block after
	 * calling this method currently result in INCORRECT BEHAVIOR, something
	 * which should be fixed if we move ahead with this compression strategy.
	 * 
	 * +per column sparsity
	 * 
	 * @throws DMLRuntimeException if the block is already compressed or the
	 *         compression fails
	 */
	public void compress() 
		throws DMLRuntimeException
	{
		//default sequential execution (one thread)
		compress(1);
	}
	
	/**
	 * Compress block.
	 * 
	 * @param k  number of threads
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
	public void compress(int k) 
		throws DMLRuntimeException 
	{
		//check for redundant compression
		if( isCompressed() ){
			throw new DMLRuntimeException("Redundant compression, block already compressed.");
		}

		Timing time = new Timing(true);
		_stats = new CompressionStatistics();
		
		// SAMPLE-BASED DECISIONS:
		// Decisions such as testing if a column is amenable to bitmap
		// compression or evaluating co-coding potentials are made based on a
		// subset of the rows. For large datasets, sampling might take a
		// significant amount of time. So, we generate only one sample and use
		// it for the entire compression process.

		//prepare basic meta data and deep copy / transpose input
		final int numRows = getNumRows();
		final int numCols = getNumColumns();
		final boolean sparse = isInSparseFormat();
		final double sp = OptimizerUtils.getSparsity(numRows, numCols, getNonZeros());
		MatrixBlock rawblock = !TRANSPOSE_INPUT ? new MatrixBlock(this) :
			LibMatrixReorg.transpose(this, new MatrixBlock(numCols, numRows, sparse), k);
		
		//construct sample-based size estimator
		CompressedSizeEstimator bitmapSizeEstimator = 
				SizeEstimatorFactory.getSizeEstimator(rawblock, numRows);

		// The current implementation of this method is written for correctness,
		// not for performance or for minimal use of temporary space.

		// We start with a full set of columns.
		HashSet remainingCols = new HashSet();
		for (int i = 0; i < numCols; i++)
			remainingCols.add(i);

		// PHASE 1: Classify columns by compression type
		// We start by determining which columns are amenable to bitmap compression
		double uncompressedColumnSize = getUncompressedSize(numRows, 1, sp);

		// information about the bitmap amenable columns
		List bitmapCols = new ArrayList();
		List uncompressedCols = new ArrayList();
		List colsCards = new ArrayList();
		List compressedSizes = new ArrayList();
		HashMap compressionRatios = new HashMap();
		
		// Classify columns according to ratio (size uncompressed / size compressed), 
		// where a column is compressible if ratio > 1.
		CompressedSizeInfo[] sizeInfos = (k > 1) ?
				computeCompressedSizeInfos(bitmapSizeEstimator, numCols, k) : 
				computeCompressedSizeInfos(bitmapSizeEstimator, numCols);		
		for (int col = 0; col < numCols; col++)  {	
			long compressedSize = sizeInfos[col].getMinSize();
			double compRatio = uncompressedColumnSize / compressedSize;			
			if (compRatio > 1) {
				bitmapCols.add(col);
				compressionRatios.put(col, compRatio);
				colsCards.add(sizeInfos[col].getEstCarinality());
				compressedSizes.add(compressedSize);
			}
			else
				uncompressedCols.add(col);
		}

		_stats.timePhase1 = time.stop();
		if( LOG.isDebugEnabled() ) {
			LOG.debug("Compression statistics:");
			LOG.debug("--compression phase 1: "+_stats.timePhase1);
		}

		// PHASE 2: Grouping columns
		// Divide the bitmap columns into column groups.
		List bitmapColGrps = PlanningCoCoder.findCocodesByPartitioning(
				bitmapSizeEstimator, bitmapCols, colsCards, compressedSizes, numRows, 
				isInSparseFormat() ? sp : 1, k);

		_stats.timePhase2 = time.stop();
		if( LOG.isDebugEnabled() )
			LOG.debug("--compression phase 2: "+_stats.timePhase2);
		
		if( INVESTIGATE_ESTIMATES ) {
			double est = 0;
			for( int[] groupIndices : bitmapColGrps )
				est += bitmapSizeEstimator.estimateCompressedColGroupSize(groupIndices).getMinSize();
			est += uncompressedCols.size() * uncompressedColumnSize;
			_stats.estSize = est;
		}
		
		// PHASE 3: Compress and correct sample-based decisions
		ColGroup[] colGroups = (k > 1) ?
				compressColGroups(rawblock, bitmapSizeEstimator, compressionRatios, numRows, sp, bitmapColGrps, k) : 
				compressColGroups(rawblock, bitmapSizeEstimator, compressionRatios, numRows, sp, bitmapColGrps); 	
		allocateColGroupList();
		for( int j=0; j list = new ArrayList(remainingCols);
			ColGroupUncompressed ucgroup = new ColGroupUncompressed(list, rawblock);
			_colGroups.add(ucgroup);
		}
		
		_stats.size = estimateCompressedSizeInMemory();
		_stats.ratio= estimateSizeInMemory() / _stats.size;
		
		//final cleanup (discard uncompressed block)
		rawblock.cleanupBlock(true, true);
		this.cleanupBlock(true, true);
		
		_stats.timePhase4 = time.stop();
		if( LOG.isDebugEnabled() ) {
			LOG.debug("--compression phase 4: "+_stats.timePhase4);
			LOG.debug("--num col groups: "+_colGroups.size());
			LOG.debug("--compressed size: "+_stats.size);
			LOG.debug("--compression ratio: "+_stats.ratio);
		}
	}

	/**
	 * Obtain the statistics collected during compression.
	 * 
	 * @return the statistics object created by compress(int), or null if
	 *         compression has not been started on this block
	 */
	public CompressionStatistics getCompressionStatistics() {
		return _stats;
	}

	private static CompressedSizeInfo[] computeCompressedSizeInfos(CompressedSizeEstimator estim, int clen) {
		CompressedSizeInfo[] ret = new CompressedSizeInfo[clen];
		for( int col=0; col tasks = new ArrayList();
			for( int col=0; col> rtask = pool.invokeAll(tasks);	
			ArrayList ret = new ArrayList();
			for( Future lrtask : rtask )
				ret.add(lrtask.get());
			pool.shutdown();
			return ret.toArray(new CompressedSizeInfo[0]);
		}
		catch(Exception ex) {
			throw new DMLRuntimeException(ex);
		}
	}

	private static ColGroup[] compressColGroups(MatrixBlock in, CompressedSizeEstimator estim, HashMap compRatios, int rlen, double sp, List groups)
	{
		ColGroup[] ret = new ColGroup[groups.size()];
		for( int i=0; i compRatios, int rlen, double sp, List groups, int k) 
		throws DMLRuntimeException
	{
		try {
			ExecutorService pool = Executors.newFixedThreadPool( k );
			ArrayList tasks = new ArrayList();
			for( int[] colIndexes : groups )
				tasks.add(new CompressTask(in, estim, compRatios, rlen, sp, colIndexes));
			List> rtask = pool.invokeAll(tasks);	
			ArrayList ret = new ArrayList();
			for( Future lrtask : rtask )
				ret.add(lrtask.get());
			pool.shutdown();
			return ret.toArray(new ColGroup[0]);
		}
		catch(Exception ex) {
			throw new DMLRuntimeException(ex);
		}
	}

	/**
	 * Compresses a single (potentially co-coded) column group and thereby
	 * corrects the sample-based grouping decision if necessary: the bitmap of
	 * the group is extracted from the full data and, as long as the observed
	 * compression ratio is not greater than 1, the column with the lowest
	 * individual compression ratio is removed from the group and the check is
	 * repeated on the remaining columns.
	 * 
	 * @param in input block from which the bitmaps are extracted
	 * @param estim size estimator used to evaluate the extracted bitmap
	 * @param compRatios individual compression ratio per column index; must
	 *        contain an entry for every column of the group
	 * @param rlen number of rows
	 * @param sp sparsity used for the uncompressed size estimate
	 * @param colIndexes column indexes of the group
	 * @return an RLE group if its size is strictly smaller than the OLE size,
	 *         otherwise an OLE group; null if all columns had to be removed
	 */
	private static ColGroup compressColGroup(MatrixBlock in, CompressedSizeEstimator estim, HashMap<Integer, Double> compRatios, int rlen, double sp, int[] colIndexes) 
	{
		int[] allGroupIndices = null;
		int allColsCount = colIndexes.length;
		CompressedSizeInfo sizeInfo;
		// The compression type is decided based on a full bitmap since it
		// will be reused for the actual compression step.
		UncompressedBitmap ubm = null;
		// min-queue over the individual column ratios (typed to allow access
		// to the polled element's column position)
		PriorityQueue<CompressedColumn> compRatioPQ = null;
		boolean skipGroup = false;
		while (true) 
		{
			//extract bitmap and observe compression ratio
			ubm = BitmapEncoder.extractBitmap(colIndexes, in); 
			sizeInfo = estim.estimateCompressedColGroupSize(ubm);	
			double compRatio = getUncompressedSize(rlen, colIndexes.length, sp) / sizeInfo.getMinSize();
			
			if( compRatio > 1 ) {
				break; // we have a good group
			} 
			
			// modify the group
			if (compRatioPQ == null) {
				// first modification: remember the original group and queue
				// all its columns by their individual compression ratio
				allGroupIndices = colIndexes.clone();
				compRatioPQ = new PriorityQueue<CompressedColumn>();
				for (int i = 0; i < colIndexes.length; i++)
					compRatioPQ.add(new CompressedColumn(i, compRatios.get(colIndexes[i])));
			}

			// position in allGroupIndices of the column with the lowest ratio
			int removeIx = compRatioPQ.poll().colIx;
			allGroupIndices[removeIx] = -1;
			allColsCount--;
			if (allColsCount == 0) {
				skipGroup = true;
				break;
			}
			colIndexes = new int[allColsCount];
			// copying the values that do not equal -1
			int ix = 0;
			for(int col : allGroupIndices)
				if (col != -1)
					colIndexes[ix++] = col;
		}

		//no compressible column left: signal this with null
		//(presumably the caller adds these columns to the uncompressed fallback)
		if( skipGroup )
			return null;

		//create compressed column group with the smaller encoding
		long rleSize = sizeInfo.getRLESize();
		long oleSize = sizeInfo.getOLESize();
		if( rleSize < oleSize )
			return new ColGroupRLE(colIndexes, rlen, ubm);
		else
			return new ColGroupOLE(colIndexes, rlen, ubm);
	}
	
	/**
	 * Compute a conservative estimate of the uncompressed size of a column group.
	 * 
	 * @param rlen number of rows
	 * @param clen number of columns
	 * @param sparsity the sparsity
	 * @return estimate of uncompressed size of column group, computed as
	 *         8 * rlen * clen * sparsity
	 */
	private static double getUncompressedSize(int rlen, int clen, double sparsity) {
		//we estimate the uncompressed size as 8 * nnz in order to cover both
		//sparse and dense with moderate underestimation (which is conservative as 
		//it is biased towards uncompressed columns)
		//NOTE: the double literal forces floating point arithmetic from the
		//first factor on; with an int literal, 8*rlen*clen is evaluated in int
		//and silently wraps around for more than 2^28 cells
		return 8.0 * rlen * clen * sparsity;
	}

	/**
	 * Decompress block.
	 * 
	 * @return a new uncompressed matrix block containing the contents of this
	 *         block
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
	public MatrixBlock decompress() throws DMLRuntimeException 
	{
		//early abort for not yet compressed blocks
		if( !isCompressed() )
			return new MatrixBlock(this); 
		
		Timing time = new Timing(true);
		
		//preallocation sparse rows to avoid repeated reallocations		
		MatrixBlock ret = new MatrixBlock(getNumRows(), getNumColumns(), isInSparseFormat(), getNonZeros());
		if( ret.isInSparseFormat() ) {
			int[] rnnz = new int[rlen];
			for (ColGroup grp : _colGroups)
				grp.countNonZerosPerRow(rnnz, 0, rlen);
			ret.allocateSparseRowsBlock();
			SparseBlock rows = ret.getSparseBlock();
			for( int i=0; i tasks = new ArrayList();
			for( int i=0; i> rtasks = pool.invokeAll(tasks);	
			pool.shutdown();
			for( Future rt : rtasks )
				rt.get(); //error handling
		}
		catch(Exception ex) {
			throw new DMLRuntimeException(ex);
		}
		
		//post-processing 
		ret.setNonZeros(nonZeros);
		
		if( LOG.isDebugEnabled() )
			LOG.debug("decompressed block w/ k="+k+" in "+time.stop()+"ms.");
		
		return ret;
	}

	/**
	 * Estimates the memory consumption of the compressed representation as
	 * the sum of the basic matrix block size, the overhead of the column
	 * group list, and the estimated sizes of all column groups.
	 * 
	 * @return estimated in-memory size of this compressed block including
	 *         class overhead; 0 if the block is not compressed
	 */
	public long estimateCompressedSizeInMemory() {
		if( isCompressed() ) {
			// basic data inherited from MatrixBlock
			long bytes = MatrixBlock.estimateSizeInMemory(0, 0, 0);
			
			// overhead of the column group list:
			// object overhead (32B) + int size (4B) + int modCount (4B) + Object[]
			// elementData overhead + reference (32+8)B + reference for each Object (8B)
			bytes += 80 + 8 * _colGroups.size();
			
			// payload of the individual column groups
			for( ColGroup grp : _colGroups )
				bytes += grp.estimateInMemorySize();
			
			return bytes;
		}
		
		// nothing compressed yet
		return 0;
	}

	private static class CompressedColumn implements Comparable {
		int colIx;
		double compRatio;

		public CompressedColumn(int colIx, double compRatio) {
			this.colIx = colIx;
			this.compRatio = compRatio;
		}

		@Override
		public int compareTo(CompressedColumn o) {
			return (int) Math.signum(compRatio - o.compRatio);
		}
	}
	
	/**
	 * Plain container for the statistics of a compression run. All values
	 * are -1 until they have been assigned.
	 */
	public static class CompressionStatistics {
		//marker for values that have not been assigned yet
		private static final double UNSET = -1;
		
		//timings of the individual compression phases
		public double timePhase1 = UNSET;
		public double timePhase2 = UNSET;
		public double timePhase3 = UNSET;
		public double timePhase4 = UNSET;
		
		//estimated compressed size
		public double estSize = UNSET;
		//compressed size
		public double size = UNSET;
		//compression ratio
		public double ratio = UNSET;
		
		public CompressionStatistics() {
			//all fields keep their defaults
		}
	} 

	/**
	 * Obtain the value of a single cell. For compressed blocks, the column
	 * group containing the given column is looked up with a linear scan over
	 * all groups and the value is read from that group.
	 * 
	 * @param r row index
	 * @param c column index
	 * @return the value of cell (r, c)
	 * @throws IndexOutOfBoundsException if the block is compressed and no
	 *         column group contains column c
	 */
	@Override
	public double quickGetValue(int r, int c) {
		if( !isCompressed() ) {
			return super.quickGetValue(r, c);
		}
		
		//find column group according to col index and read the value from it
		//(binary search assumes ascending column indices per group - TODO confirm)
		for( ColGroup group : _colGroups )
			if( Arrays.binarySearch(group.getColIndices(), c) >= 0 )
				return group.get(r, c);
		
		//no group covers the column: fail with a descriptive error instead
		//of dereferencing a null group
		throw new IndexOutOfBoundsException(
			"Column index "+c+" is not covered by any column group.");
	}	
	
	//////////////////////////////////////////
	// Serialization / Deserialization

	/**
	 * Computes the serialized size of this compressed block as a fixed
	 * header plus, for every column group, one byte of type information and
	 * the exact on-disk size of the group.
	 * 
	 * @return serialized size
	 */
	@Override
	public long getExactSizeOnDisk() 
	{
		//fixed-size header
		//NOTE(review): readFields consumes a boolean, two ints, a long, and an
		//int before the groups - confirm this constant against the write path
		long bytes = 12;
		
		//type info (1) plus payload per column group
		for( ColGroup grp : _colGroups )
			bytes += 1L + grp.getExactSizeOnDisk();
		
		return bytes;
	}
	
	@Override
	public void readFields(DataInput in) 
		throws IOException 
	{
		boolean compressed = in.readBoolean();
		
		//deserialize uncompressed block
		if( !compressed ) {
			super.readFields(in);
			return;
		}
		
		//deserialize compressed block
		rlen = in.readInt();
		clen = in.readInt();
		nonZeros = in.readLong();
		int ncolGroups = in.readInt();
		
		_colGroups = new ArrayList(ncolGroups);
		for( int i=0; i newColGroups = new ArrayList();
		for (ColGroup grp : _colGroups) {
			newColGroups.add(grp.scalarOperation(sop));
		}
		ret._colGroups = newColGroups;
		ret.setNonZeros(rlen*clen);
		
		return ret;
	}

	/**
	 * Appends the columns of the given block to the columns of this block.
	 * If this block is not compressed, the inherited append is invoked on the
	 * (if necessary decompressed) input. Otherwise, the result is a
	 * compressed block holding the column groups of this block followed by
	 * copies of the input's column groups with shifted column indexes; an
	 * uncompressed input is compressed first.
	 * 
	 * @param that block to append
	 * @param ret output block; reused if it is a CompressedMatrixBlock
	 * @return the block holding the appended result
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
	@Override
	public MatrixBlock appendOperations(MatrixBlock that, MatrixBlock ret) 
		throws DMLRuntimeException
	{
		//uncompressed lhs: conventional append over uncompressed inputs
		if( !isCompressed() ) {
			MatrixBlock rhsPlain = (that instanceof CompressedMatrixBlock) ?
				((CompressedMatrixBlock) that).decompress() : that;
			return super.appendOperations(rhsPlain, ret, true);
		}
		
		//meta data of the output
		final int numRowsOut = rlen;
		final int numColsOut = clen+that.getNumColumns();
		final long nnzOut = nonZeros+that.getNonZeros();
		
		//reuse a given compressed output block or create a new one
		CompressedMatrixBlock out;
		if( ret instanceof CompressedMatrixBlock ) {
			out = (CompressedMatrixBlock) ret;
			out.reset(numRowsOut, numColsOut);
		}
		else {
			out = new CompressedMatrixBlock(numRowsOut, numColsOut, isInSparseFormat());
		}
		
		//lhs column groups are referenced as is (no copy)
		out.allocateColGroupList();
		out._colGroups.addAll(_colGroups);
		
		//rhs must be available in compressed form
		CompressedMatrixBlock rhs;
		if( that instanceof CompressedMatrixBlock ) {
			rhs = (CompressedMatrixBlock) that;
		}
		else {
			rhs = new CompressedMatrixBlock(that);
			rhs.compress();
		}
		
		//rhs column groups are copied and moved behind the lhs columns
		ArrayList rhsGroups = rhs._colGroups;
		for( ColGroup group : rhsGroups ) {
			ColGroup shifted = ConverterUtils.copyColGroup(group);
			shifted.shiftColIndices(clen);
			out._colGroups.add(shifted);
		}
		
		//maintain number of non-zeros
		out.setNonZeros(nnzOut);
		return out;
	}
	
	/**
	 * Single-threaded matrix multiplication chain over this block: this
	 * block is multiplied with v from the right, the intermediate vector is
	 * combined in place with w by a multiply operator for chain type XtwXv,
	 * and the (transposed) intermediate is finally multiplied with this block
	 * from the left. Uncompressed blocks and blocks with a single
	 * uncompressed column group delegate to the uncompressed implementation.
	 * 
	 * @param v right-hand vector
	 * @param w weight vector, only accessed for chain type XtwXv
	 * @param out output block (clen x 1); allocated if null, otherwise reset
	 * @param ctype chain type
	 * @return the output block
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
	@Override
	public MatrixBlock chainMatrixMultOperations(MatrixBlock v, MatrixBlock w, MatrixBlock out, ChainType ctype) 
		throws DMLRuntimeException 
	{
		//uncompressed block: conventional mmchain
		if( !isCompressed() )
			return super.chainMatrixMultOperations(v, w, out, ctype);
		
		//single uncompressed colgroup: single-threaded mmchain on its data
		if( isSingleUncompressedGroup() ) {
			ColGroupUncompressed ucGroup = (ColGroupUncompressed)_colGroups.get(0);
			return ucGroup.getData().chainMatrixMultOperations(v, w, out, ctype);
		}
		
		//prepare dense clen x 1 result
		if( out == null )
			out = new MatrixBlock(clen, 1, false);
		else
			out.reset(clen, 1, false);
		
		//nothing to compute for empty blocks
		if( isEmptyBlock(false) ) 
			return out;
		
		//right multiply, optional weighting, left multiply
		MatrixBlock xv = new MatrixBlock(rlen, 1, false);
		rightMultByVector(v, xv);
		if( ctype == ChainType.XtwXv ) {
			BinaryOperator mult = new BinaryOperator(Multiply.getMultiplyFnObject());
			LibMatrixBincell.bincellOpInPlace(xv, w, mult);
		}
		leftMultByVectorTranspose(_colGroups, xv, out, true);
		
		return out;
	}

	/**
	 * Multi-threaded matrix multiplication chain over this block: this block
	 * is multiplied with v from the right, the intermediate vector is
	 * combined in place with w by a multiply operator for chain type XtwXv,
	 * and the (transposed) intermediate is finally multiplied with this block
	 * from the left. Uncompressed blocks and blocks with a single
	 * uncompressed column group delegate to the uncompressed implementation.
	 * 
	 * @param v right-hand vector
	 * @param w weight vector, only accessed for chain type XtwXv
	 * @param out output block (clen x 1); allocated if null, otherwise reset
	 * @param ctype chain type
	 * @param k number of threads
	 * @return the output block
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
	@Override
	public MatrixBlock chainMatrixMultOperations(MatrixBlock v, MatrixBlock w, MatrixBlock out, ChainType ctype, int k) 
		throws DMLRuntimeException 
	{
		//uncompressed block: conventional mmchain
		if( !isCompressed() )
			return super.chainMatrixMultOperations(v, w, out, ctype, k);
		
		//single uncompressed colgroup: multi-threaded mmchain on its data
		if( isSingleUncompressedGroup() ) {
			ColGroupUncompressed ucGroup = (ColGroupUncompressed)_colGroups.get(0);
			return ucGroup.getData().chainMatrixMultOperations(v, w, out, ctype, k);
		}

		//timing is only taken if debug logging is enabled
		Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
		
		//prepare dense clen x 1 result
		if( out == null )
			out = new MatrixBlock(clen, 1, false);
		else
			out.reset(clen, 1, false);
		
		//nothing to compute for empty blocks
		if( isEmptyBlock(false) ) 
			return out;
		
		//right multiply, optional weighting, left multiply
		MatrixBlock xv = new MatrixBlock(rlen, 1, false);
		rightMultByVector(v, xv, k);
		if( ctype == ChainType.XtwXv ) {
			BinaryOperator mult = new BinaryOperator(Multiply.getMultiplyFnObject());
			LibMatrixBincell.bincellOpInPlace(xv, w, mult);
		}
		leftMultByVectorTranspose(_colGroups, xv, out, true, k);
		
		if( LOG.isDebugEnabled() )
			LOG.debug("Compressed MMChain k="+k+" in "+time.stop());
		
		return out;
	}
	
	/**
	 * Matrix multiplication involving this compressed block. Supported are
	 * matrix-vector multiplications from the right (mv1 has more than one
	 * row, mv2 has one column) and from the left (mv1 has one row, mv2 has
	 * more than one column); any other shape fails eagerly. Uncompressed
	 * blocks and blocks with a single uncompressed column group delegate to
	 * the uncompressed implementation.
	 * 
	 * @param mv1 left-hand operand
	 * @param mv2 right-hand operand
	 * @param result output block; allocated if null, otherwise reset
	 * @param op aggregate binary operator, also providing the number of threads
	 * @return the output block
	 * @throws DMLRuntimeException if the shape is not supported or the
	 *         multiplication fails
	 */
	@Override
	public MatrixValue aggregateBinaryOperations(MatrixValue mv1, MatrixValue mv2, MatrixValue result, AggregateBinaryOperator op)
			throws DMLRuntimeException 
	{
		//uncompressed block: conventional matrix mult
		if( !isCompressed() )
			return super.aggregateBinaryOperations(mv1, mv2, result, op);
	
		//single uncompressed colgroup: multiply on its data, which replaces
		//this block on whichever side this block has been passed
		if( isSingleUncompressedGroup() ) {
			MatrixBlock ucData = ((ColGroupUncompressed)_colGroups.get(0)).getData();
			MatrixValue lhs = (this == mv1) ? ucData : mv1;
			MatrixValue rhs = (this == mv2) ? ucData : mv2;
			return ucData.aggregateBinaryOperations(lhs, rhs, result, op);
		}
		
		//timing is only taken if debug logging is enabled
		Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
		
		//output dimensions
		int rl = mv1.getNumRows();
		int cl = mv2.getNumColumns();
		
		//dense output block, reusing the given result if available
		MatrixBlock ret = (MatrixBlock) result;
		if( ret != null )
			ret.reset(rl, cl, false, rl*cl);
		else
			ret = new MatrixBlock(rl, cl, false, rl*cl);
		
		//dispatch on the shape of the multiplication
		if( mv1.getNumRows() > 1 && mv2.getNumColumns() == 1 ) { 
			//matrix-vector from the right
			CompressedMatrixBlock lhs = (CompressedMatrixBlock)mv1;
			MatrixBlock vect = (MatrixBlock) mv2;
			if( op.getNumThreads() <= 1 )
				lhs.rightMultByVector(vect, ret);
			else
				lhs.rightMultByVector(vect, ret, op.getNumThreads());
		}
		else if( mv1.getNumRows() == 1 && mv2.getNumColumns() > 1 ) { 
			//vector-matrix from the left
			MatrixBlock vect = (MatrixBlock) mv1;
			if( op.getNumThreads() <= 1 )
				leftMultByVectorTranspose(_colGroups, vect, ret, false);
			else
				leftMultByVectorTranspose(_colGroups, vect, ret, false, op.getNumThreads());
		}
		else {
			//NOTE: decompressing and invoking the inherited method would be
			//possible, but an eager fail is preferred for now
			throw new DMLRuntimeException("Unsupported matrix-matrix multiplication over compressed matrix block.");
		}
		
		if( LOG.isDebugEnabled() )
			LOG.debug("Compressed MM in "+time.stop());
		
		return ret;
	}
	
	@Override
	public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, 
			int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) 
		throws DMLRuntimeException
	{
		//call uncompressed matrix mult if necessary
		if( !isCompressed() ) {
			return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
		}
		
		//check for supported operations
		if( !(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq
			 || (op.aggOp.increOp.fn instanceof Builtin && 
				(((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MIN 
				||((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MAX))) ){
			throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
		}
		
		Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;

		//prepare output dimensions
		CellIndex tempCellIndex = new CellIndex(-1,-1);
		op.indexFn.computeDimension(rlen, clen, tempCellIndex);
		if(op.aggOp.correctionExists) {
			switch(op.aggOp.correctionLocation)
			{
				case LASTROW: tempCellIndex.row++;  break;
				case LASTCOLUMN: tempCellIndex.column++; break;
				case LASTTWOROWS: tempCellIndex.row+=2; break;
				case LASTTWOCOLUMNS: tempCellIndex.column+=2; break;
				default:
					throw new DMLRuntimeException("unrecognized correctionLocation: "+op.aggOp.correctionLocation);	
			}
		}
		
		// initialize and allocate the result
		if(result==null)
			result=new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
		else
			result.reset(tempCellIndex.row, tempCellIndex.column, false);
		MatrixBlock ret = (MatrixBlock) result;
		ret.allocateDenseBlock();
		
		//special handling init value for rowmins/rowmax
		if( op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin ) {
			double val = Double.MAX_VALUE * ((((Builtin)op.aggOp.increOp.fn).getBuiltinCode()==BuiltinCode.MAX)?-1:1);
			Arrays.fill(ret.getDenseBlock(), val);
		}
		
		//core unary aggregate
		if(    op.getNumThreads() > 1 
			&& getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD ) 
		{
			//multi-threaded execution of all groups 
			ArrayList[] grpParts = createStaticTaskPartitioning(
					(op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
			ColGroupUncompressed uc = getUncompressedColGroup();
			try {
				//compute uncompressed column group in parallel (otherwise bottleneck)
				if( uc != null )
					 ret = (MatrixBlock)uc.getData().aggregateUnaryOperations(op, ret, blockingFactorRow, blockingFactorCol, indexesIn, false);					
				//compute all compressed column groups
				ExecutorService pool = Executors.newFixedThreadPool( op.getNumThreads() );
				ArrayList tasks = new ArrayList();
				if( op.indexFn instanceof ReduceCol && grpParts.length > 0 ) {
					int seqsz = BitmapEncoder.BITMAP_BLOCK_SZ;
					int blklen = (int)(Math.ceil((double)rlen/op.getNumThreads()));
					blklen += (blklen%seqsz != 0)?seqsz-blklen%seqsz:0;
					for( int i=0; i grp : grpParts )
						tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
				List> rtasks = pool.invokeAll(tasks);	
				pool.shutdown();
				
				//aggregate partial results
				if( op.indexFn instanceof ReduceAll ) {
					double val = ret.quickGetValue(0, 0);
					for( Future rtask : rtasks )
						val = op.aggOp.increOp.fn.execute(val, 
								rtask.get().quickGetValue(0, 0));
					ret.quickSetValue(0, 0, val);
				}		
			}
			catch(Exception ex) {
				throw new DMLRuntimeException(ex);
			}
		}
		else {
			//process UC column group
			for (ColGroup grp : _colGroups)
				if( grp instanceof ColGroupUncompressed )
					grp.unaryAggregateOperations(op, ret);
			
			//process OLE/RLE column groups
			for (ColGroup grp : _colGroups)
				if( !(grp instanceof ColGroupUncompressed) )
					grp.unaryAggregateOperations(op, ret);
		}
		
		//special handling zeros for rowmins/rowmax
		if( op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin ) {
			int[] rnnz = new int[rlen];
			for( ColGroup grp : _colGroups )
				grp.countNonZerosPerRow(rnnz, 0, rlen);
			Builtin builtin = (Builtin)op.aggOp.increOp.fn;
			for( int i=0; i tasks = new ArrayList();
				int numgrp = _colGroups.size();
				int blklen = (int)(Math.ceil((double)numgrp/(2*k)));
				for( int i=0; i<2*k & i*blklen> ret = pool.invokeAll(tasks);
				for( Future tret : ret )
					tret.get(); //check for errors
				pool.shutdown();
			}
			catch(Exception ex) {
				throw new DMLRuntimeException(ex);
			}
			
			// post-processing
			out.recomputeNonZeros();
		}
		
		if( LOG.isDebugEnabled() )
			LOG.debug("Compressed TSMM k="+k+" in "+time.stop());
		
		return out;
	}

	
	/**
	 * Single-threaded multiplication of this matrix block with a column
	 * vector on the right. The uncompressed column groups are processed
	 * before all other groups, and the number of non-zeros of the result is
	 * recomputed at the end.
	 * 
	 * @param vector
	 *            right-hand operand of the multiplication
	 * @param result
	 *            buffer to hold the result; must have the appropriate size
	 *            already
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
	private void rightMultByVector(MatrixBlock vector, MatrixBlock result)
		throws DMLRuntimeException 
	{
		// make sure the dense output exists
		result.allocateDenseBlock();
		final int numRows = result.getNumRows();

		// pass 1: uncompressed groups, which overwrite the output
		for( ColGroup grp : _colGroups ) {
			if( !(grp instanceof ColGroupUncompressed) )
				continue;
			grp.rightMultByVector(vector, result, 0, numRows);
		}
		
		// pass 2: all remaining groups, which add to the output
		for( ColGroup grp : _colGroups ) {
			if( grp instanceof ColGroupUncompressed )
				continue;
			grp.rightMultByVector(vector, result, 0, numRows);
		}
		
		// maintain the number of non-zeros
		result.recomputeNonZeros();
	}

	/**
	 * Multi-threaded version of rightMultByVector.
	 * 
	 * NOTE(review): this span looks damaged. The task-creation loop below breaks
	 * off mid-statement and continues with the parameter list of a different
	 * method (colGroups, vector, result, doTranspose); presumably the rest of
	 * this method and the head of a single-threaded leftMultByVectorTranspose
	 * were lost. Restore from the upstream source before changing any code here.
	 * 
	 * @param vector matrix block vector
	 * @param result matrix block result
	 * @param k number of threads
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
	private void rightMultByVector(MatrixBlock vector, MatrixBlock result, int k)
		throws DMLRuntimeException 
	{
		// initialize and allocate the result
		result.allocateDenseBlock();

		//multi-threaded execution of all groups
		try {
			ExecutorService pool = Executors.newFixedThreadPool( k );
			int rlen = getNumRows();
			int seqsz = BitmapEncoder.BITMAP_BLOCK_SZ;
			// rows per task: ceil(rlen/k) ...
			int blklen = (int)(Math.ceil((double)rlen/k));
			// ... rounded up to the next multiple of seqsz
			blklen += (blklen%seqsz != 0)?seqsz-blklen%seqsz:0;
			ArrayList tasks = new ArrayList();
			// NOTE(review): the next line fuses the start of the task loop with the
			// signature of another method; everything from here on uses that other
			// method's parameters (colGroups, vector, result, doTranspose)
			for( int i=0; i colGroups, MatrixBlock vector, MatrixBlock result, boolean doTranspose) 
		throws DMLRuntimeException 
	{
		//transpose vector if required
		MatrixBlock rowVector = vector;
		if (doTranspose) {
			// 1 x n target block that receives the transposed input vector
			rowVector = new MatrixBlock(1, vector.getNumRows(), false);
			LibMatrixReorg.transpose(vector, rowVector);
		}
		
		// initialize and allocate the result
		result.reset();
		result.allocateDenseBlock();
		
		// delegate matrix-vector operation to each column group
		for (ColGroup grp : colGroups) {			
			grp.leftMultByRowVector(rowVector, result);
		}

		// post-processing
		result.recomputeNonZeros();
	}
	
	/**
	 * Multi-thread version of leftMultByVectorTranspose: one task per column
	 * group multiplies the (optionally transposed) vector into the shared result.
	 * 
	 * @param colGroups list of column groups
	 * @param vector
	 *            left-hand operand of the multiplication
	 * @param result
	 *            buffer to hold the result; must have the appropriate size
	 *            already (it is reset and densely allocated here)
	 * @param doTranspose if true, transpose vector
	 * @param k number of threads
	 * @throws DMLRuntimeException if the thread pool cannot be set up or any
	 *             column group task fails
	 */
	private static void leftMultByVectorTranspose(List<ColGroup> colGroups, MatrixBlock vector, MatrixBlock result, boolean doTranspose, int k) 
		throws DMLRuntimeException 
	{
		// threads handed to an uncompressed group: whatever k leaves over once
		// every group has its own task (at least one)
		int kuc = Math.max(1, k - colGroups.size() + 1);
		
		//transpose vector if required
		MatrixBlock rowVector = vector;
		if (doTranspose) {
			rowVector = new MatrixBlock(1, vector.getNumRows(), false);
			LibMatrixReorg.transpose(vector, rowVector);
		}
		
		// initialize and allocate the result
		result.reset();
		result.allocateDenseBlock();

		//multi-threaded execution
		ExecutorService pool = null;
		try {
			pool = Executors.newFixedThreadPool( Math.min(colGroups.size(), k) );
			ArrayList<LeftMatrixMultTask> tasks = new ArrayList<LeftMatrixMultTask>();
			for( ColGroup grp : colGroups )
				tasks.add(new LeftMatrixMultTask(grp, rowVector, result, kuc));
			List<? extends Future<?>> rets = pool.invokeAll(tasks);
			// invokeAll only waits; a failure inside a task surfaces on get()
			for( Future<?> ret : rets )
				ret.get(); //check for errors
		}
		catch(Exception ex) {
			throw new DMLRuntimeException(ex);
		}
		finally {
			// release the worker threads on the error path as well
			if( pool != null )
				pool.shutdown();
		}

		// post-processing
		result.recomputeNonZeros();
	}

	// NOTE(review): this span looks damaged. Both loop headers below break off
	// mid-statement; the second one continues with the signature of
	// createStaticTaskPartitioning, so the loop bodies of leftMultByTransposeSelf
	// and the start of createStaticTaskPartitioning are presumably missing.
	// Restore from the upstream source before changing any code here.
	private static void leftMultByTransposeSelf(ArrayList groups, MatrixBlock result, int gl, int gu)
		throws DMLRuntimeException 
	{
		// row count is taken from the first group
		final int numRows = groups.get(0).getNumRows();
		final int numGroups = groups.size();		
		
		//preallocated dense matrix block
		MatrixBlock lhs = new MatrixBlock(numRows, 1, false);
		lhs.allocateDenseBlock();
		
		//approach: for each colgroup, extract uncompressed columns one at-a-time
		//vector-matrix multiplies against remaining col groups
		for( int i=gl; i tmpList = groups.subList(i, numGroups);
			
			//for all uncompressed lhs columns vectors
			// NOTE(review): from the next line on the code belongs to
			// createStaticTaskPartitioning(k, inclUncompressed), which spreads
			// the column groups round robin over at most k lists
			for( int j=0; j[] createStaticTaskPartitioning(int k, boolean inclUncompressed)
	{
		// special case: single uncompressed col group
		if( _colGroups.size()==1 && _colGroups.get(0) instanceof ColGroupUncompressed ){
			return new ArrayList[0];
		}
		
		// initialize round robin col group distribution
		// (static task partitioning to reduce mem requirements/final agg)
		int numTasks = Math.min(k, _colGroups.size());
		ArrayList[] grpParts = new ArrayList[numTasks];
		int pos = 0;
		for( ColGroup grp : _colGroups ){
			// lists are created lazily for the slot currently pointed to, so a
			// slot that is never reached stays null
			if( grpParts[pos]==null )
				grpParts[pos] = new ArrayList();
			// uncompressed groups are only assigned when explicitly requested;
			// the slot pointer advances (with wrap-around) only after an add
			if( inclUncompressed || !(grp instanceof ColGroupUncompressed) ) {
				grpParts[pos].add(grp);
				pos = (pos==numTasks-1) ? 0 : pos+1;
			}
		}
		
		return grpParts;
	}

	/**
	 * Looks up the first uncompressed column group of this block.
	 * 
	 * @return the first group that is a ColGroupUncompressed, or null if
	 *         there is none
	 */
	private ColGroupUncompressed getUncompressedColGroup()
	{
		ColGroupUncompressed match = null;
		for( ColGroup candidate : _colGroups ) {
			if( candidate instanceof ColGroupUncompressed ) {
				match = (ColGroupUncompressed)candidate;
				break; // first hit wins
			}
		}
		return match;
	}

	/**
	 * Task that multiplies a row vector from the left into a single column
	 * group and writes into a shared result block.
	 */
	private static class LeftMatrixMultTask implements Callable<Object> 
	{
		private final ColGroup _group;
		private final MatrixBlock _vect;
		private final MatrixBlock _ret;
		private final int _kuc;
		
		/**
		 * @param group column group to multiply with
		 * @param vect row vector (left-hand operand)
		 * @param ret result block the group writes into
		 * @param kuc number of threads passed to an uncompressed group
		 */
		protected LeftMatrixMultTask( ColGroup group, MatrixBlock vect, MatrixBlock ret, int kuc)  {
			_group = group;
			_vect = vect;
			_ret = ret;
			_kuc = kuc;
		}
		
		/**
		 * Runs the vector-matrix multiply for the configured group.
		 * 
		 * @return always null; the output is written to the result block
		 * @throws DMLRuntimeException if the column group operation fails
		 */
		@Override
		public Object call() throws DMLRuntimeException 
		{
			// the multi-threaded variant is only used for uncompressed groups,
			// with more than one thread and low-level optimizations enabled
			if( _group instanceof ColGroupUncompressed && _kuc >1 && ColGroupBitmap.LOW_LEVEL_OPT )
				((ColGroupUncompressed)_group).leftMultByRowVector(_vect, _ret, _kuc);
			else
				_group.leftMultByRowVector(_vect, _ret);
			return null;
		}
	}

	/**
	 * Task that multiplies all column groups with a vector on the right for
	 * the row range [rl, ru) of the shared result block.
	 */
	private static class RightMatrixMultTask implements Callable<Object> 
	{
		private final ArrayList<ColGroup> _groups;
		private final MatrixBlock _vect;
		private final MatrixBlock _ret;
		private final int _rl;
		private final int _ru;
		
		/**
		 * @param groups column groups to process
		 * @param vect right-hand vector
		 * @param ret result block the groups write into
		 * @param rl lower row bound passed to the groups
		 * @param ru upper row bound passed to the groups
		 */
		protected RightMatrixMultTask( ArrayList<ColGroup> groups, MatrixBlock vect, MatrixBlock ret, int rl, int ru)  {
			_groups = groups;
			_vect = vect;
			_ret = ret;
			_rl = rl;
			_ru = ru;
		}
		
		/**
		 * Runs the matrix-vector multiply of all groups for the row range.
		 * 
		 * @return always null; the output is written to the result block
		 * @throws DMLRuntimeException if a column group operation fails
		 */
		@Override
		public Object call() throws DMLRuntimeException 
		{
			// order matters: uncompressed groups overwrite the output, all
			// other groups add to it, so the uncompressed ones must run first
			for( ColGroup grp : _groups )
				if( grp instanceof ColGroupUncompressed ) //overwrites output
					grp.rightMultByVector(_vect, _ret, _rl, _ru);
			for( ColGroup grp : _groups )
				if( !(grp instanceof ColGroupUncompressed) ) //adds to output
					grp.rightMultByVector(_vect, _ret, _rl, _ru);
			return null;
		}
	}
	
	/**
	 * Task that runs leftMultByTransposeSelf for the group range [gl, gu)
	 * handed in at construction time.
	 */
	private static class MatrixMultTransposeTask implements Callable<Object> 
	{
		private final ArrayList<ColGroup> _groups;
		private final MatrixBlock _ret;
		private final int _gl;
		private final int _gu;
		
		/**
		 * @param groups all column groups
		 * @param ret result block
		 * @param gl lower group index passed to leftMultByTransposeSelf
		 * @param gu upper group index passed to leftMultByTransposeSelf
		 */
		protected MatrixMultTransposeTask(ArrayList<ColGroup> groups, MatrixBlock ret, int gl, int gu)  {
			_groups = groups;
			_ret = ret;
			_gl = gl;
			_gu = gu;
		}
		
		/**
		 * @return always null; the output is written to the result block
		 * @throws DMLRuntimeException if the multiplication fails
		 */
		@Override
		public Object call() throws DMLRuntimeException {
			leftMultByTransposeSelf(_groups, _ret, _gl, _gu);
			return null;
		}
	}
	
	/**
	 * Task that applies a unary aggregate to a list of bitmap column groups
	 * for the row range [rl, ru).
	 */
	private static class UnaryAggregateTask implements Callable<MatrixBlock> 
	{
		private final ArrayList<ColGroup> _groups;
		private final int _rl;
		private final int _ru;
		private final MatrixBlock _ret;
		private final AggregateUnaryOperator _op;
		
		/**
		 * @param groups column groups to aggregate; every element is cast to
		 *            ColGroupBitmap when the task runs
		 * @param ret result block; for a full reduction the task works on a
		 *            private block of the same size instead of ret itself
		 * @param rl lower row bound passed to the groups
		 * @param ru upper row bound passed to the groups
		 * @param op aggregate operator
		 */
		protected UnaryAggregateTask( ArrayList<ColGroup> groups, MatrixBlock ret, int rl, int ru, AggregateUnaryOperator op)  {
			_groups = groups;
			_op = op;
			_rl = rl;
			_ru = ru;
			
			if( _op.indexFn instanceof ReduceAll ) { //sum
				// task-local output block with the dimensions of ret
				_ret = new MatrixBlock(ret.getNumRows(), ret.getNumColumns(), false);
				_ret.allocateDenseBlock();
				// for builtin aggregation functions the current content of ret
				// is copied over as the starting value
				if( _op.aggOp.increOp.fn instanceof Builtin )
					System.arraycopy(ret.getDenseBlock(), 0, _ret.getDenseBlock(), 0, ret.getNumRows()*ret.getNumColumns());
			}
			else { //colSums
				// all tasks write into the shared result block
				_ret = ret;
			}
		}
		
		/**
		 * @return the block this task aggregated into (task-local for a full
		 *         reduction, otherwise the shared result block)
		 * @throws DMLRuntimeException if a column group operation fails
		 */
		@Override
		public MatrixBlock call() throws DMLRuntimeException {
			// delegate unary aggregate operation to each column group
			// (uncompressed column group handles separately)
			for( ColGroup grp : _groups )
				((ColGroupBitmap)grp).unaryAggregateOperations(_op, _ret, _rl, _ru);
			return _ret;
		}
	}

	/**
	 * Task that estimates the compressed size of a single column.
	 */
	private static class SizeEstimTask implements Callable<CompressedSizeInfo> 
	{
		private final CompressedSizeEstimator _estim;
		private final int _col;
		
		/**
		 * @param estim size estimator to query
		 * @param col index of the column to estimate
		 */
		protected SizeEstimTask( CompressedSizeEstimator estim, int col )  {
			_estim = estim;
			_col = col;
		}
		
		/**
		 * @return estimator result for the one-column group { col }
		 * @throws DMLRuntimeException if the estimation fails
		 */
		@Override
		public CompressedSizeInfo call() throws DMLRuntimeException {
			return _estim.estimateCompressedColGroupSize(new int[] { _col });
		}
	}

	/**
	 * Task that compresses one column group by calling compressColGroup with
	 * the arguments captured at construction time.
	 */
	private static class CompressTask implements Callable<ColGroup> 
	{
		private final MatrixBlock _in;
		private final CompressedSizeEstimator _estim;
		// NOTE(review): the type arguments of this map are not visible in this
		// part of the file, so it is kept as declared
		private final HashMap _compRatios;
		private final int _rlen;
		private final double _sp;
		private final int[] _colIndexes;
		
		/**
		 * All arguments are stored and forwarded unchanged to compressColGroup.
		 * 
		 * @param in input matrix block
		 * @param estim size estimator
		 * @param compRatios compression ratios
		 * @param rlen number of rows
		 * @param sp sparsity
		 * @param colIndexes column indexes of the group
		 */
		protected CompressTask( MatrixBlock in, CompressedSizeEstimator estim, HashMap compRatios, int rlen, double sp, int[] colIndexes )  {
			_in = in;
			_estim = estim;
			_compRatios = compRatios;
			_rlen = rlen;
			_sp = sp;
			_colIndexes = colIndexes;
		}
		
		/**
		 * @return the column group returned by compressColGroup
		 * @throws DMLRuntimeException if the compression fails
		 */
		@Override
		public ColGroup call() throws DMLRuntimeException {
			return compressColGroup(_in, _estim, _compRatios, _rlen, _sp, _colIndexes);
		}
	}
	
	/**
	 * Task that decompresses the row range [rl, ru) of all column groups
	 * into a shared target block.
	 */
	private static class DecompressTask implements Callable<Object> 
	{
		private final List<ColGroup> _colGroups;
		private final MatrixBlock _ret;
		private final int _rl;
		private final int _ru;
		
		/**
		 * @param colGroups column groups to decompress
		 * @param ret target block
		 * @param rl lower row bound (inclusive)
		 * @param ru upper row bound (exclusive)
		 */
		protected DecompressTask( List<ColGroup> colGroups, MatrixBlock ret, int rl, int ru )  {
			_colGroups = colGroups;
			_ret = ret;
			_rl = rl;
			_ru = ru;
		}
		
		/**
		 * @return always null; the output is written to the target block
		 * @throws DMLRuntimeException if a column group operation fails
		 */
		@Override
		public Object call() throws DMLRuntimeException {
			final boolean sparseOut = _ret.isInSparseFormat();
			
			//preallocate sparse rows to avoid repeated alloc		
			if( sparseOut ) {
				// per-row non-zero counts, indexed relative to _rl
				int[] rnnz = new int[_ru-_rl];
				for (ColGroup grp : _colGroups)
					grp.countNonZerosPerRow(rnnz, _rl, _ru);
				SparseBlock rows = _ret.getSparseBlock();
				for( int i=_rl; i<_ru; i++ )
					rows.allocate(i, rnnz[i-_rl]);
			}
			
			//decompress row partition
			for (ColGroup grp : _colGroups)
				grp.decompressToBlock(_ret, _rl, _ru);

			//post processing (sort due to append)
			if( sparseOut )
				_ret.sortSparseRows(_rl, _ru);
			
			return null;
		}
	}
	
	//////////////////////////////////////////
	// Graceful fallback to uncompressed linear algebra
	
	/**
	 * Unary operation without compressed support: runs on the uncompressed
	 * implementation, decompressing this block first if necessary.
	 * 
	 * @param op unary operator
	 * @param result output value forwarded to the uncompressed operation
	 * @return value returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixValue unaryOperations(UnaryOperator op, MatrixValue result) 
			throws DMLRuntimeException {
		printDecompressWarning("unaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.unaryOperations(op, result);
		return decompress().unaryOperations(op, result);
	}

	/**
	 * In-place unary operation without compressed support.
	 * 
	 * NOTE(review): for a compressed block the operation is applied to the
	 * temporary block returned by decompress(); nothing visible here writes
	 * that block back, so the update appears to be lost in that case - confirm
	 * and fix where the internal state is accessible.
	 * 
	 * @param op unary operator
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void unaryOperationsInPlace(UnaryOperator op) 
			throws DMLRuntimeException {
		printDecompressWarning("unaryOperationsInPlace");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.unaryOperationsInPlace(op);
			return;
		}
		decompress().unaryOperationsInPlace(op);
	}

	/**
	 * Binary operation without compressed support: both operands are used in
	 * uncompressed form.
	 * 
	 * @param op binary operator
	 * @param thatValue right-hand operand; decompressed if it is compressed
	 * @param result output value forwarded to the uncompressed operation
	 * @return value returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixValue binaryOperations(BinaryOperator op, MatrixValue thatValue, MatrixValue result) 
			throws DMLRuntimeException {
		printDecompressWarning("binaryOperations", (MatrixBlock)thatValue);
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.binaryOperations(op, getUncompressed(thatValue), result);
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(thatValue);
		return left.binaryOperations(op, right, result);
	}

	/**
	 * In-place binary operation without compressed support.
	 * 
	 * NOTE(review): for a compressed block the operation is applied to the
	 * temporary block returned by decompress(); nothing visible here writes
	 * that block back, so the update appears to be lost in that case - confirm
	 * and fix where the internal state is accessible.
	 * 
	 * @param op binary operator
	 * @param thatValue right-hand operand; decompressed if it is compressed
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void binaryOperationsInPlace(BinaryOperator op, MatrixValue thatValue) 
			throws DMLRuntimeException {
		printDecompressWarning("binaryOperationsInPlace", (MatrixBlock)thatValue);
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.binaryOperationsInPlace(op, getUncompressed(thatValue));
			return;
		}
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(thatValue);
		left.binaryOperationsInPlace(op, right);
	}

	/**
	 * Unsupported operation.
	 * 
	 * @param aggOp ignored
	 * @param correction ignored
	 * @param newWithCorrection ignored
	 * @throws DMLRuntimeException always
	 */
	@Override
	public void incrementalAggregate(AggregateOperator aggOp, MatrixValue correction, MatrixValue newWithCorrection)
			throws DMLRuntimeException {
		final String message = "CompressedMatrixBlock: incrementalAggregate not supported.";
		throw new DMLRuntimeException(message);
	}

	/**
	 * Unsupported operation.
	 * 
	 * @param aggOp ignored
	 * @param newWithCorrection ignored
	 * @throws DMLRuntimeException always
	 */
	@Override
	public void incrementalAggregate(AggregateOperator aggOp, MatrixValue newWithCorrection) 
			throws DMLRuntimeException {
		final String message = "CompressedMatrixBlock: incrementalAggregate not supported.";
		throw new DMLRuntimeException(message);
	}

	/**
	 * Reorg operation without compressed support: runs on the uncompressed
	 * implementation, decompressing this block first if necessary.
	 * 
	 * @param op reorg operator
	 * @param ret output value forwarded to the uncompressed operation
	 * @param startRow forwarded unchanged
	 * @param startColumn forwarded unchanged
	 * @param length forwarded unchanged
	 * @return value returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixValue reorgOperations(ReorgOperator op, MatrixValue ret, int startRow, int startColumn, int length)
			throws DMLRuntimeException {
		printDecompressWarning("reorgOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.reorgOperations(op, ret, startRow, startColumn, length);
		return decompress().reorgOperations(op, ret, startRow, startColumn, length);
	}

	/**
	 * Appends another block. A cbind is routed to the two-argument
	 * appendOperations of this class; an rbind falls back to the
	 * uncompressed implementation.
	 * 
	 * @param that block to append; decompressed for rbind if it is compressed
	 * @param ret output block forwarded to the operation
	 * @param cbind true for column-wise append, false for row-wise append
	 * @return block returned by the selected append operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixBlock appendOperations(MatrixBlock that, MatrixBlock ret, boolean cbind) 
		throws DMLRuntimeException {
		if( cbind ) //use supported operation
			return appendOperations(that, ret);			
		printDecompressWarning("appendOperations-rbind", that);
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.appendOperations(getUncompressed(that), ret, cbind);
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(that);
		return left.appendOperations(right, ret, cbind);
	}
	
	/**
	 * Blocked append without compressed support: both operands are used in
	 * uncompressed form.
	 * 
	 * @param v2 value to append; decompressed if it is compressed
	 * @param outlist output list forwarded to the uncompressed operation
	 * @param blockRowFactor forwarded unchanged
	 * @param blockColFactor forwarded unchanged
	 * @param cbind forwarded unchanged
	 * @param m2IsLast forwarded unchanged
	 * @param nextNCol forwarded unchanged
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void appendOperations(MatrixValue v2,
			ArrayList outlist, int blockRowFactor,
			int blockColFactor, boolean cbind, boolean m2IsLast, int nextNCol)
			throws DMLRuntimeException {
		printDecompressWarning("appendOperations", (MatrixBlock)v2);
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.appendOperations(getUncompressed(v2), outlist, blockRowFactor, blockColFactor, cbind, m2IsLast, nextNCol);
			return;
		}
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(v2);
		left.appendOperations(right, outlist, blockRowFactor, blockColFactor, cbind, m2IsLast, nextNCol);
	}

	/**
	 * Single-threaded permutation matrix multiply; forwards to the overload
	 * that takes a thread count.
	 * 
	 * @param m2Val right-hand operand
	 * @param out1Val first output value
	 * @param out2Val second output value
	 * @throws DMLRuntimeException if the delegated operation fails
	 */
	@Override
	public void permutationMatrixMultOperations(MatrixValue m2Val, MatrixValue out1Val, MatrixValue out2Val)
			throws DMLRuntimeException {
		final int numThreads = 1;
		permutationMatrixMultOperations(m2Val, out1Val, out2Val, numThreads);
	}

	/**
	 * Permutation matrix multiply without compressed support: both operands
	 * are used in uncompressed form.
	 * 
	 * @param m2Val right-hand operand; decompressed if it is compressed
	 * @param out1Val first output value, forwarded unchanged
	 * @param out2Val second output value, forwarded unchanged
	 * @param k number of threads, forwarded unchanged
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void permutationMatrixMultOperations(MatrixValue m2Val, MatrixValue out1Val, MatrixValue out2Val, int k)
			throws DMLRuntimeException {
		printDecompressWarning("permutationMatrixMultOperations", (MatrixBlock)m2Val);
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.permutationMatrixMultOperations(getUncompressed(m2Val), out1Val, out2Val, k);
			return;
		}
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(m2Val);
		left.permutationMatrixMultOperations(right, out1Val, out2Val, k);
	}

	/**
	 * Left indexing with a matrix right-hand side, without compressed
	 * support: both operands are used in uncompressed form.
	 * 
	 * @param rhsMatrix block to assign; decompressed if it is compressed
	 * @param rl forwarded unchanged
	 * @param ru forwarded unchanged
	 * @param cl forwarded unchanged
	 * @param cu forwarded unchanged
	 * @param ret output block forwarded to the uncompressed operation
	 * @param update update type, forwarded unchanged
	 * @return block returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, int rl, int ru, int cl, int cu, MatrixBlock ret, UpdateType update)
			throws DMLRuntimeException {
		printDecompressWarning("leftIndexingOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.leftIndexingOperations(getUncompressed(rhsMatrix), rl, ru, cl, cu, ret, update);
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(rhsMatrix);
		return left.leftIndexingOperations(right, rl, ru, cl, cu, ret, update);
	}

	/**
	 * Left indexing with a scalar right-hand side, without compressed
	 * support: runs on the uncompressed implementation.
	 * 
	 * @param scalar scalar to assign
	 * @param rl forwarded unchanged
	 * @param cl forwarded unchanged
	 * @param ret output block forwarded to the uncompressed operation
	 * @param update update type, forwarded unchanged
	 * @return block returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixBlock leftIndexingOperations(ScalarObject scalar, int rl, int cl, MatrixBlock ret, UpdateType update)
			throws DMLRuntimeException {
		printDecompressWarning("leftIndexingOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.leftIndexingOperations(scalar, rl, cl, ret, update);
		return decompress().leftIndexingOperations(scalar, rl, cl, ret, update);
	}

	/**
	 * Slice without compressed support: runs on the uncompressed
	 * implementation, decompressing this block first if necessary.
	 * 
	 * @param rl forwarded unchanged
	 * @param ru forwarded unchanged
	 * @param cl forwarded unchanged
	 * @param cu forwarded unchanged
	 * @param ret output block forwarded to the uncompressed operation
	 * @return block returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixBlock sliceOperations(int rl, int ru, int cl, int cu, CacheBlock ret) 
			throws DMLRuntimeException {
		printDecompressWarning("sliceOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.sliceOperations(rl, ru, cl, cu, ret);
		return decompress().sliceOperations(rl, ru, cl, cu, ret);
	}

	/**
	 * Blocked slice without compressed support: runs on the uncompressed
	 * implementation. This method declares no checked exception, so a
	 * DMLRuntimeException is rethrown wrapped in a RuntimeException.
	 * 
	 * @param outlist output list forwarded to the uncompressed operation
	 * @param range index range, forwarded unchanged
	 * @param rowCut forwarded unchanged
	 * @param colCut forwarded unchanged
	 * @param normalBlockRowFactor forwarded unchanged
	 * @param normalBlockColFactor forwarded unchanged
	 * @param boundaryRlen forwarded unchanged
	 * @param boundaryClen forwarded unchanged
	 */
	@Override
	public void sliceOperations(ArrayList outlist, IndexRange range, 
			int rowCut, int colCut, int normalBlockRowFactor,
			int normalBlockColFactor, int boundaryRlen, int boundaryClen) {
		printDecompressWarning("sliceOperations");
		try {
			// not compressed: use the parent implementation directly; a virtual
			// call on 'this' would dispatch back into this override and recurse
			if( !isCompressed() ) {
				super.sliceOperations(outlist, range, rowCut, colCut, normalBlockRowFactor,
						normalBlockColFactor, boundaryRlen, boundaryClen);
				return;
			}
			decompress().sliceOperations(outlist, range, rowCut, colCut, normalBlockRowFactor,
					normalBlockColFactor, boundaryRlen, boundaryClen);
		}
		catch(DMLRuntimeException ex) {
			throw new RuntimeException(ex);
		}
	}

	/**
	 * Zero-out without compressed support: runs on the uncompressed
	 * implementation, decompressing this block first if necessary.
	 * 
	 * @param result output value forwarded to the uncompressed operation
	 * @param range index range, forwarded unchanged
	 * @param complementary forwarded unchanged
	 * @return value returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixValue zeroOutOperations(MatrixValue result, IndexRange range, boolean complementary) 
			throws DMLRuntimeException {
		printDecompressWarning("zeroOutOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.zeroOutOperations(result, range, complementary);
		return decompress().zeroOutOperations(result, range, complementary);
	}
	
	/**
	 * Blocked unary aggregate (overload with blocking factors and input
	 * indexes) without compressed support: runs on the uncompressed
	 * implementation.
	 * 
	 * @param op aggregate operator
	 * @param result output value forwarded to the uncompressed operation
	 * @param blockingFactorRow forwarded unchanged
	 * @param blockingFactorCol forwarded unchanged
	 * @param indexesIn forwarded unchanged
	 * @return value returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op,
			MatrixValue result, int blockingFactorRow, int blockingFactorCol,
			MatrixIndexes indexesIn) throws DMLRuntimeException {
		printDecompressWarning("aggregateUnaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn);
		return decompress().aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn);
	}

	/**
	 * cmOperations without compressed support: runs on the uncompressed
	 * implementation, decompressing this block first if necessary.
	 * 
	 * @param op operator, forwarded unchanged
	 * @return object returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public CM_COV_Object cmOperations(CMOperator op) throws DMLRuntimeException {
		printDecompressWarning("cmOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.cmOperations(op);
		return decompress().cmOperations(op);
	}

	/**
	 * Weighted cmOperations without compressed support: both operands are
	 * used in uncompressed form.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param weights weights block; decompressed if it is compressed
	 * @return object returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public CM_COV_Object cmOperations(CMOperator op, MatrixBlock weights)
			throws DMLRuntimeException {
		printDecompressWarning("cmOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.cmOperations(op, getUncompressed(weights));
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(weights);
		return left.cmOperations(op, right);
	}

	/**
	 * covOperations without compressed support: both operands are used in
	 * uncompressed form.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param that second operand; decompressed if it is compressed
	 * @return object returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public CM_COV_Object covOperations(COVOperator op, MatrixBlock that)
			throws DMLRuntimeException {
		printDecompressWarning("covOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.covOperations(op, getUncompressed(that));
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(that);
		return left.covOperations(op, right);
	}

	/**
	 * Weighted covOperations without compressed support: all operands are
	 * used in uncompressed form.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param that second operand; decompressed if it is compressed
	 * @param weights weights block; decompressed if it is compressed
	 * @return object returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public CM_COV_Object covOperations(COVOperator op, MatrixBlock that, MatrixBlock weights) 
			throws DMLRuntimeException {
		printDecompressWarning("covOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.covOperations(op, getUncompressed(that), getUncompressed(weights));
		MatrixBlock left = decompress();
		MatrixBlock right1 = getUncompressed(that);
		MatrixBlock right2 = getUncompressed(weights);
		return left.covOperations(op, right1, right2);
	}

	/**
	 * Sort without compressed support: both operands are used in
	 * uncompressed form.
	 * 
	 * @param weights weights value; decompressed if it is compressed
	 * @param result output value forwarded to the uncompressed operation
	 * @return value returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixValue sortOperations(MatrixValue weights, MatrixValue result)
			throws DMLRuntimeException {
		printDecompressWarning("sortOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.sortOperations(getUncompressed(weights), result);
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(weights);
		return left.sortOperations(right, result);
	}

	/**
	 * Blocked aggregate binary operation (overload with matrix indexes)
	 * without compressed support. The first operand is always this block in
	 * uncompressed form; m1Value is not read.
	 * 
	 * @param m1Index index of the first operand, forwarded unchanged
	 * @param m1Value not read
	 * @param m2Index index of the second operand, forwarded unchanged
	 * @param m2Value second operand; decompressed if it is compressed
	 * @param result output value forwarded to the uncompressed operation
	 * @param op aggregate binary operator
	 * @return value returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixValue aggregateBinaryOperations(MatrixIndexes m1Index,
			MatrixValue m1Value, MatrixIndexes m2Index, MatrixValue m2Value,
			MatrixValue result, AggregateBinaryOperator op)
			throws DMLRuntimeException {
		printDecompressWarning("aggregateBinaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.aggregateBinaryOperations(m1Index, this, m2Index, getUncompressed(m2Value), result, op);
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(m2Value);
		return left.aggregateBinaryOperations(m1Index, left, m2Index, right, result, op);
	}

	/**
	 * Aggregate ternary operation without compressed support. The first
	 * operand is always this block in uncompressed form; m1 is not read.
	 * 
	 * @param m1 not read
	 * @param m2 second operand; decompressed if it is compressed
	 * @param m3 third operand; decompressed if it is compressed
	 * @param op aggregate binary operator
	 * @return scalar returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public ScalarObject aggregateTernaryOperations(MatrixBlock m1, MatrixBlock m2, MatrixBlock m3, AggregateBinaryOperator op)
			throws DMLRuntimeException {
		printDecompressWarning("aggregateTernaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.aggregateTernaryOperations(this, getUncompressed(m2), getUncompressed(m3), op);
		MatrixBlock left = decompress();
		MatrixBlock right1 = getUncompressed(m2);
		MatrixBlock right2 = getUncompressed(m3);
		return left.aggregateTernaryOperations(left, right1, right2, op);
	}

	/**
	 * uaggouterchain operation without compressed support. The left operand
	 * is always this block in uncompressed form; mbLeft is not read.
	 * 
	 * @param mbLeft not read
	 * @param mbRight right operand; decompressed if it is compressed
	 * @param mbOut output block, forwarded unchanged
	 * @param bOp binary operator, forwarded unchanged
	 * @param uaggOp unary aggregate operator, forwarded unchanged
	 * @return block returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixBlock uaggouterchainOperations(MatrixBlock mbLeft, MatrixBlock mbRight, 
			MatrixBlock mbOut, BinaryOperator bOp, AggregateUnaryOperator uaggOp) 
			throws DMLRuntimeException {
		printDecompressWarning("uaggouterchainOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.uaggouterchainOperations(this, getUncompressed(mbRight), mbOut, bOp, uaggOp);
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(mbRight);
		return left.uaggouterchainOperations(left, right, mbOut, bOp, uaggOp);
	}

	/**
	 * Single-threaded grouped aggregate; forwards to the overload that takes
	 * a thread count.
	 * 
	 * @param tgt target value
	 * @param wghts weights value
	 * @param ret output value
	 * @param ngroups number of groups
	 * @param op operator
	 * @return block returned by the delegated operation
	 * @throws DMLRuntimeException if the delegated operation fails
	 */
	@Override
	public MatrixBlock groupedAggOperations(MatrixValue tgt, MatrixValue wghts, MatrixValue ret, int ngroups, Operator op)
			throws DMLRuntimeException {
		final int numThreads = 1;
		return groupedAggOperations(tgt, wghts, ret, ngroups, op, numThreads);
	}

	/**
	 * Grouped aggregate without compressed support: the receiver, the target
	 * and the weights are all used in uncompressed form.
	 * 
	 * The target handed in by the caller is forwarded (previously it was
	 * dropped and the receiver itself was passed as target).
	 * 
	 * @param tgt target value; decompressed if it is compressed
	 * @param wghts weights value; decompressed if it is compressed
	 * @param ret output value, forwarded unchanged
	 * @param ngroups number of groups, forwarded unchanged
	 * @param op operator, forwarded unchanged
	 * @param k number of threads, forwarded unchanged
	 * @return block returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixBlock groupedAggOperations(MatrixValue tgt, MatrixValue wghts,
			MatrixValue ret, int ngroups, Operator op, int k)
			throws DMLRuntimeException {
		printDecompressWarning("groupedAggOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.groupedAggOperations(getUncompressed(tgt), getUncompressed(wghts), ret, ngroups, op, k);
		MatrixBlock left = decompress();
		MatrixBlock target = getUncompressed(tgt);
		MatrixBlock right = getUncompressed(wghts);
		return left.groupedAggOperations(target, right, ret, ngroups, op, k);
	}

	/**
	 * Remove-empty with a select block, without compressed support: runs on
	 * the uncompressed implementation.
	 * 
	 * @param ret output block, forwarded unchanged
	 * @param rows forwarded unchanged
	 * @param select select block, forwarded unchanged
	 * @return block returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixBlock removeEmptyOperations(MatrixBlock ret, boolean rows, MatrixBlock select) 
			throws DMLRuntimeException {
		printDecompressWarning("removeEmptyOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.removeEmptyOperations(ret, rows, select);
		return decompress().removeEmptyOperations(ret, rows, select);
	}

	/**
	 * Remove-empty without compressed support: runs on the uncompressed
	 * implementation, decompressing this block first if necessary.
	 * 
	 * @param ret output block, forwarded unchanged
	 * @param rows forwarded unchanged
	 * @return block returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixBlock removeEmptyOperations(MatrixBlock ret, boolean rows)
			throws DMLRuntimeException {
		printDecompressWarning("removeEmptyOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.removeEmptyOperations(ret, rows);
		return decompress().removeEmptyOperations(ret, rows);
	}

	/**
	 * rexpand without compressed support: runs on the uncompressed
	 * implementation, decompressing this block first if necessary.
	 * 
	 * @param ret output block, forwarded unchanged
	 * @param max forwarded unchanged
	 * @param rows forwarded unchanged
	 * @param cast forwarded unchanged
	 * @param ignore forwarded unchanged
	 * @return block returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixBlock rexpandOperations(MatrixBlock ret, double max,
			boolean rows, boolean cast, boolean ignore)
			throws DMLRuntimeException {
		printDecompressWarning("rexpandOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.rexpandOperations(ret, max, rows, cast, ignore);
		return decompress().rexpandOperations(ret, max, rows, cast, ignore);
	}

	/**
	 * Replace without compressed support: runs on the uncompressed
	 * implementation, decompressing this block first if necessary.
	 * 
	 * @param result output value, forwarded unchanged
	 * @param pattern forwarded unchanged
	 * @param replacement forwarded unchanged
	 * @return value returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixValue replaceOperations(MatrixValue result, double pattern, double replacement) 
			throws DMLRuntimeException {
		printDecompressWarning("replaceOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.replaceOperations(result, pattern, replacement);
		return decompress().replaceOperations(result, pattern, replacement);
	}

	/**
	 * Ternary operation (scalar, matrix) without compressed support: both
	 * matrix operands are used in uncompressed form.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param scalar forwarded unchanged
	 * @param that matrix operand; decompressed if it is compressed
	 * @param resultMap forwarded unchanged
	 * @param resultBlock forwarded unchanged
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void ternaryOperations(Operator op, double scalar,
			MatrixValue that, CTableMap resultMap, MatrixBlock resultBlock)
			throws DMLRuntimeException {
		printDecompressWarning("ternaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.ternaryOperations(op, scalar, getUncompressed(that), resultMap, resultBlock);
			return;
		}
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(that);
		left.ternaryOperations(op, scalar, right, resultMap, resultBlock);
	}

	/**
	 * Ternary operation (scalar, scalar) without compressed support: runs on
	 * the uncompressed implementation.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param scalar forwarded unchanged
	 * @param scalar2 forwarded unchanged
	 * @param resultMap forwarded unchanged
	 * @param resultBlock forwarded unchanged
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void ternaryOperations(Operator op, double scalar,
			double scalar2, CTableMap resultMap, MatrixBlock resultBlock)
			throws DMLRuntimeException {
		printDecompressWarning("ternaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.ternaryOperations(op, scalar, scalar2, resultMap, resultBlock);
			return;
		}
		decompress().ternaryOperations(op, scalar, scalar2, resultMap, resultBlock);
	}

	/**
	 * Ternary operation (matrix indexes, scalar) without compressed support:
	 * runs on the uncompressed implementation.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param ix1 forwarded unchanged
	 * @param scalar forwarded unchanged
	 * @param left forwarded unchanged
	 * @param brlen forwarded unchanged
	 * @param resultMap forwarded unchanged
	 * @param resultBlock forwarded unchanged
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void ternaryOperations(Operator op, MatrixIndexes ix1,
			double scalar, boolean left, int brlen, CTableMap resultMap,
			MatrixBlock resultBlock) throws DMLRuntimeException {
		printDecompressWarning("ternaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.ternaryOperations(op, ix1, scalar, left, brlen, resultMap, resultBlock);
			return;
		}
		decompress().ternaryOperations(op, ix1, scalar, left, brlen, resultMap, resultBlock);
	}

	/**
	 * Ternary operation (matrix, scalar, ignoreZeros) without compressed
	 * support: both matrix operands are used in uncompressed form.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param that matrix operand; decompressed if it is compressed
	 * @param scalar forwarded unchanged
	 * @param ignoreZeros forwarded unchanged
	 * @param resultMap forwarded unchanged
	 * @param resultBlock forwarded unchanged
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void ternaryOperations(Operator op, MatrixValue that,
			double scalar, boolean ignoreZeros, CTableMap resultMap,
			MatrixBlock resultBlock) throws DMLRuntimeException {
		printDecompressWarning("ternaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.ternaryOperations(op, getUncompressed(that), scalar, ignoreZeros, resultMap, resultBlock);
			return;
		}
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(that);
		left.ternaryOperations(op, right, scalar, ignoreZeros, resultMap, resultBlock);
	}

	/**
	 * Ternary operation (matrix, scalar, result block) without compressed
	 * support: both matrix operands are used in uncompressed form.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param that matrix operand; decompressed if it is compressed
	 * @param scalar forwarded unchanged
	 * @param resultBlock forwarded unchanged
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void ternaryOperations(Operator op, MatrixValue that, double scalar, MatrixBlock resultBlock)
			throws DMLRuntimeException {
		printDecompressWarning("ternaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.ternaryOperations(op, getUncompressed(that), scalar, resultBlock);
			return;
		}
		MatrixBlock left = decompress();
		MatrixBlock right = getUncompressed(that);
		left.ternaryOperations(op, right, scalar, resultBlock);
	}

	/**
	 * Ternary operation (matrix, matrix, result map) without compressed
	 * support: all matrix operands are used in uncompressed form.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param that second operand; decompressed if it is compressed
	 * @param that2 third operand; decompressed if it is compressed
	 * @param resultMap forwarded unchanged
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void ternaryOperations(Operator op, MatrixValue that,
			MatrixValue that2, CTableMap resultMap)
			throws DMLRuntimeException {
		printDecompressWarning("ternaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.ternaryOperations(op, getUncompressed(that), getUncompressed(that2), resultMap);
			return;
		}
		MatrixBlock left = decompress();
		MatrixBlock right1 = getUncompressed(that);
		MatrixBlock right2 = getUncompressed(that2);
		left.ternaryOperations(op, right1, right2, resultMap);
	}

	/**
	 * Ternary operation (matrix, matrix, result map, result block) without
	 * compressed support: all matrix operands are used in uncompressed form.
	 * 
	 * @param op operator, forwarded unchanged
	 * @param that second operand; decompressed if it is compressed
	 * @param that2 third operand; decompressed if it is compressed
	 * @param resultMap forwarded unchanged
	 * @param resultBlock forwarded unchanged
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public void ternaryOperations(Operator op, MatrixValue that,
			MatrixValue that2, CTableMap resultMap, MatrixBlock resultBlock)
			throws DMLRuntimeException {
		printDecompressWarning("ternaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() ) {
			super.ternaryOperations(op, getUncompressed(that), getUncompressed(that2), resultMap, resultBlock);
			return;
		}
		MatrixBlock left = decompress();
		MatrixBlock right1 = getUncompressed(that);
		MatrixBlock right2 = getUncompressed(that2);
		left.ternaryOperations(op, right1, right2, resultMap, resultBlock);
	}

	/**
	 * Single-threaded quaternary operation; forwards to the overload that
	 * takes a thread count.
	 * 
	 * @param qop quaternary operator
	 * @param um second operand
	 * @param vm third operand
	 * @param wm fourth operand
	 * @param out output value
	 * @return value returned by the delegated operation
	 * @throws DMLRuntimeException if the delegated operation fails
	 */
	@Override
	public MatrixValue quaternaryOperations(QuaternaryOperator qop,
			MatrixValue um, MatrixValue vm, MatrixValue wm, MatrixValue out)
			throws DMLRuntimeException {
		final int numThreads = 1;
		return quaternaryOperations(qop, um, vm, wm, out, numThreads);
	}

	/**
	 * Quaternary operation without compressed support: all matrix operands
	 * are used in uncompressed form.
	 * 
	 * @param qop quaternary operator, forwarded unchanged
	 * @param um second operand; decompressed if it is compressed
	 * @param vm third operand; decompressed if it is compressed
	 * @param wm fourth operand; decompressed if it is compressed
	 * @param out output value, forwarded unchanged
	 * @param k number of threads, forwarded unchanged
	 * @return value returned by the uncompressed operation
	 * @throws DMLRuntimeException if decompression or the operation fails
	 */
	@Override
	public MatrixValue quaternaryOperations(QuaternaryOperator qop, MatrixValue um, 
			MatrixValue vm, MatrixValue wm, MatrixValue out, int k) 
			throws DMLRuntimeException {
		printDecompressWarning("quaternaryOperations");
		// not compressed: use the parent implementation directly; a virtual
		// call on 'this' would dispatch back into this override and recurse
		if( !isCompressed() )
			return super.quaternaryOperations(qop, getUncompressed(um), getUncompressed(vm), getUncompressed(wm), out, k);
		MatrixBlock left = decompress();
		MatrixBlock right1 = getUncompressed(um);
		MatrixBlock right2 = getUncompressed(vm);
		MatrixBlock right3 = getUncompressed(wm);
		return left.quaternaryOperations(qop, right1, right2, right3, out, k);
	}

	/**
	 * Unsupported operation; always fails with an unchecked exception.
	 * 
	 * @param rgen ignored
	 * @param nnzInBlock ignored
	 * @param bigrand ignored
	 * @param bSeed ignored
	 * @return never returns normally
	 * @throws DMLRuntimeException declared by the signature, not thrown here
	 */
	@Override
	public MatrixBlock randOperationsInPlace(RandomMatrixGenerator rgen,
			long[] nnzInBlock, Well1024a bigrand, long bSeed)
			throws DMLRuntimeException {
		final String message = "CompressedMatrixBlock: randOperationsInPlace not supported.";
		throw new RuntimeException(message);
	}

	/**
	 * Unsupported operation; always fails with an unchecked exception.
	 * 
	 * @param rgen ignored
	 * @param nnzInBlock ignored
	 * @param bigrand ignored
	 * @param bSeed ignored
	 * @param k ignored
	 * @return never returns normally
	 * @throws DMLRuntimeException declared by the signature, not thrown here
	 */
	@Override
	public MatrixBlock randOperationsInPlace(RandomMatrixGenerator rgen,
			long[] nnzInBlock, Well1024a bigrand, long bSeed, int k)
			throws DMLRuntimeException {
		final String message = "CompressedMatrixBlock: randOperationsInPlace not supported.";
		throw new RuntimeException(message);
	}

	/**
	 * Unsupported operation; always fails with an unchecked exception.
	 * 
	 * @param from ignored
	 * @param to ignored
	 * @param incr ignored
	 * @return never returns normally
	 * @throws DMLRuntimeException declared by the signature, not thrown here
	 */
	@Override
	public MatrixBlock seqOperationsInPlace(double from, double to, double incr)
			throws DMLRuntimeException {
		//output should always be uncompressed
		final String message = "CompressedMatrixBlock: seqOperationsInPlace not supported.";
		throw new RuntimeException(message);
	}

	/**
	 * Tells whether a block is a CompressedMatrixBlock that reports itself
	 * as compressed.
	 * 
	 * @param mb block to test; null yields false
	 * @return true only for a CompressedMatrixBlock in compressed state
	 */
	private static boolean isCompressed(MatrixBlock mb) {
		if( !(mb instanceof CompressedMatrixBlock) )
			return false;
		return ((CompressedMatrixBlock)mb).isCompressed();
	}

	/**
	 * Returns an uncompressed view of a matrix value: compressed blocks are
	 * decompressed, everything else is returned as is (cast to MatrixBlock).
	 * 
	 * @param mVal matrix value, expected to be a MatrixBlock
	 * @return decompressed block, or the input itself if it is not compressed
	 * @throws DMLRuntimeException if decompression fails
	 */
	private static MatrixBlock getUncompressed(MatrixValue mVal) 
			throws DMLRuntimeException {
		MatrixBlock block = (MatrixBlock)mVal;
		if( isCompressed(block) )
			return ((CompressedMatrixBlock)mVal).decompress();
		return block;
	}

	/**
	 * Logs a warning that an operation forces decompression; silent if this
	 * block is not compressed.
	 * 
	 * @param operation name of the operation, used in the log message
	 */
	private void printDecompressWarning(String operation) {
		if( !isCompressed() )
			return;
		LOG.warn("Operation '"+operation+"' not supported yet - decompressing for ULA operations.");
	}

	/**
	 * Logs a warning that an operation forces decompression; silent if
	 * neither this block nor the second operand is compressed.
	 * 
	 * @param operation name of the operation, used in the log message
	 * @param m2 second operand of the operation
	 */
	private void printDecompressWarning(String operation, MatrixBlock m2) {
		if( !isCompressed() && !isCompressed(m2) )
			return;
		LOG.warn("Operation '"+operation+"' not supported yet - decompressing for ULA operations.");
	}
}