org.apache.sysml.runtime.compress.ColGroupRLE Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Declarative Machine Learning
There is a newer version: 1.2.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.compress;

import java.util.Arrays;
import java.util.Iterator;

import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.compress.utils.ConverterUtils;
import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
import org.apache.sysml.runtime.functionobjects.Builtin;
import org.apache.sysml.runtime.functionobjects.KahanFunction;
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
import org.apache.sysml.runtime.functionobjects.ReduceAll;
import org.apache.sysml.runtime.functionobjects.ReduceCol;
import org.apache.sysml.runtime.functionobjects.ReduceRow;
import org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode;
import org.apache.sysml.runtime.instructions.cp.KahanObject;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator;
import org.apache.sysml.runtime.matrix.operators.ScalarOperator;


/** A group of columns compressed with a single run-length encoded bitmap. */
public class ColGroupRLE extends ColGroupBitmap 
{
	private static final long serialVersionUID = 7450232907594748177L;

	/**
	 * No-argument constructor: only tags the group with the
	 * {@code RLE_BITMAP} compression type via the superclass constructor;
	 * no bitmap data is set up here.
	 * NOTE(review): presumably used when the group is populated afterwards
	 * (e.g., during deserialization) -- confirm against callers.
	 */
	public ColGroupRLE() {
		super(CompressionType.RLE_BITMAP);
	}
	
	/**
	 * Main constructor. Constructs and stores the necessary bitmaps.
	 * 
	 * @param colIndices
	 *            indices (within the block) of the columns included in this
	 *            column group
	 * @param numRows
	 *            total number of rows in the parent block
	 * @param ubm
	 *            Uncompressed bitmap representation of the block
	 */
	public ColGroupRLE(int[] colIndices, int numRows, UncompressedBitmap ubm) 
	{
		super(CompressionType.RLE_BITMAP, colIndices, numRows, ubm);
		
		// compress the bitmaps
		final int numVals = ubm.getNumValues();
		char[][] lbitmaps = new char[numVals][];
		int totalLen = 0;
		for( int k=0; k getDecodeIterator(int k) {
		return new BitmapDecoderRLE(_data, _ptr[k], len(k)); 
	}
	
	@Override
	public void decompressToBlock(MatrixBlock target, int rl, int ru) 
	{
		if( LOW_LEVEL_OPT && getNumValues() > 1 )
		{
			final int blksz = 128 * 1024;
			final int numCols = getNumCols();
			final int numVals = getNumValues();
			
			//position and start offset arrays
			int[] astart = new int[numVals];
			int[] apos = skipScan(numVals, rl, astart);
			
			//cache conscious append via horizontal scans 
			for( int bi=rl; bi 1 )
		{
			final int blksz = 128 * 1024;
			final int numCols = getNumCols();
			final int numVals = getNumValues();
			final int n = getNumRows();
			
			//position and start offset arrays
			int[] apos = new int[numVals];
			int[] astart = new int[numVals];
			int[] cix = new int[numCols];
			
			//prepare target col indexes
			for( int j=0; j= blen ) 
						continue;
					int start = astart[k];
					for( ; bix 1 )
		{
			final int blksz = 128 * 1024;
			final int numCols = getNumCols();
			final int numVals = getNumValues();
			final int n = getNumRows();
			double[] c = target.getDenseBlock();
			
			//position and start offset arrays
			int[] apos = new int[numVals];
			int[] astart = new int[numVals];
			
			//cache conscious append via horizontal scans 
			for( int bi=0; bi= blen ) 
						continue;
					int start = astart[k];
					for( ; bix 1 
			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
		{
			//L3 cache alignment, see the comment in rightMultByVector of the OLE column group;
			//the core difference of RLE to OLE is that runs are not segment-aligned,
			//which requires careful handling of runs crossing cache buckets
			final int blksz = ColGroupBitmap.WRITE_CACHE_BLKSZ; 
			
			//step 1: prepare position and value arrays
			
			//current pos / values per RLE list
			int[] astart = new int[numVals];
			int[] apos = skipScan(numVals, rl, astart);
			double[] aval = preaggValues(numVals, sb);
			
			//step 2: cache conscious matrix-vector via horizontal scans 
			for( int bi=rl; bi= bimax)
							break;
						start += lstart + llen;
						bix += 2;
					}
					
					apos[k] = bix;	
					astart[k] = start;
				}
			}
		}
		else
		{
			for (int k = 0; k < numVals; k++) {
				int boff = _ptr[k];
				int blen = len(k);
				double val = sumValues(k, sb);
				int bix = 0;
				int start = 0;
				
				//scan to beginning offset if necessary 
				if( rl > 0 ) { //rl aligned with blksz	
					while( bix= rl )
							break;
						start += lstart + llen;
						bix += 2;
					}
				}
				
				//compute partial results, not aligned
				while( bix= ru)
						break;
					start += lstart + llen;
					bix += 2;
				}
			}
		}
	}

	@Override
	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result)
			throws DMLRuntimeException 
	{		
		double[] a = ConverterUtils.getDenseVector(vector);
		double[] c = result.getDenseBlock();
		final int numCols = getNumCols();
		final int numVals = getNumValues();
		final int n = getNumRows();
		
		if( LOW_LEVEL_OPT && numVals > 1 
			&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) 
		{
			final int blksz = ColGroupBitmap.READ_CACHE_BLKSZ; 
			
			//step 1: prepare position and value arrays
			
			//current pos per OLs / output values
			int[] apos = new int[numVals];
			int[] astart = new int[numVals];
			double[] cvals = new double[numVals];
			
			//step 2: cache conscious matrix-vector via horizontal scans 
			for( int ai=0; ai tmp = skipScanVal(k, rl);
				int bix = tmp.getKey();
				int curRunStartOff = tmp.getValue();
				int curRunEnd = tmp.getValue();
				for ( ; bix tmp = skipScanVal(k, rl);
			int bix = tmp.getKey();
			int curRunStartOff = tmp.getValue();
			int curRunEnd = tmp.getValue();
			for(; bix < blen && curRunEnd < ru; bix+=2) {
				curRunStartOff = curRunEnd + _data[boff+bix];
				curRunEnd = curRunStartOff + _data[boff+bix+1];
				for (int rix=curRunStartOff; rix 0 ) { //rl aligned with blksz	
			for (int k = 0; k < numVals; k++) {
				int boff = _ptr[k];
				int blen = len(k);
				int bix = 0;
				int start = 0;
				while( bix= rl )
						break;
					start += lstart + llen;
					bix += 2;
				}
				apos[k] = bix;
				astart[k] = start;
			}
		}
		
		return apos;
	}

	private Pair skipScanVal(int k, int rl) {
		int apos = 0; 
		int astart = 0;
		
		if( rl > 0 ) { //rl aligned with blksz	
			int boff = _ptr[k];
			int blen = len(k);
			int bix = 0;
			int start = 0;
			while( bix= rl )
					break;
				start += lstart + llen;
				bix += 2;
			}
			apos = bix;
			astart = start;
		}
		
		return new Pair(apos, astart);
	}
}