org.apache.sysml.runtime.compress.ColGroupRLE Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.compress;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.compress.utils.ConverterUtils;
import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
import org.apache.sysml.runtime.functionobjects.Builtin;
import org.apache.sysml.runtime.functionobjects.KahanFunction;
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.instructions.cp.KahanObject;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
/** A group of columns compressed with a single run-length encoded bitmap. */
public class ColGroupRLE extends ColGroupOffset
{
private static final long serialVersionUID = 7450232907594748177L;
private static final Log LOG = LogFactory.getLog(ColGroupRLE.class.getName());
/**
 * No-argument constructor; only delegates to the superclass constructor.
 * NOTE(review): presumably required for deserialization of column groups
 * (the class declares a serialVersionUID) -- confirm against callers.
 */
public ColGroupRLE()
{
    super();
}
/**
* Main constructor. Constructs and stores the necessary bitmaps.
*
* @param colIndices
* indices (within the block) of the columns included in this
* column group
* @param numRows
* total number of rows in the parent block
* @param ubm
* Uncompressed bitmap representation of the block
*/
public ColGroupRLE(int[] colIndices, int numRows, UncompressedBitmap ubm)
{
super(colIndices, numRows, ubm);
// compress the bitmaps
final int numVals = ubm.getNumValues();
char[][] lbitmaps = new char[numVals][];
int totalLen = 0;
for( int k=0; k ucSize )
LOG.warn("RLE group larger than UC dense: "+estimateInMemorySize()+" "+ucSize);
}
/**
 * Constructs an RLE column group directly from already encoded data.
 * The given arrays are stored by reference, they are not copied.
 *
 * @param colIndices
 *            column indices, forwarded unchanged to the superclass
 * @param numRows
 *            number of rows, forwarded unchanged to the superclass
 * @param zeros
 *            zero indicator, forwarded unchanged to the superclass
 * @param values
 *            value array, forwarded unchanged to the superclass
 * @param bitmaps
 *            encoded run lists of all values, stored as the group's data array
 * @param bitmapOffs
 *            offsets into {@code bitmaps}, stored as the group's pointer array
 */
public ColGroupRLE(int[] colIndices, int numRows, boolean zeros, double[] values, char[] bitmaps, int[] bitmapOffs)
{
    super(colIndices, numRows, zeros, values);

    //take over the pre-built encoding as is
    _ptr = bitmapOffs;
    _data = bitmaps;
}
/**
 * Identifies the compression scheme used by this column group.
 *
 * @return always {@code CompressionType.RLE_BITMAP}
 */
@Override
public CompressionType getCompType()
{
    return CompressionType.RLE_BITMAP;
}
@Override
public void decompressToBlock(MatrixBlock target, int rl, int ru)
{
if( LOW_LEVEL_OPT && getNumValues() > 1 )
{
final int blksz = 128 * 1024;
final int numCols = getNumCols();
final int numVals = getNumValues();
//position and start offset arrays
int[] astart = new int[numVals];
int[] apos = skipScan(numVals, rl, astart);
//cache conscious append via horizontal scans
for( int bi=rl; bi 1 )
{
final int blksz = 128 * 1024;
final int numCols = getNumCols();
final int numVals = getNumValues();
final int n = getNumRows();
//position and start offset arrays
int[] apos = new int[numVals];
int[] astart = new int[numVals];
int[] cix = new int[numCols];
//prepare target col indexes
for( int j=0; j= blen )
continue;
int start = astart[k];
for( ; bix= blen )
continue;
int start = astart[k];
for( ; bix tmp = skipScanVal(k, rl);
int bix = tmp.getKey();
int curRunStartOff = tmp.getValue();
int curRunEnd = tmp.getValue();
int count = 0;
for ( ; bix 1
&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
{
//L3 cache alignment, see comment rightMultByVector OLE column group
//core difference of RLE to OLE is that runs are not segment alignment,
//which requires care of handling runs crossing cache-buckets
final int blksz = ColGroupOffset.WRITE_CACHE_BLKSZ;
//step 1: prepare position and value arrays
//current pos / values per RLE list
int[] astart = new int[numVals];
int[] apos = skipScan(numVals, rl, astart);
double[] aval = preaggValues(numVals, sb);
//step 2: cache conscious matrix-vector via horizontal scans
for( int bi=rl; bi= bimax)
break;
start += lstart + llen;
bix += 2;
}
apos[k] = bix;
astart[k] = start;
}
}
}
else
{
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
double val = sumValues(k, sb);
int bix = 0;
int start = 0;
//scan to beginning offset if necessary
if( rl > 0 ) { //rl aligned with blksz
while( bix= rl )
break;
start += lstart + llen;
bix += 2;
}
}
//compute partial results, not aligned
while( bix= ru)
break;
start += lstart + llen;
bix += 2;
}
}
}
}
@Override
public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result)
throws DMLRuntimeException
{
double[] a = ConverterUtils.getDenseVector(vector);
double[] c = result.getDenseBlock();
final int numCols = getNumCols();
final int numVals = getNumValues();
final int n = getNumRows();
if( LOW_LEVEL_OPT && numVals > 1
&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
{
final int blksz = ColGroupOffset.READ_CACHE_BLKSZ;
//step 1: prepare position and value arrays
//current pos per OLs / output values
int[] astart = new int[numVals];
int[] apos = allocIVector(numVals, true);
double[] cvals = allocDVector(numVals, true);
//step 2: cache conscious matrix-vector via horizontal scans
for( int ai=0; ai 1
&& _numRows > BitmapEncoder.BITMAP_BLOCK_SZ )
{
final int blksz = ColGroupOffset.WRITE_CACHE_BLKSZ/2;
//step 1: prepare position and value arrays
//current pos / values per RLE list
int[] astart = new int[numVals];
int[] apos = skipScan(numVals, rl, astart);
double[] aval = sumAllValues(kplus, kbuff, false);
//step 2: cache conscious matrix-vector via horizontal scans
for( int bi=rl; bi= bimax)
break;
start += lstart + llen;
bix += 2;
}
apos[k] = bix;
astart[k] = start;
}
}
}
else
{
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
double val = sumValues(k, kplus, kbuff);
if (val != 0.0) {
Pair tmp = skipScanVal(k, rl);
int bix = tmp.getKey();
int curRunStartOff = tmp.getValue();
int curRunEnd = tmp.getValue();
for ( ; bix tmp = skipScanVal(k, rl);
int bix = tmp.getKey();
int curRunStartOff = tmp.getValue();
int curRunEnd = tmp.getValue();
for(; bix < blen && curRunEnd < ru; bix+=2) {
curRunStartOff = curRunEnd + _data[boff+bix];
curRunEnd = curRunStartOff + _data[boff+bix+1];
for (int rix=curRunStartOff; rix 0 ) { //rl aligned with blksz
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
int bix = 0;
int start = 0;
while( bix= rl )
break;
start += lstart + llen;
bix += 2;
}
apos[k] = bix;
astart[k] = start;
}
}
return apos;
}
private Pair skipScanVal(int k, int rl) {
int apos = 0;
int astart = 0;
if( rl > 0 ) { //rl aligned with blksz
int boff = _ptr[k];
int blen = len(k);
int bix = 0;
int start = 0;
while( bix= rl )
break;
start += lstart + llen;
bix += 2;
}
apos = bix;
astart = start;
}
return new Pair(apos, astart);
}
/**
 * Creates an iterator over the row indexes of value {@code k}, covering
 * all rows of this column group, i.e., the range [0, getNumRows()).
 *
 * @param k
 *            index of the value whose rows are iterated
 * @return iterator over the row indexes of value k
 */
@Override
public Iterator<Integer> getIterator(int k) {
    //typed return (instead of a raw Iterator) spares callers unchecked casts
    return new RLEValueIterator(k, 0, getNumRows());
}
/**
 * Creates an iterator over the row indexes of value {@code k}, restricted
 * to the row range [rl, ru).
 *
 * @param k
 *            index of the value whose rows are iterated
 * @param rl
 *            lower row bound (inclusive)
 * @param ru
 *            upper row bound (exclusive)
 * @return iterator over the row indexes of value k within [rl, ru)
 */
@Override
public Iterator<Integer> getIterator(int k, int rl, int ru) {
    //typed return (instead of a raw Iterator) spares callers unchecked casts
    return new RLEValueIterator(k, rl, ru);
}
/**
 * Iterator over the row indexes covered by the runs of a single value,
 * restricted to the row range [rl, ru). Runs are read as pairs of
 * (start offset relative to the end of the previous run, run length)
 * from the data array of the enclosing column group.
 */
private class RLEValueIterator implements Iterator<Integer>
{
    private final int _ru;   //upper row bound (exclusive)
    private final int _boff; //offset of the run list of value k in _data
    private final int _blen; //length of the run list of value k
    private int _bix;        //position of the current run in the run list
    private int _start;      //row offset just past the previous run
    private int _rpos;       //row index returned by the next call to next()

    /**
     * Positions the iterator at the first row index of value {@code k}
     * that is greater than or equal to {@code rl}. Rows below rl are
     * skipped one at a time.
     *
     * @param k
     *            index of the value whose rows are iterated
     * @param rl
     *            lower row bound (inclusive)
     * @param ru
     *            upper row bound (exclusive)
     */
    public RLEValueIterator(int k, int rl, int ru) {
        _ru = ru;
        _boff = _ptr[k];
        _blen = len(k);
        _bix = 0;
        _start = 0; //init first run
        //an empty run list is treated as exhausted (same convention as in
        //nextRowOffset) instead of reading past the end of the list
        _rpos = (_blen > 0) ? _data[_boff+_bix] : _ru;
        //the hasNext() guard ensures termination for rl > ru, where
        //nextRowOffset() is a no-op and _rpos would never reach rl
        while( _rpos < rl && hasNext() )
            nextRowOffset();
    }

    /**
     * @return true if the current row index is below the upper row bound
     */
    @Override
    public boolean hasNext() {
        return (_rpos < _ru);
    }

    /**
     * Returns the current row index and advances to the following one.
     *
     * @return next row index of the iterated value
     * @throws java.util.NoSuchElementException
     *             if there are no more row indexes below the upper bound
     */
    @Override
    public Integer next() {
        if( !hasNext() )
            throw new java.util.NoSuchElementException("No more RLE entries.");
        int ret = _rpos;
        nextRowOffset();
        return ret;
    }

    /**
     * Moves the current row index one step forward, either within the
     * current run or to the first row of the following run. Once the run
     * list is exhausted, the row index is set to the upper bound so that
     * hasNext() returns false. Does nothing if already exhausted.
     */
    private void nextRowOffset() {
        if( !hasNext() )
            return;

        //get current run information
        int lstart = _data[_boff + _bix]; //start
        int llen = _data[_boff + _bix + 1]; //len

        //advance to next run if necessary, i.e., the current row is the
        //last row of the current run
        if( _rpos - _start - lstart + 1 >= llen ) {
            _start += lstart + llen;
            _bix +=2;
            _rpos = (_bix>=_blen) ? _ru :
                _start + _data[_boff + _bix];
        }
        //increment row index within run
        else {
            _rpos++;
        }
    }
}
}