// org.apache.sysml.runtime.util.UtilFunctions Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of systemml Show documentation
// Declarative Machine Learning
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.ArrayUtils;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.matrix.MetaDataNumItemsByEachReducer;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;
public class UtilFunctions
{
//for accurate cast of double values to int and long: this epsilon is
//added before flooring (see toInt/toLong) so that values marginally
//below an integer still cast to that integer
//IEEE754: binary64 (double precision) eps = 2^(-53) = 1.11 * 10^(-16)
//(same epsilon as used for matrix index cast in R)
public static final double DOUBLE_EPS = Math.pow(2, -53);
//prime numbers for old hash function (the divide prime is close to max int
//because it determines the max hash domain size)
public static final long ADD_PRIME1 = 99991;
public static final int DIVIDE_PRIME = 1405695061;
/**
 * Returns the hash code for an int-int pair, mixing both keys
 * with the multiplier 31.
 *
 * @param key1 first int key
 * @param key2 second int key
 * @return hash code
 */
public static int intHashCode(int key1, int key2) {
	int h = 31 + key1;
	return 31 * h + key2;
}
/**
 * Returns the hash code for a single long key by xor-folding its
 * upper and lower 32 bits.
 *
 * @param key1 long key
 * @return hash code
 */
public static int longHashCode(long key1) {
	int upper = (int) (key1 >>> 32);
	int lower = (int) key1;
	return lower ^ upper;
}
/**
 * Returns the hash code for a long-long pair. This is the default
 * hash function for the keys of a distributed matrix in MR/Spark.
 *
 * @param key1 first long key
 * @param key2 second long key
 * @return hash code
 */
public static int longHashCode(long key1, long key2) {
	//xor-fold each long into an int, then mix with multiplier 31
	//(similar to Arrays.hashCode(long[]) but w/o array creation/copy)
	int hash1 = (int) key1 ^ (int) (key1 >>> 32);
	int hash2 = (int) key2 ^ (int) (key2 >>> 32);
	return (31 + hash1) * 31 + hash2;
}
/**
 * Returns the hash code for a long-long-long triple. This is the default
 * hash function for the keys of a distributed matrix in MR/Spark.
 *
 * @param key1 first long key
 * @param key2 second long key
 * @param key3 third long key
 * @return hash code
 */
public static int longHashCode(long key1, long key2, long key3) {
	//running hash over the three xor-folded keys (similar to
	//Arrays.hashCode(long[]) but w/o array creation/copy)
	int h = 31 + ((int) key1 ^ (int) (key1 >>> 32));
	h = 31 * h + ((int) key2 ^ (int) (key2 >>> 32));
	h = 31 * h + ((int) key3 ^ (int) (key3 >>> 32));
	return h;
}
/**
 * Rounds a positive value up to the next power of two (a value that is
 * already a power of two is returned unchanged; 0 yields 1). Results
 * that do not fit into an int are capped at Integer.MAX_VALUE.
 *
 * @param in input value
 * @return power of two greater than or equal to the input, or Integer.MAX_VALUE
 */
public static int nextIntPow2( int in ) {
	int expon = 0;
	if( in != 0 )
		expon = 32 - Integer.numberOfLeadingZeros(in-1);
	long pow2 = pow(2, expon);
	if( pow2 > Integer.MAX_VALUE )
		return Integer.MAX_VALUE;
	return (int) pow2;
}
/**
 * Integer power function. Powers of two with an exponent in [0,63)
 * are computed via bit shift; everything else falls back to
 * Math.pow with a cast to long.
 *
 * @param base base
 * @param exp exponent
 * @return base to the power of exp as long
 */
public static long pow(int base, int exp) {
	boolean shiftable = (base == 2) && (exp >= 0) && (exp < 63);
	if( shiftable )
		return 1L << exp;
	return (long) Math.pow(base, exp);
}
/**
 * Computes the 1-based block index based on the global cell index and block size meta
 * data. See computeCellIndex for the inverse operation.
 *
 * @param cellIndex global cell index
 * @param blockSize block size
 * @return 1-based block index
 */
public static long computeBlockIndex(long cellIndex, int blockSize) {
	long zeroBasedCell = cellIndex - 1;
	long zeroBasedBlock = zeroBasedCell / blockSize;
	return zeroBasedBlock + 1;
}
/**
 * Computes the 0-based cell-in-block index based on the global cell index and block
 * size meta data. See computeCellIndex for the inverse operation.
 *
 * @param cellIndex global cell index
 * @param blockSize block size
 * @return 0-based cell-in-block index
 */
public static int computeCellInBlock(long cellIndex, int blockSize) {
	long zeroBasedCell = cellIndex - 1;
	long offset = zeroBasedCell % blockSize;
	return (int) offset;
}
/**
 * Computes the global 1-based cell index based on the block index, block size meta data,
 * and specific 0-based in-block cell index.
 *
 * NOTE: this is equivalent to cellIndexCalculation.
 *
 * @param blockIndex block index
 * @param blockSize block size
 * @param cellInBlock 0-based cell-in-block index
 * @return global 1-based cell index
 */
public static long computeCellIndex( long blockIndex, int blockSize, int cellInBlock ) {
	//1-based global index of the first cell of the block
	long blockStart = (blockIndex - 1) * blockSize + 1;
	return blockStart + cellInBlock;
}
/**
 * Computes the actual block size based on matrix dimension, block index, and block size
 * meta data. For boundary blocks, the actual block size is less or equal than the block
 * size meta data; otherwise they are identical.
 *
 * @param len matrix dimension
 * @param blockIndex block index
 * @param blockSize block size metadata
 * @return actual block size
 */
public static int computeBlockSize( long len, long blockIndex, long blockSize ) {
	//cells covered by all blocks before this one
	long consumed = (blockIndex - 1) * blockSize;
	long remain = len - consumed;
	long actual = (remain < blockSize) ? remain : blockSize;
	return (int) actual;
}
/**
 * Checks whether the block with the given block indexes overlaps the
 * given range of global cell indexes (all bounds inclusive).
 *
 * @param ix block indexes
 * @param brlen number of rows per block
 * @param bclen number of columns per block
 * @param rl lower row bound of the range
 * @param ru upper row bound of the range
 * @param cl lower column bound of the range
 * @param cu upper column bound of the range
 * @return true if the block overlaps the range
 */
public static boolean isInBlockRange( MatrixIndexes ix, int brlen, int bclen, long rl, long ru, long cl, long cu )
{
	//global cell bounds covered by this block
	long blockRowFirst = (ix.getRowIndex()-1)*brlen + 1;
	long blockRowLast = ix.getRowIndex()*brlen;
	long blockColFirst = (ix.getColumnIndex()-1)*bclen + 1;
	long blockColLast = ix.getColumnIndex()*bclen;
	
	boolean rowsOverlap = (rl <= blockRowLast) && (ru >= blockRowFirst);
	boolean colsOverlap = (cl <= blockColLast) && (cu >= blockColFirst);
	return rowsOverlap && colsOverlap;
}
/**
 * Checks whether the rows [ix, ix+brlen-1] overlap the row range
 * [rl, ru] (all bounds inclusive).
 *
 * @param ix index of the first row
 * @param brlen number of rows
 * @param rl lower row bound of the range
 * @param ru upper row bound of the range
 * @return true if the two row ranges overlap
 */
public static boolean isInFrameBlockRange( Long ix, int brlen, long rl, long ru )
{
	long lastRow = ix+brlen-1;
	return rl <= lastRow && ru >= ix;
}
/**
 * Convenience overload that unpacks the bounds of the given index
 * range and delegates to the variant with explicit row/column bounds.
 *
 * @param ix block indexes
 * @param brlen number of rows per block
 * @param bclen number of columns per block
 * @param ixrange index range
 * @return true if the block overlaps the range
 */
public static boolean isInBlockRange( MatrixIndexes ix, int brlen, int bclen, IndexRange ixrange )
{
	long rl = ixrange.rowStart;
	long ru = ixrange.rowEnd;
	long cl = ixrange.colStart;
	long cu = ixrange.colEnd;
	return isInBlockRange(ix, brlen, bclen, rl, ru, cl, cu);
}
/**
 * Convenience overload that unpacks the row bounds of the given index
 * range and delegates to the variant with explicit row bounds. Only
 * the row bounds are used; bclen is not read.
 *
 * @param ix index of the first row
 * @param brlen number of rows
 * @param bclen number of columns (unused)
 * @param ixrange index range
 * @return true if the rows overlap the row range
 */
public static boolean isInFrameBlockRange( Long ix, int brlen, int bclen, IndexRange ixrange )
{
	long rl = ixrange.rowStart;
	long ru = ixrange.rowEnd;
	return isInFrameBlockRange(ix, brlen, rl, ru);
}
// Reused by both MR and Spark for performing zero out
public static IndexRange getSelectedRangeForZeroOut(IndexedMatrixValue in, int blockRowFactor, int blockColFactor, IndexRange indexRange)
{
IndexRange tempRange = new IndexRange(-1, -1, -1, -1);
long topBlockRowIndex=UtilFunctions.computeBlockIndex(indexRange.rowStart, blockRowFactor);
int topRowInTopBlock=UtilFunctions.computeCellInBlock(indexRange.rowStart, blockRowFactor);
long bottomBlockRowIndex=UtilFunctions.computeBlockIndex(indexRange.rowEnd, blockRowFactor);
int bottomRowInBottomBlock=UtilFunctions.computeCellInBlock(indexRange.rowEnd, blockRowFactor);
long leftBlockColIndex=UtilFunctions.computeBlockIndex(indexRange.colStart, blockColFactor);
int leftColInLeftBlock=UtilFunctions.computeCellInBlock(indexRange.colStart, blockColFactor);
long rightBlockColIndex=UtilFunctions.computeBlockIndex(indexRange.colEnd, blockColFactor);
int rightColInRightBlock=UtilFunctions.computeCellInBlock(indexRange.colEnd, blockColFactor);
//no overlap
if(in.getIndexes().getRowIndex()bottomBlockRowIndex
|| in.getIndexes().getColumnIndex()rightBlockColIndex)
{
tempRange.set(-1,-1,-1,-1);
return tempRange;
}
//get the index range inside the block
tempRange.set(0, in.getValue().getNumRows()-1, 0, in.getValue().getNumColumns()-1);
if(topBlockRowIndex==in.getIndexes().getRowIndex())
tempRange.rowStart=topRowInTopBlock;
if(bottomBlockRowIndex==in.getIndexes().getRowIndex())
tempRange.rowEnd=bottomRowInBottomBlock;
if(leftBlockColIndex==in.getIndexes().getColumnIndex())
tempRange.colStart=leftColInLeftBlock;
if(rightBlockColIndex==in.getIndexes().getColumnIndex())
tempRange.colEnd=rightColInRightBlock;
return tempRange;
}
/**
 * Computes the in-block (0-based) index range of the given keyed frame
 * block for the given global index range. Reused by both MR and Spark
 * for performing zero out.
 *
 * @param in pair of row key and frame block; the key is subtracted from
 *        the range's row start to obtain the in-block row start
 * @param blockRowFactor number of rows per block
 * @param blockColFactor number of columns per block
 * @param indexRange global index range
 * @param lSrcRowIndex source row index, subtracted from the range's row end
 * @param lDestRowIndex destination row index; if the range starts at or
 *        before it, the in-block row start is 0
 * @return in-block index range
 */
public static IndexRange getSelectedRangeForZeroOut(Pair<Long, FrameBlock> in, int blockRowFactor, int blockColFactor, IndexRange indexRange, long lSrcRowIndex, long lDestRowIndex)
{
	int iRowStart, iRowEnd, iColStart, iColEnd;
	
	if(indexRange.rowStart <= lDestRowIndex)
		iRowStart = 0;
	else
		iRowStart = (int) (indexRange.rowStart - in.getKey());
	//row end is capped at the last row of a full block
	iRowEnd = (int) Math.min(indexRange.rowEnd - lSrcRowIndex, blockRowFactor)-1;
	
	iColStart = UtilFunctions.computeCellInBlock(indexRange.colStart, blockColFactor);
	iColEnd = UtilFunctions.computeCellInBlock(indexRange.colEnd, blockColFactor);
	
	return new IndexRange(iRowStart, iRowEnd, iColStart, iColEnd);
}
/**
 * Sums up all entries of the metadata's number-of-items array.
 *
 * @param metadata metadata holding the number-of-items array
 * @return sum over all entries
 */
public static long getTotalLength(MetaDataNumItemsByEachReducer metadata) {
	long[] counts = metadata.getNumItemsArray();
	long sum = 0;
	for( int i=0; i<counts.length; i++ )
		sum += counts[i];
	return sum;
}
/**
 * Computes the number of items between the positions ceil(total*p) and
 * ceil(total*(1-p)), both inclusive, where total is the total length
 * obtained from the given metadata.
 *
 * @param metadata metadata holding the number-of-items array
 * @param p quantile fraction
 * @return number of items in the inter-quantile range
 */
public static long getLengthForInterQuantile(MetaDataNumItemsByEachReducer metadata, double p)
{
	long total = UtilFunctions.getTotalLength(metadata);
	double lower = Math.ceil(total*p); //lower bound is inclusive
	double upper = Math.ceil(total*(1-p)); //upper bound is inclusive
	return (long)upper - (long)lower + 1;
}
/**
 * Parses the given string into a double by delegating to
 * Double.parseDouble.
 *
 * @param str string to parse to double
 * @return double value
 */
public static double parseToDouble(String str)
{
	final double parsed = Double.parseDouble(str);
	return parsed;
}
/**
 * Parses the given string into an int. Strings containing a decimal
 * point are parsed as double and converted via toInt; all other
 * strings are parsed via Integer.parseInt.
 *
 * @param str string to parse to int
 * @return int value
 */
public static int parseToInt( String str )
{
	boolean hasDecimalPoint = str.contains(".");
	return hasDecimalPoint ?
		toInt( Double.parseDouble(str) ) : Integer.parseInt(str);
}
/**
 * Parses the given string into a long. Strings containing a decimal
 * point are parsed as double and converted via toLong; all other
 * strings are parsed via Long.parseLong.
 *
 * @param str string to parse to long
 * @return long value
 */
public static long parseToLong( String str )
{
	boolean hasDecimalPoint = str.contains(".");
	return hasDecimalPoint ?
		toLong( Double.parseDouble(str) ) : Long.parseLong(str);
}
/**
 * Casts a double to int by flooring after adding DOUBLE_EPS.
 *
 * @param val double value
 * @return int value
 */
public static int toInt( double val ) {
	double shifted = val + DOUBLE_EPS;
	return (int) Math.floor(shifted);
}
/**
 * Casts a double to long by flooring after adding DOUBLE_EPS.
 *
 * @param val double value
 * @return long value
 */
public static long toLong( double val ) {
	double shifted = val + DOUBLE_EPS;
	return (long) Math.floor(shifted);
}
/**
 * Extracts the int value of a boxed Long or Integer. Any object that
 * is not a Long is cast to Integer.
 *
 * @param obj boxed Long or Integer
 * @return int value
 */
public static int toInt(Object obj) {
	if( obj instanceof Long )
		return ((Long)obj).intValue();
	return ((Integer)obj).intValue();
}
public static float[] toFloat(double[] data) {
float[] ret = new float[data.length];
for( int i=0; i to && incr > 0) || (from < to && incr < 0)) ) {
throw new RuntimeException("Invalid seq parameters: ("+from+", "+to+", "+incr+")");
}
return 1L + (long) Math.floor(to/incr - from/incr);
}
/**
 * Obtain sequence list
 *
 * @param low lower bound (inclusive)
 * @param up upper bound (inclusive)
 * @param incr increment (must be positive unless low is greater than up)
 * @return list of integers; empty if low is greater than up
 * @throws IllegalArgumentException if the sequence is non-empty and the
 *         increment is not positive (the loop would never reach up)
 */
public static List<Integer> getSeqList(int low, int up, int incr) {
	List<Integer> ret = new ArrayList<>();
	if( low > up )
		return ret;
	if( incr <= 0 )
		throw new IllegalArgumentException("Invalid seq parameters: ("+low+", "+up+", "+incr+")");
	//long counter: i+incr cannot wrap around for up close to Integer.MAX_VALUE
	for( long i=low; i<=up; i+=incr )
		ret.add((int)i);
	return ret;
}
/**
* Obtain sequence array
*
* @param low lower bound (inclusive)
* @param up upper bound (inclusive)
* @param incr increment
* @return array of integers
*/
public static int[] getSeqArray(int low, int up, int incr) {
int len = (int) getSeqLength(low, up, incr);
int[] ret = new int[len];
for( int i=0, val=low; i 57 )
return false;
return true;
}
public static byte max( byte[] array )
{
byte ret = Byte.MIN_VALUE;
for( int i=0; iret)?array[i]:ret;
return ret;
}
/**
 * Strips one pair of enclosing quotes from the given string if it
 * starts and ends with the same quote character (double or single).
 * Null, strings shorter than two characters, and strings without
 * matching enclosing quotes are returned unchanged.
 *
 * @param s input string
 * @return string without enclosing quotes
 */
public static String unquote(String s) {
	if( s == null || s.length() < 2 )
		return s;
	char first = s.charAt(0);
	char last = s.charAt(s.length()-1);
	boolean doubleQuoted = (first == '"' && last == '"');
	boolean singleQuoted = (first == '\'' && last == '\'');
	return (doubleQuoted || singleQuoted) ?
		s.substring(1, s.length()-1) : s;
}
/**
 * Encloses the given string in double quotes.
 *
 * @param s input string
 * @return string enclosed in double quotes
 */
public static String quote(String s) {
	StringBuilder sb = new StringBuilder();
	sb.append('"').append(s).append('"');
	return sb.toString();
}
/**
 * Parses a memory size with optional g/m/k quantifiers (lower or
 * upper case, as last character) into its number representation,
 * using 1024 as the base of each quantifier.
 *
 * @param arg memory size as readable string
 * @return byte count of memory size
 */
public static long parseMemorySize(String arg) {
	int last = arg.length() - 1;
	char suffix = (last >= 0) ? arg.charAt(last) : '\0';
	long factor;
	switch( suffix ) {
		case 'g': case 'G':
			factor = 1024L * 1024 * 1024; break;
		case 'm': case 'M':
			factor = 1024L * 1024; break;
		case 'k': case 'K':
			factor = 1024L; break;
		default:
			//no quantifier: the entire string is the byte count
			return Long.parseLong(arg);
	}
	return Long.parseLong(arg.substring(0, last)) * factor;
}
/**
 * Formats a byte count as readable string, using the largest of the
 * units GB/MB/KB (base 1024) that does not exceed the value; the
 * value is divided by the unit with integer division. Values below
 * 1024 are printed without unit.
 *
 * @param arg byte count of memory size
 * @return memory size as readable string
 */
public static String formatMemorySize(long arg) {
	final long kb = 1024;
	final long mb = 1024 * kb;
	final long gb = 1024 * mb;
	if( arg >= gb )
		return String.format("%d GB", arg/gb);
	if( arg >= mb )
		return String.format("%d MB", arg/mb);
	if( arg >= kb )
		return String.format("%d KB", arg/kb);
	return String.format("%d", arg);
}
/**
 * Obtains the double value of the given object: a Double is unboxed
 * directly, any other object is parsed from its string representation.
 *
 * @param obj input object
 * @return double value
 */
public static double getDouble(Object obj) {
	if( obj instanceof Double )
		return (Double) obj;
	return Double.parseDouble(obj.toString());
}
/**
 * Checks whether the given object represents a non-zero value. A
 * Double is compared numerically against 0; for any other object
 * only the string representations "0" and "0.0" count as zero.
 *
 * @param obj input object
 * @return true if the object is considered non-zero
 */
public static boolean isNonZero(Object obj) {
	if( obj instanceof Double )
		return ((Double) obj).doubleValue() != 0;
	//avoid expensive double parsing
	String text = obj.toString();
	boolean isZeroLiteral = text.equals("0") || text.equals("0.0");
	return !isZeroLiteral;
}
public static int computeNnz(double[] a, int ai, int len) {
int lnnz = 0;
for( int i=ai; i List asList(List... inputs) {
List ret = new ArrayList<>();
for( List list : inputs )
ret.addAll(list);
return ret;
}
/**
 * Collects the distinct elements of all given lists into a new set.
 *
 * @param inputs lists of elements
 * @param <T> element type
 * @return new set holding the distinct elements of all inputs
 */
@SafeVarargs
public static <T> Set<T> asSet(List<T>... inputs) {
	Set<T> ret = new HashSet<>();
	for( List<T> list : inputs )
		ret.addAll(list);
	return ret;
}
/**
 * Collects the distinct elements of all given arrays into a new set.
 *
 * @param inputs arrays of elements
 * @param <T> element type
 * @return new set holding the distinct elements of all inputs
 */
@SafeVarargs
public static <T> Set<T> asSet(T[]... inputs) {
	Set<T> ret = new HashSet<>();
	for( T[] input : inputs )
		ret.addAll(Arrays.asList(input));
	return ret;
}
/**
 * Collects the distinct given elements into a new set.
 *
 * @param inputs elements
 * @param <T> element type
 * @return new set holding the distinct elements
 */
@SafeVarargs
public static <T> Set<T> asSet(T... inputs) {
	return new HashSet<>(Arrays.asList(inputs));
}
}