org.apache.sysml.runtime.util.UtilFunctions Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Declarative Machine Learning
There is a newer version: 1.2.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.lang.ArrayUtils;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.matrix.MetaDataNumItemsByEachReducer;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;

public class UtilFunctions 
{
	//for accurate cast of double values to int and long 
	//IEEE754: binary64 (double precision) eps = 2^(-53) = 1.11 * 10^(-16)
	//(same epsilon as used for matrix index cast in R)
	public static final double DOUBLE_EPS = Math.pow(2, -53);
	
	//prime numbers for old hash function (divide prime close to max int, 
	//because it determines the max hash domain size
	public static final long ADD_PRIME1 = 99991;
	public static final int DIVIDE_PRIME = 1405695061; 
	
	public static int intHashCode(int key1, int key2) {
		return 31 * (31 + key1) + key2;
	}
	
	public static int longHashCode(long key1) {
		return (int)(key1^(key1>>>32));
	}

	/**
	 * Returns the hash code for a long-long pair. This is the default
	 * hash function for the keys of a distributed matrix in MR/Spark.
	 * 
	 * @param key1 first long key
	 * @param key2 second long key
	 * @return hash code
	 */
	public static int longHashCode(long key1, long key2) {
		//basic hash mixing of two longs hashes (similar to
		//Arrays.hashCode(long[]) but w/o array creation/copy)
		int h = 31 + (int)(key1 ^ (key1 >>> 32));
		return h*31 + (int)(key2 ^ (key2 >>> 32));
	}
	
	/**
	 * Returns the hash code for a long-long-long triple. This is the default
	 * hash function for the keys of a distributed matrix in MR/Spark.
	 * 
	 * @param key1 first long key
	 * @param key2 second long key
	 * @param key3 third long key
	 * @return hash code
	 */
	public static int longHashCode(long key1, long key2, long key3) {
		//basic hash mixing of three longs hashes (similar to
		//Arrays.hashCode(long[]) but w/o array creation/copy)
		int h1 = 31 + (int)(key1 ^ (key1 >>> 32));
		int h2 = h1*31 + (int)(key2 ^ (key2 >>> 32));
		return h2*31 + (int)(key3 ^ (key3 >>> 32));
	}
	
	public static int nextIntPow2( int in ) {
		int expon = (in==0) ? 0 : 32-Integer.numberOfLeadingZeros(in-1);
		long pow2 = pow(2, expon);
		return (int)((pow2>Integer.MAX_VALUE)?Integer.MAX_VALUE : pow2);	
	}
	
	public static long pow(int base, int exp) {
		return (base==2 && 0 <= exp && exp < 63) ?
			1L << exp : (long)Math.pow(base, exp);
	}
	
	/**
	 * Computes the 1-based block index based on the global cell index and block size meta
	 * data. See computeCellIndex for the inverse operation.
	 * 
	 * @param cellIndex global cell index
	 * @param blockSize block size
	 * @return 1-based block index
	 */
	public static long computeBlockIndex(long cellIndex, int blockSize) {
		return (cellIndex-1)/blockSize + 1;
	}
	
	/**
	 * Computes the 0-based cell-in-block index based on the global cell index and block
	 * size meta data. See computeCellIndex for the inverse operation.
	 * 
	 * @param cellIndex global cell index
	 * @param blockSize block size
	 * @return 0-based cell-in-block index
	 */
	public static int computeCellInBlock(long cellIndex, int blockSize) {
		return (int) ((cellIndex-1)%blockSize);
	}
	
	/**
	 * Computes the global 1-based cell index based on the block index, block size meta data,
	 * and specific 0-based in-block cell index.
	 * 
	 * NOTE: this is equivalent to cellIndexCalculation.
	 * 
	 * @param blockIndex block index
	 * @param blockSize block size
	 * @param cellInBlock 0-based cell-in-block index
	 * @return global 1-based cell index
	 */
	public static long computeCellIndex( long blockIndex, int blockSize, int cellInBlock ) {
		return (blockIndex-1)*blockSize + 1 + cellInBlock;
	}
	
	/**
	 * Computes the actual block size based on matrix dimension, block index, and block size
	 * meta data. For boundary blocks, the actual block size is less or equal than the block 
	 * size meta data; otherwise they are identical.  
	 *  
	 * @param len matrix dimension
	 * @param blockIndex block index
	 * @param blockSize block size metadata
	 * @return actual block size
	 */
	public static int computeBlockSize( long len, long blockIndex, long blockSize ) {
		long remain = len - (blockIndex-1)*blockSize;
		return (int)Math.min(blockSize, remain);
	}

	public static boolean isInBlockRange( MatrixIndexes ix, int brlen, int bclen, long rl, long ru, long cl, long cu )
	{
		long bRLowerIndex = (ix.getRowIndex()-1)*brlen + 1;
		long bRUpperIndex = ix.getRowIndex()*brlen;
		long bCLowerIndex = (ix.getColumnIndex()-1)*bclen + 1;
		long bCUpperIndex = ix.getColumnIndex()*bclen;
		
		if(rl > bRUpperIndex || ru < bRLowerIndex) {
			return false;
		}
		else if(cl > bCUpperIndex || cu < bCLowerIndex) {
			return false;
		}
		else {
			return true;
		}
	}

	public static boolean isInFrameBlockRange( Long ix, int brlen, long rl, long ru )
	{
		if(rl > ix+brlen-1 || ru < ix)
			return false;
		else
			return true;
	}

	public static boolean isInBlockRange( MatrixIndexes ix, int brlen, int bclen, IndexRange ixrange )
	{
		return isInBlockRange(ix, brlen, bclen, 
				ixrange.rowStart, ixrange.rowEnd, 
				ixrange.colStart, ixrange.colEnd);
	}

	public static boolean isInFrameBlockRange( Long ix, int brlen, int bclen, IndexRange ixrange )
	{
		return isInFrameBlockRange(ix, brlen, ixrange.rowStart, ixrange.rowEnd);
	}
	
	// Reused by both MR and Spark for performing zero out
	public static IndexRange getSelectedRangeForZeroOut(IndexedMatrixValue in, int blockRowFactor, int blockColFactor, IndexRange indexRange) 
	{
		IndexRange tempRange = new IndexRange(-1, -1, -1, -1);
		
		long topBlockRowIndex=UtilFunctions.computeBlockIndex(indexRange.rowStart, blockRowFactor);
		int topRowInTopBlock=UtilFunctions.computeCellInBlock(indexRange.rowStart, blockRowFactor);
		long bottomBlockRowIndex=UtilFunctions.computeBlockIndex(indexRange.rowEnd, blockRowFactor);
		int bottomRowInBottomBlock=UtilFunctions.computeCellInBlock(indexRange.rowEnd, blockRowFactor);
		
		long leftBlockColIndex=UtilFunctions.computeBlockIndex(indexRange.colStart, blockColFactor);
		int leftColInLeftBlock=UtilFunctions.computeCellInBlock(indexRange.colStart, blockColFactor);
		long rightBlockColIndex=UtilFunctions.computeBlockIndex(indexRange.colEnd, blockColFactor);
		int rightColInRightBlock=UtilFunctions.computeCellInBlock(indexRange.colEnd, blockColFactor);
		
		//no overlap
		if(in.getIndexes().getRowIndex()bottomBlockRowIndex
		   || in.getIndexes().getColumnIndex()rightBlockColIndex)
		{
			tempRange.set(-1,-1,-1,-1);
			return tempRange;
		}
		
		//get the index range inside the block
		tempRange.set(0, in.getValue().getNumRows()-1, 0, in.getValue().getNumColumns()-1);
		if(topBlockRowIndex==in.getIndexes().getRowIndex())
			tempRange.rowStart=topRowInTopBlock;
		if(bottomBlockRowIndex==in.getIndexes().getRowIndex())
			tempRange.rowEnd=bottomRowInBottomBlock;
		if(leftBlockColIndex==in.getIndexes().getColumnIndex())
			tempRange.colStart=leftColInLeftBlock;
		if(rightBlockColIndex==in.getIndexes().getColumnIndex())
			tempRange.colEnd=rightColInRightBlock;
		
		return tempRange;
	}
	
	// Reused by both MR and Spark for performing zero out
	public static IndexRange getSelectedRangeForZeroOut(Pair in, int blockRowFactor, int blockColFactor, IndexRange indexRange, long lSrcRowIndex, long lDestRowIndex) 
	{
		int iRowStart, iRowEnd, iColStart, iColEnd;
		
		if(indexRange.rowStart <= lDestRowIndex)
			iRowStart = 0;
		else
			iRowStart = (int) (indexRange.rowStart - in.getKey());
		iRowEnd = (int) Math.min(indexRange.rowEnd - lSrcRowIndex, blockRowFactor)-1;
		
		iColStart = UtilFunctions.computeCellInBlock(indexRange.colStart, blockColFactor);
		iColEnd = UtilFunctions.computeCellInBlock(indexRange.colEnd, blockColFactor);

		return  new IndexRange(iRowStart, iRowEnd, iColStart, iColEnd);
	}
	
	public static long getTotalLength(MetaDataNumItemsByEachReducer metadata) {
		long[] counts=metadata.getNumItemsArray();
		long total=0;
		for(long count: counts)
			total+=count;
		return total;
	}
	
	public static long getLengthForInterQuantile(MetaDataNumItemsByEachReducer metadata, double p)
	{
		long total = UtilFunctions.getTotalLength(metadata);
		long lpos=(long)Math.ceil(total*p);//lower bound is inclusive
		long upos=(long)Math.ceil(total*(1-p));//upper bound is inclusive
		return upos-lpos+1;
	}

	/**
	 * JDK8 floating decimal double parsing, which is generally faster
	 * than <JDK8 parseDouble and works well in multi-threaded tasks.
	 * 
	 * @param str string to parse to double
	 * @return double value
	 */
	public static double parseToDouble(String str)
	{
		//return FloatingDecimal.parseDouble(str);
    	return Double.parseDouble(str);
	}
	
	public static int parseToInt( String str )
	{
		int ret = -1;
		if( str.contains(".") )
			ret = toInt( Double.parseDouble(str) );
		else
			ret = Integer.parseInt(str);
		return ret;
	}
	
	public static long parseToLong( String str )
	{
		long ret = -1;
		if( str.contains(".") )
			ret = toLong( Double.parseDouble(str) );
		else
			ret = Long.parseLong(str);
		return ret;
	}
	
	public static int toInt( double val ) {
		return (int) Math.floor( val + DOUBLE_EPS );
	}
	
	public static long toLong( double val ) {
		return (long) Math.floor( val + DOUBLE_EPS );
	}
	
	public static int toInt(Object obj) {
		return (obj instanceof Long) ?
			((Long)obj).intValue() : ((Integer)obj).intValue();
	}
	
	public static float[] toFloat(double[] data) {
		float[] ret = new float[data.length];
		for( int i=0; i to && incr > 0) || (from < to && incr < 0)) ) {
			throw new RuntimeException("Invalid seq parameters: ("+from+", "+to+", "+incr+")");
		}
		return 1L + (long) Math.floor(to/incr - from/incr);
	}
	
	/**
	 * Obtain sequence list
	 * 
	 * @param low   lower bound (inclusive)
	 * @param up    upper bound (inclusive)
	 * @param incr  increment 
	 * @return list of integers
	 */
	public static List getSeqList(int low, int up, int incr) {
		ArrayList ret = new ArrayList<>();
		for( int i=low; i<=up; i+=incr )
			ret.add(i);
		return ret;
	}
	
	/**
	 * Obtain sequence array
	 * 
	 * @param low   lower bound (inclusive)
	 * @param up    upper bound (inclusive)
	 * @param incr  increment 
	 * @return array of integers
	 */
	public static int[] getSeqArray(int low, int up, int incr) {
		int len = (int) getSeqLength(low, up, incr);
		int[] ret = new int[len];
		for( int i=0, val=low; i 57 )
				return false;
		return true;
	}

	public static byte max( byte[] array )
	{
		byte ret = Byte.MIN_VALUE;
		for( int i=0; iret)?array[i]:ret;
		return ret;	
	}
	
	public static String unquote(String s) {
		if (s != null
				&& s.length() >=2 && ((s.startsWith("\"") && s.endsWith("\"")) 
					|| (s.startsWith("'") && s.endsWith("'")))) {
			s = s.substring(1, s.length() - 1);
		}
		return s;
	}
	
	public static String quote(String s) {
		return "\"" + s + "\"";
	}

	/**
	 * Parses a memory size with optional g/m/k quantifiers into its
	 * number representation.
	 * 
	 * @param arg memory size as readable string
	 * @return byte count of memory size
	 */
	public static long parseMemorySize(String arg) {
		if ( arg.endsWith("g") || arg.endsWith("G") )
			return Long.parseLong(arg.substring(0,arg.length()-1)) * 1024 * 1024 * 1024;
		else if ( arg.endsWith("m") || arg.endsWith("M") )
			return Long.parseLong(arg.substring(0,arg.length()-1)) * 1024 * 1024;
		else if( arg.endsWith("k") || arg.endsWith("K") )
			return Long.parseLong(arg.substring(0,arg.length()-1)) * 1024;
		else 
			return Long.parseLong(arg.substring(0,arg.length()));
	}
	
	/**
	 * Format a memory size with g/m/k quantifiers into its
	 * number representation.
	 * 
	 * @param arg byte count of memory size
	 * @return memory size as readable string
	 */
	public static String formatMemorySize(long arg) {
		if (arg >= 1024 * 1024 * 1024)
			return String.format("%d GB", arg/(1024*1024*1024));
		else if (arg >= 1024 * 1024)
			return String.format("%d MB", arg/(1024*1024));
		else if (arg >= 1024)
			return String.format("%d KB", arg/(1024));
		else
			return String.format("%d", arg);
	}

	public static double getDouble(Object obj) {
		return (obj instanceof Double) ? (Double)obj :
			Double.parseDouble(obj.toString());
	}

	public static boolean isNonZero(Object obj) {
		if( obj instanceof Double ) 
			return ((Double) obj) != 0;
		else {
			//avoid expensive double parsing
			String sobj = obj.toString();
			return (!sobj.equals("0") && !sobj.equals("0.0"));
		}
	}
	
	public static int computeNnz(double[] a, int ai, int len) {
		int lnnz = 0;
		for( int i=ai; i List asList(List... inputs) {
		List ret = new ArrayList<>();
		for( List list : inputs )
			ret.addAll(list);
		return ret;
	}
	
	@SafeVarargs
	public static  Set asSet(List... inputs) {
		Set ret = new HashSet<>();
		for( List list : inputs )
			ret.addAll(list);
		return ret;
	}
	
	@SafeVarargs
	public static  Set asSet(T[]... inputs) {
		Set ret = new HashSet<>();
		for( T[] input : inputs )
			for( T element : input )
				ret.add(element);
		return ret;
	}
	
	@SafeVarargs
	public static  Set asSet(T... inputs) {
		Set ret = new HashSet<>();
		for( T element : inputs )
			ret.add(element);
		return ret;
	}
}