All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.nd4j.linalg.api.ops.executioner.OpExecutionerUtil Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
package org.nd4j.linalg.api.ops.executioner;

import lombok.AllArgsConstructor;
import lombok.Data;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.util.ArrayUtil;

import java.util.Arrays;

/**Utility functions for the DefaultOpExecutioner
 * @author Alex Black
 */
public class OpExecutionerUtil {

    private OpExecutionerUtil() {
    }

    /** Can we do the op (X = Op(X)) directly on the arrays without breaking X up into 1d tensors first?
     * In general, this is possible if the elements of X are contiguous in the buffer, OR if every element
     * of X is at position offset+i*elementWiseStride in the buffer
     * */
    public static boolean canDoOpDirectly(INDArray x) {
        if(x.elementWiseStride() < 1)
            return false;
        if(x.isVector()) return true;

        //For a single NDArray all we require is that the elements are contiguous in the buffer or every nth element

        //Full buffer -> implies all elements are contiguous (and match)
        long l1 = x.lengthLong();
        long dl1 = x.data().length();
        if(l1 == dl1)
            return true;

        //Strides are same as a zero offset NDArray -> all elements are contiguous (even if not offset 0)
        int[] shape1 = x.shape();
        int[] stridesAsInit = (x.ordering()=='c' ? ArrayUtil.calcStrides(shape1) : ArrayUtil.calcStridesFortran(shape1));
        boolean stridesSameAsInit = Arrays.equals(x.stride(), stridesAsInit);
        return stridesSameAsInit;
    }

    /** Can we do the transform op (X = Op(X,Y)) directly on the arrays without breaking them up into 1d tensors first? */
    public static boolean canDoOpDirectly(INDArray x, INDArray y){
        if(x.isVector()) return true;
        if(x.ordering() != y.ordering()) return false; //other than vectors, elements in f vs. c NDArrays will never line up
        if(x.elementWiseStride() < 1 || y.elementWiseStride() < 1)
            return false;
        //Full buffer + matching strides -> implies all elements are contiguous (and match)
        //Need strides to match, otherwise elements in buffer won't line up (i.e., c vs. f order arrays)
        long l1 = x.lengthLong();
        long dl1 = x.data().length();
        long l2 = y.lengthLong();
        long dl2 = y.data().length();
        int[] strides1 = x.stride();
        int[] strides2 = y.stride();
        boolean equalStrides = Arrays.equals(strides1, strides2);
        if(l1 == dl1 && l2 == dl2 && equalStrides)
            return true;

        //Strides match + are same as a zero offset NDArray -> all elements are contiguous (and match)
        if(equalStrides) {
            int[] shape1 = x.shape();
            int[] stridesAsInit = (x.ordering()=='c' ? ArrayUtil.calcStrides(shape1) : ArrayUtil.calcStridesFortran(shape1));
            boolean stridesSameAsInit = Arrays.equals(strides1, stridesAsInit);
            return stridesSameAsInit;
        }

        return false;
    }

    /** Can we do the transform op (Z = Op(X,Y)) directly on the arrays without breaking them up into 1d tensors first? */
    public static boolean canDoOpDirectly(INDArray x, INDArray y, INDArray z){
        if(x.isVector()) return true;
        if(x.ordering() != y.ordering() || x.ordering() != z.ordering() ) return false; //other than vectors, elements in f vs. c NDArrays will never line up
        if(x.elementWiseStride() < 1 || y.elementWiseStride() < 1)
            return false;
        //Full buffer + matching strides -> implies all elements are contiguous (and match)
        long l1 = x.lengthLong();
        long dl1 = x.data().length();
        long l2 = y.lengthLong();
        long dl2 = y.data().length();
        long l3 = z.lengthLong();
        long dl3 = z.data().length();
        int[] strides1 = x.stride();
        int[] strides2 = y.stride();
        int[] strides3 = z.stride();
        boolean equalStrides = Arrays.equals(strides1, strides2) && Arrays.equals(strides1,strides3);
        if(l1 == dl1 && l2 == dl2 && l3 == dl3 && equalStrides)
            return true;

        //Strides match + are same as a zero offset NDArray -> all elements are contiguous (and match)
        if(equalStrides) {
            int[] shape1 = x.shape();
            int[] stridesAsInit = (x.ordering() == 'c' ? ArrayUtil.calcStrides(shape1) : ArrayUtil.calcStridesFortran(shape1));
            boolean stridesSameAsInit = Arrays.equals(strides1, stridesAsInit);
            return stridesSameAsInit;
        }

        return false;
    }

    /**
     *
     * Choose tensor dimension for operations with one argument: x=Op(x) or similar
* When doing some operations in parallel, it is necessary to break up * operations along a dimension to * give a set of 1d tensors. The dimension that this is done on is important for performance reasons; * in summary we want to both minimize the number of tensors * , but also minimize the separation between * elements in the buffer (so the resulting operation is efficient - i.e., avoids cache thrashing). * However, achieving both minimal number * of tensors and are not always possible. * @param x NDArray that we want to split * @return The best dimension to split on */ public static int chooseElementWiseTensorDimension(INDArray x) { if(x.isVector()) return ArrayUtil.argMax(x.shape()); //Execute along the vector //doing argMin(max(x.stride(i),y.stride(i))) minimizes the maximum //separation between elements (helps CPU cache) BUT might result in a huge number //of tiny ops - i.e., addi on NDArrays with shape [5,10^6] int opAlongDimensionMinStride = ArrayUtil.argMin(x.stride()); //doing argMax on shape gives us smallest number of largest tensors //but may not be optimal in terms of element separation (for CPU cache etc) int opAlongDimensionMaxLength = ArrayUtil.argMax(x.shape()); //Edge cases: shapes with 1s in them can have stride of 1 on the dimensions of length 1 if(x.isVector() || x.size(opAlongDimensionMinStride) == 1) return opAlongDimensionMaxLength; //Using a heuristic approach here: basically if we get >= 10x as many tensors using the minimum stride //dimension vs. the maximum size dimension, use the maximum size dimension instead //The idea is to avoid choosing wrong dimension in cases like shape=[10,10^6] //Might be able to do better than this with some additional thought int nOpsAlongMinStride = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), opAlongDimensionMinStride)); int nOpsAlongMaxLength = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), opAlongDimensionMaxLength)); if(nOpsAlongMinStride <= 10 * nOpsAlongMaxLength) return opAlongDimensionMinStride; else return opAlongDimensionMaxLength; } /** * Choose tensor dimension for operations with 2 arguments: x=Op(x,y) or similar
* @see #chooseElementWiseTensorDimension(INDArray) */ public static int chooseElementWiseTensorDimension(INDArray x, INDArray y) { if(x.isVector()) return ArrayUtil.argMax(x.shape()); //Execute along the vector //doing argMin(max(x.stride(i),y.stride(i))) minimizes the maximum //separation between elements (helps CPU cache) BUT might result in a huge number //of tiny ops - i.e., addi on NDArrays with shape [5,10^6] int opAlongDimensionMinStride = ArrayUtil.argMinOfMax(x.stride(), y.stride()); //doing argMax on shape gives us smallest number of largest tensors //but may not be optimal in terms of element separation (for CPU cache etc) int opAlongDimensionMaxLength = ArrayUtil.argMax(x.shape()); //Edge case: shapes with 1s in them can have stride of 1 on the dimensions of length 1 if(opAlongDimensionMinStride == opAlongDimensionMaxLength || x.size(opAlongDimensionMinStride)==1) return opAlongDimensionMaxLength; //Using a heuristic approach here: basically if we get >= 10x as many tensors using the minimum stride //dimension vs. the maximum size dimension, use the maximum size dimension instead //The idea is to avoid choosing wrong dimension in cases like shape=[10,10^6] //Might be able to do better than this with some additional thought int nOpsAlongMinStride = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), opAlongDimensionMinStride)); int nOpsAlongMaxLength = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), opAlongDimensionMaxLength)); if(nOpsAlongMinStride <= 10 * nOpsAlongMaxLength) return opAlongDimensionMinStride; else return opAlongDimensionMaxLength; } /**Choose tensor dimension for operations with 3 arguments: z=Op(x,y) or similar
* @see #chooseElementWiseTensorDimension(INDArray) */ public static int chooseElementWiseTensorDimension(INDArray x, INDArray y, INDArray z){ if(x.isVector()) return ArrayUtil.argMax(x.shape()); int opAlongDimensionMinStride = ArrayUtil.argMinOfMax(x.stride(),y.stride(),z.stride()); int opAlongDimensionMaxLength = ArrayUtil.argMax(x.shape()); //Edge case: shapes with 1s in them can have stride of 1 on the dimensions of length 1 if(opAlongDimensionMinStride == opAlongDimensionMaxLength || x.size(opAlongDimensionMinStride)==1) return opAlongDimensionMaxLength; int nOpsAlongMinStride = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), opAlongDimensionMinStride)); int nOpsAlongMaxLength = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), opAlongDimensionMaxLength)); if(nOpsAlongMinStride <= 10 * nOpsAlongMaxLength) return opAlongDimensionMinStride; else return opAlongDimensionMaxLength; } /** Tensor1DStats, used to efficiently iterate through tensors on a matrix (2d NDArray) for element-wise ops * For example, the offset of each 1d tensor can be calculated using only a single tensorAlongDimension method call, * hence is potentially faster than approaches requiring multiple tensorAlongDimension calls.
* Note that this can only (generally) be used for 2d NDArrays. For certain 3+d NDArrays, the tensor starts may not * be in increasing order */ public static Tensor1DStats get1DTensorStats(INDArray array, int...dimension) { int tensorLength = array.size(dimension[0]); //As per tensorssAlongDimension: int numTensors = array.tensorssAlongDimension(dimension); //First tensor always starts with the first element in the NDArray, regardless of dimension int firstTensorOffset = array.offset(); //Next: Need to work out the separation between the start (first element) of each 1d tensor int tensorStartSeparation; int elementWiseStride; //Separation in buffer between elements in the tensor if(numTensors == 1) { tensorStartSeparation = -1; //Not applicable elementWiseStride = array.elementWiseStride(); } else { INDArray secondTensor = array.tensorAlongDimension(1, dimension); tensorStartSeparation = secondTensor.offset() - firstTensorOffset; elementWiseStride = secondTensor.elementWiseStride(); } return new Tensor1DStats(firstTensorOffset,tensorStartSeparation, numTensors,tensorLength,elementWiseStride); } /** Simple class containing values used for calculating various quantities related to 1d tensors.
* offset of ith tensor: firstTensorOffset + i * tensorStartSeparation
* separation between elements in tensor: elementWiseStride
* number of elements in each 1d tensor: tensorLength
* number of 1d tensors: numTensors
*/ @AllArgsConstructor @Data public static class Tensor1DStats { public final int firstTensorOffset; public final int tensorStartSeparation; public final int numTensors; public final int tensorLength; public final int elementWiseStride; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy