All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.nd4j.linalg.api.ops.executioner.OpExecutionerUtil Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
/*******************************************************************************
 * Copyright (c) 2015-2018 Skymind, Inc.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/

package org.nd4j.linalg.api.ops.executioner;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.CustomOp;
import org.nd4j.linalg.api.ops.Op;
import org.nd4j.linalg.api.ops.impl.accum.MatchCondition;
import org.nd4j.linalg.exception.ND4JIllegalStateException;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.conditions.Conditions;
import org.nd4j.linalg.util.ArrayUtil;

import java.util.Arrays;

/**Utility functions for the DefaultOpExecutioner
 * @author Alex Black
 */
@Slf4j
public class OpExecutionerUtil {

    private OpExecutionerUtil() {}

    /** Can we do the op (X = Op(X)) directly on the arrays without breaking X up into 1d tensors first?
     * In general, this is possible if the elements of X are contiguous in the buffer, OR if every element
     * of X is at position offset+i*elementWiseStride in the buffer
     * */
    public static boolean canDoOpDirectly(INDArray x) {
        if (x.elementWiseStride() < 1)
            return false;
        if (x.isVector())
            return true;

        //For a single NDArray all we require is that the elements are contiguous in the buffer or every nth element

        //Full buffer -> implies all elements are contiguous (and match)
        long l1 = x.lengthLong();
        long dl1 = x.data().length();
        if (l1 == dl1)
            return true;

        //Strides are same as a zero offset NDArray -> all elements are contiguous (even if not offset 0)
        long[] shape1 = x.shape();
        long[] stridesAsInit =
                        (x.ordering() == 'c' ? ArrayUtil.calcStrides(shape1) : ArrayUtil.calcStridesFortran(shape1));
        boolean stridesSameAsInit = Arrays.equals(x.stride(), stridesAsInit);
        return stridesSameAsInit;
    }

    public static void checkForNaN(INDArray z) {
        if (Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.NAN_PANIC
                        && Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.ANY_PANIC)
            return;

        int match = 0;
        if (!z.isScalar()) {
            MatchCondition condition = new MatchCondition(z, Conditions.isNan());
            match = Nd4j.getExecutioner().exec(condition, Integer.MAX_VALUE).getInt(0);
        } else {
            if (z.data().dataType() == DataBuffer.Type.DOUBLE) {
                if (Double.isNaN(z.getDouble(0)))
                    match = 1;
            } else {
                if (Float.isNaN(z.getFloat(0)))
                    match = 1;
            }
        }

        if (match > 0)
            throw new ND4JIllegalStateException("P.A.N.I.C.! Op.Z() contains " + match + " NaN value(s): ");
    }

    public static void checkForAny(INDArray z) {
        checkForNaN(z);
        checkForInf(z);
    }

    public static void checkForInf(INDArray z) {
        if (Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.INF_PANIC
                        && Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.ANY_PANIC)
            return;

        int match = 0;
        if (!z.isScalar()) {
            MatchCondition condition = new MatchCondition(z, Conditions.isInfinite());
            match = Nd4j.getExecutioner().exec(condition, Integer.MAX_VALUE).getInt(0);
        } else {
            if (z.data().dataType() == DataBuffer.Type.DOUBLE) {
                if (Double.isInfinite(z.getDouble(0)))
                    match = 1;
            } else {
                if (Float.isInfinite(z.getFloat(0)))
                    match = 1;
            }
        }

        if (match > 0)
            throw new ND4JIllegalStateException("P.A.N.I.C.! Op.Z() contains " + match + " Inf value(s)");

    }

    public static void checkForNaN(Op op) {
        if (Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.NAN_PANIC
                        && Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.ANY_PANIC)
            return;

        if (op.z() != null && !(op instanceof MatchCondition)) {
            checkForNaN(op.z());
        }
    }

    public static void checkForInf(Op op) {
        if (Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.INF_PANIC
                        && Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.ANY_PANIC)
            return;

        if (op.z() != null && !(op instanceof MatchCondition)) {
            checkForInf(op.z());
        }
    }

    public static void checkForInf(CustomOp op) {
        if (Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.INF_PANIC
                && Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.ANY_PANIC)
            return;

        for (val input: op.inputArguments())
            checkForInf(input);

        for (val output: op.outputArguments())
            checkForInf(output);
    }


    public static void checkForNaN(CustomOp op) {
        if (Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.NAN_PANIC
                && Nd4j.getExecutioner().getProfilingMode() != OpExecutioner.ProfilingMode.ANY_PANIC)
            return;

        for (val input: op.inputArguments())
            checkForNaN(input);

        for (val output: op.outputArguments())
            checkForNaN(output);
    }

    /** Can we do the transform op (X = Op(X,Y)) directly on the arrays without breaking them up into 1d tensors first? */
    public static boolean canDoOpDirectly(INDArray x, INDArray y) {
        if (x.isVector())
            return true;
        if (x.ordering() != y.ordering())
            return false; //other than vectors, elements in f vs. c NDArrays will never line up
        if (x.elementWiseStride() < 1 || y.elementWiseStride() < 1)
            return false;
        //Full buffer + matching strides -> implies all elements are contiguous (and match)
        //Need strides to match, otherwise elements in buffer won't line up (i.e., c vs. f order arrays)
        long l1 = x.lengthLong();
        long dl1 = x.data().length();
        long l2 = y.lengthLong();
        long dl2 = y.data().length();
        long[] strides1 = x.stride();
        long[] strides2 = y.stride();
        boolean equalStrides = Arrays.equals(strides1, strides2);
        if (l1 == dl1 && l2 == dl2 && equalStrides)
            return true;

        //Strides match + are same as a zero offset NDArray -> all elements are contiguous (and match)
        if (equalStrides) {
            long[] shape1 = x.shape();
            long[] stridesAsInit = (x.ordering() == 'c' ? ArrayUtil.calcStrides(shape1)
                            : ArrayUtil.calcStridesFortran(shape1));
            boolean stridesSameAsInit = Arrays.equals(strides1, stridesAsInit);
            return stridesSameAsInit;
        }

        return false;
    }

    /** Can we do the transform op (Z = Op(X,Y)) directly on the arrays without breaking them up into 1d tensors first? */
    public static boolean canDoOpDirectly(INDArray x, INDArray y, INDArray z) {
        if (x.isVector())
            return true;
        if (x.ordering() != y.ordering() || x.ordering() != z.ordering())
            return false; //other than vectors, elements in f vs. c NDArrays will never line up
        if (x.elementWiseStride() < 1 || y.elementWiseStride() < 1)
            return false;
        //Full buffer + matching strides -> implies all elements are contiguous (and match)
        long l1 = x.lengthLong();
        long dl1 = x.data().length();
        long l2 = y.lengthLong();
        long dl2 = y.data().length();
        long l3 = z.lengthLong();
        long dl3 = z.data().length();
        long[] strides1 = x.stride();
        long[] strides2 = y.stride();
        long[] strides3 = z.stride();
        boolean equalStrides = Arrays.equals(strides1, strides2) && Arrays.equals(strides1, strides3);
        if (l1 == dl1 && l2 == dl2 && l3 == dl3 && equalStrides)
            return true;

        //Strides match + are same as a zero offset NDArray -> all elements are contiguous (and match)
        if (equalStrides) {
            long[] shape1 = x.shape();
            long[] stridesAsInit = (x.ordering() == 'c' ? ArrayUtil.calcStrides(shape1)
                            : ArrayUtil.calcStridesFortran(shape1));
            boolean stridesSameAsInit = Arrays.equals(strides1, stridesAsInit);
            return stridesSameAsInit;
        }

        return false;
    }

    /**
     *
     * Choose tensor dimension for operations with one argument: x=Op(x) or similar
* When doing some operations in parallel, it is necessary to break up * operations along a dimension to * give a set of 1d tensors. The dimension that this is done on is important for performance reasons; * in summary we want to both minimize the number of tensors * , but also minimize the separation between * elements in the buffer (so the resulting operation is efficient - i.e., avoids cache thrashing). * However, achieving both minimal number * of tensors and are not always possible. * @param x NDArray that we want to split * @return The best dimension to split on */ public static int chooseElementWiseTensorDimension(INDArray x) { if (x.isVector()) return ArrayUtil.argMax(x.shape()); //Execute along the vector //doing argMin(max(x.stride(i),y.stride(i))) minimizes the maximum //separation between elements (helps CPU cache) BUT might result in a huge number //of tiny ops - i.e., addi on NDArrays with shape [5,10^6] int opAlongDimensionMinStride = ArrayUtil.argMin(x.stride()); //doing argMax on shape gives us smallest number of largest tensors //but may not be optimal in terms of element separation (for CPU cache etc) int opAlongDimensionMaxLength = ArrayUtil.argMax(x.shape()); //Edge cases: shapes with 1s in them can have stride of 1 on the dimensions of length 1 if (x.isVector() || x.size(opAlongDimensionMinStride) == 1) return opAlongDimensionMaxLength; //Using a heuristic approach here: basically if we get >= 10x as many tensors using the minimum stride //dimension vs. the maximum size dimension, use the maximum size dimension instead //The idea is to avoid choosing wrong dimension in cases like shape=[10,10^6] //Might be able to do better than this with some additional thought int nOpsAlongMinStride = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), opAlongDimensionMinStride)); int nOpsAlongMaxLength = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), opAlongDimensionMaxLength)); if (nOpsAlongMinStride <= 10 * nOpsAlongMaxLength) return opAlongDimensionMinStride; else return opAlongDimensionMaxLength; } /** * Choose tensor dimension for operations with 2 arguments: x=Op(x,y) or similar
* @see #chooseElementWiseTensorDimension(INDArray) */ public static long chooseElementWiseTensorDimension(INDArray x, INDArray y) { if (x.isVector()) return ArrayUtil.argMax(x.shape()); //Execute along the vector //doing argMin(max(x.stride(i),y.stride(i))) minimizes the maximum //separation between elements (helps CPU cache) BUT might result in a huge number //of tiny ops - i.e., addi on NDArrays with shape [5,10^6] long opAlongDimensionMinStride = ArrayUtil.argMinOfMax(x.stride(), y.stride()); //doing argMax on shape gives us smallest number of largest tensors //but may not be optimal in terms of element separation (for CPU cache etc) int opAlongDimensionMaxLength = ArrayUtil.argMax(x.shape()); // FIXME: int cast //Edge case: shapes with 1s in them can have stride of 1 on the dimensions of length 1 if (opAlongDimensionMinStride == opAlongDimensionMaxLength || x.size((int)opAlongDimensionMinStride) == 1) return opAlongDimensionMaxLength; //Using a heuristic approach here: basically if we get >= 10x as many tensors using the minimum stride //dimension vs. the maximum size dimension, use the maximum size dimension instead //The idea is to avoid choosing wrong dimension in cases like shape=[10,10^6] //Might be able to do better than this with some additional thought int nOpsAlongMinStride = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), (int) opAlongDimensionMinStride)); int nOpsAlongMaxLength = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), (int) opAlongDimensionMaxLength)); if (nOpsAlongMinStride <= 10 * nOpsAlongMaxLength) return opAlongDimensionMinStride; else return opAlongDimensionMaxLength; } /**Choose tensor dimension for operations with 3 arguments: z=Op(x,y) or similar
* @see #chooseElementWiseTensorDimension(INDArray) */ public static int chooseElementWiseTensorDimension(INDArray x, INDArray y, INDArray z) { if (x.isVector()) return ArrayUtil.argMax(x.shape()); // FIXME: int cast int opAlongDimensionMinStride = (int) ArrayUtil.argMinOfMax(x.stride(), y.stride(), z.stride()); int opAlongDimensionMaxLength = ArrayUtil.argMax(x.shape()); //Edge case: shapes with 1s in them can have stride of 1 on the dimensions of length 1 if (opAlongDimensionMinStride == opAlongDimensionMaxLength || x.size((int) opAlongDimensionMinStride) == 1) return opAlongDimensionMaxLength; int nOpsAlongMinStride = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), (int) opAlongDimensionMinStride)); int nOpsAlongMaxLength = ArrayUtil.prod(ArrayUtil.removeIndex(x.shape(), opAlongDimensionMaxLength)); if (nOpsAlongMinStride <= 10 * nOpsAlongMaxLength) return opAlongDimensionMinStride; else return opAlongDimensionMaxLength; } /** Tensor1DStats, used to efficiently iterate through tensors on a matrix (2d NDArray) for element-wise ops * For example, the offset of each 1d tensor can be calculated using only a single tensorAlongDimension method call, * hence is potentially faster than approaches requiring multiple tensorAlongDimension calls.
* Note that this can only (generally) be used for 2d NDArrays. For certain 3+d NDArrays, the tensor starts may not * be in increasing order */ public static Tensor1DStats get1DTensorStats(INDArray array, int... dimension) { long tensorLength = array.size(dimension[0]); //As per tensorssAlongDimension: long numTensors = array.tensorssAlongDimension(dimension); //First tensor always starts with the first element in the NDArray, regardless of dimension long firstTensorOffset = array.offset(); //Next: Need to work out the separation between the start (first element) of each 1d tensor long tensorStartSeparation; int elementWiseStride; //Separation in buffer between elements in the tensor if (numTensors == 1) { tensorStartSeparation = -1; //Not applicable elementWiseStride = array.elementWiseStride(); } else { INDArray secondTensor = array.tensorAlongDimension(1, dimension); tensorStartSeparation = secondTensor.offset() - firstTensorOffset; elementWiseStride = secondTensor.elementWiseStride(); } return new Tensor1DStats(firstTensorOffset, tensorStartSeparation, numTensors, tensorLength, elementWiseStride); } /** Simple class containing values used for calculating various quantities related to 1d tensors.
* offset of ith tensor: firstTensorOffset + i * tensorStartSeparation
* separation between elements in tensor: elementWiseStride
* number of elements in each 1d tensor: tensorLength
* number of 1d tensors: numTensors
*/ @AllArgsConstructor @Data public static class Tensor1DStats { public final long firstTensorOffset; public final long tensorStartSeparation; public final long numTensors; public final long tensorLength; public final int elementWiseStride; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy