org.nd4j.linalg.api.ops.impl.shape.Gather Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of nd4j-api Show documentation
The newest version!
/*
 *  ******************************************************************************
 *  *
 *  *
 *  * This program and the accompanying materials are made available under the
 *  * terms of the Apache License, Version 2.0 which is available at
 *  * https://www.apache.org/licenses/LICENSE-2.0.
 *  *
 *  *  See the NOTICE file distributed with this work for additional
 *  *  information regarding copyright ownership.
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 *  * License for the specific language governing permissions and limitations
 *  * under the License.
 *  *
 *  * SPDX-License-Identifier: Apache-2.0
 *  *****************************************************************************
 */

package org.nd4j.linalg.api.ops.impl.shape;

import lombok.val;
import onnx.Onnx;
import org.nd4j.autodiff.samediff.SDIndex;
import org.nd4j.autodiff.samediff.SDVariable;
import org.nd4j.autodiff.samediff.SameDiff;
import org.nd4j.autodiff.samediff.config.SDValue;
import org.nd4j.imports.descriptors.properties.PropertyMapping;
import org.nd4j.imports.graphmapper.tf.TFGraphMapper;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.DynamicCustomOp;
import org.nd4j.linalg.factory.Nd4j;
import org.tensorflow.framework.AttrValue;
import org.tensorflow.framework.GraphDef;
import org.tensorflow.framework.NodeDef;

import java.util.*;

/**
 * Gather op
 */
public class Gather extends DynamicCustomOp {

    protected int[] indices;
    protected int jaxis = 0;

    public Gather() {
    }

    public Gather(SameDiff sameDiff, SDVariable df, SDVariable indices, int axis) {
        this(sameDiff, df, indices, axis, false);
    }

    public Gather(SameDiff sameDiff, SDVariable df, int[] indices, int axis) {
        this(sameDiff, df, indices, axis, false);
    }

    public Gather(SameDiff sameDiff, SDVariable input, int[] indices, int axis, boolean inPlace) {
        super(null, sameDiff, new SDVariable[] {input, sameDiff.constant(Nd4j.createFromArray(indices))}, inPlace);

        addIArgument(axis);
        addIArgument(indices);
        this.jaxis = axis;
        this.indices = indices;
    }

    public Gather(SameDiff sameDiff, SDVariable input, SDVariable indices, int axis, boolean inPlace) {
        super(null, sameDiff, new SDVariable[] {input, indices}, inPlace);
        addIArgument(axis);
        this.jaxis = axis;
    }

    public Gather(INDArray df, int[] indexes, int axis) {
        addInputArgument(df);
        addIArgument(axis);
        addIArgument(indexes);
        this.jaxis = axis;
        this.indices = indices;
    }

    public Gather(INDArray df, INDArray indexes, int axis) {
        addInputArgument(df, indexes);
        addIArgument(axis);
        this.jaxis = axis;
        this.indices = indices;
    }

    @Override
    public String onnxName() {
        return "Gather";
    }


    @Override
    public String[] tensorflowNames() {
        return new String[]{"Gather", "GatherV2"};
    }

    @Override
    public void initFromTensorFlow(NodeDef nodeDef, SameDiff initWith, Map attributesForNode, GraphDef graph) {
        TFGraphMapper.initFunctionFromProperties(nodeDef.getOp(), this, attributesForNode, nodeDef, graph);
    }

    @Override
    public void initFromOnnx(Onnx.NodeProto node, SameDiff initWith, Map attributesForNode, Onnx.GraphProto graph) {

    }

    @Override
    public void configureFromArguments() {
        if(!iArguments.isEmpty()) {
            this.jaxis = iArguments.get(0).intValue();
        }
    }

    @Override
    public Map> mappingsForFunction() {
        Map> ret = new HashMap<>();
        Map map = new HashMap<>();
        val broadcast = PropertyMapping.builder()
                .onnxAttrName("indices")
                .tfInputPosition(1)
                .propertyNames(new String[]{"indices"}).build();

        map.put("indices", broadcast);

        ret.put(tensorflowNames()[0], map);
        ret.put(onnxName(), map);

        Map map2 = new HashMap<>();
        val broadcast2 = PropertyMapping.builder()
                .tfInputPosition(1)
                .propertyNames(new String[]{"indices"}).build();
        map2.put("indices", broadcast2);

        val axis2 = PropertyMapping.builder()
                .tfInputPosition(2)
                .propertyNames(new String[]{"axis"}).build();
        map2.put("axis", axis2);

        ret.put("GatherV2", map2);


        return ret;
    }

    @Override
    public void setPropertiesForFunction(Map properties) {
        if(properties.containsKey("dimensions")) {
            Long dimensions = (Long) properties.get("dimensions");
            this.jaxis = dimensions.intValue();
        }
    }

    @Override
    public String opName() {
        return "gather";
    }

    @Override
    public List doDiff(List i_v) {
        //2 args: input and indices. Plus integer dimension arg
        //Gather backprop is just scatter add
        SDVariable indicesSize = sameDiff.expandDims(args()[1].length(),0);
        SDVariable paramsShape = sameDiff.shape(args()[0]);
        paramsShape = paramsShape.reshape(paramsShape.length());
        SDVariable indicesGrad = sameDiff.zerosLike(arg(1));

        if(jaxis == 0) {
            SDVariable paramsTailShape = paramsShape.getView(SDIndex.interval(sameDiff.constant(1)
                    , sameDiff.constant(1),paramsShape.length()));
            SDVariable valueShape = sameDiff.concat(0,indicesSize,paramsTailShape);
            SDVariable values = sameDiff.reshape(i_v.get(0),valueShape);
            SDVariable indices = sameDiff.flatten(args()[1]);
            SDVariable retGrad = sameDiff.zerosLike(arg());
            SDVariable put = retGrad.put(indices,values,indices).reshape(arg().shape());
            /**
             * TODO: figure out a better way to do a mass assign.
             * We can't match the speed of a sparse gradient so we need to figure out the best way to
             * achieve this with a dense representation.
             *
             * This would ideally be similar to nd4j's put(indices)
             */
            return Arrays.asList(put, indicesGrad);
        } else {
            SDVariable batchDims = sameDiff.constant(0);
            SDVariable outerShape = paramsShape.getView(SDIndex.interval(0,jaxis));
            SDVariable innerShape = paramsShape.getView(
                    SDIndex.interval(sameDiff.constant(jaxis),paramsShape.length()),SDIndex.interval(sameDiff.constant(1),sameDiff.constant(-1)));
            SDVariable valueShape = sameDiff.concat(0,outerShape,
                    sameDiff.constant(-1).castTo(outerShape.dataType()),
                    innerShape.castTo(outerShape.dataType()));


            /**
             * Blow grad up to match values shape, values shape is  not wrong
             */
            SDVariable valuesDims = valueShape.length();
            SDVariable axisDims = outerShape.length();

            SDVariable outerBatchIndices = sameDiff.range(0,0,0,DataType.INT64);
            SDVariable batchAxisIndices = sameDiff.range(batchDims,axisDims, sameDiff.constant(1),DataType.INT64);
            SDVariable innerAxisIndices = sameDiff.range(axisDims.add(1.0),valuesDims,sameDiff.constant(1),DataType.INT64);

            SDVariable indices = sameDiff.reshape(args()[1],indicesSize);

            SDVariable put = sameDiff.unsortedSegmentSum(i_v.get(0), sameDiff.range(sameDiff.constant(0),sameDiff.sizeAt(i_v.get(0),0),sameDiff.constant(1),DataType.INT64), sameDiff.sizeAt(i_v.get(0),0));
            SDVariable values = sameDiff.reshape(put,valueShape);



            SDVariable transposeDims = sameDiff.concat("transposeConcat",0,outerBatchIndices,axisDims,batchAxisIndices,innerAxisIndices);
            SDVariable valuesTranspose = sameDiff.permute(values,transposeDims);

            /**
             * Batch gather grad
             */

            SDVariable paramsGrad = sameDiff.unsortedSegmentSum(valuesTranspose,indices,paramsShape.get(SDIndex.point(jaxis)));
            SDVariable invertTransposeDims = sameDiff.concat(0,outerBatchIndices.castTo(DataType.INT64),batchAxisIndices.add(1).castTo(DataType.INT64),batchDims.castTo(DataType.INT64),innerAxisIndices.castTo(DataType.INT64));
            paramsGrad = sameDiff.permute(paramsGrad,invertTransposeDims);


            return Arrays.asList(paramsGrad, indicesGrad);
        }

    }

    @Override
    public List calculateOutputDataTypes(List dataTypes) {
        //Output type is same as (first) input type
        return Collections.singletonList(dataTypes.get(0));
    }
}