/*
*
* * Copyright 2015 Skymind,Inc.
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*/
package org.deeplearning4j.models.rntn;
import static org.nd4j.linalg.indexing.NDArrayIndex.interval;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import org.deeplearning4j.berkeley.Pair;
import org.deeplearning4j.models.embeddings.WeightLookupTable;
import org.deeplearning4j.nn.layers.feedforward.autoencoder.recursive.Tree;
import org.deeplearning4j.models.word2vec.Word2Vec;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.optimize.api.ConvexOptimizer;
import org.deeplearning4j.optimize.api.IterationListener;
import org.deeplearning4j.parallel.Parallelization;
import org.deeplearning4j.util.MultiDimensionalMap;
import org.deeplearning4j.util.MultiDimensionalSet;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.rng.Random;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.INDArrayIndex;
import org.nd4j.linalg.indexing.NDArrayIndex;
import org.nd4j.linalg.learning.AdaGrad;
import org.nd4j.linalg.ops.transforms.Transforms;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.concurrent.Future;
import akka.actor.ActorSystem;
import akka.dispatch.Futures;
import akka.dispatch.OnComplete;
import com.google.common.util.concurrent.AtomicDouble;
/**
* Recursive Neural Tensor Network by Socher et al.
*
* This is a modified implementation of the Stanford sentiment-analysis RNTN,
* intended to work with more general-purpose inputs (scene detection with images,
* labeling series of sentences, among others).
*
* This implementation also aims to be faster through parallelization and
* through integration with native matrices/GPUs.
*
* @author Adam Gibson
*
*/
@Deprecated
public class RNTN implements Layer {
protected NeuralNetConfiguration conf;
protected Collection<IterationListener> iterationListeners = new ArrayList<>();
protected double value = 0;
private int numOuts = 3;
//must be same size as word vectors
private int numHidden = 25;
private Random rng;
private boolean useDoubleTensors = true;
private boolean combineClassification = true;
private boolean simplifiedModel = true;
private boolean randomFeatureVectors = true;
private double scalingForInit = 1.0f;
private boolean lowerCasefeatureNames;
protected String activationFunction = "tanh";
protected String outputActivation = "softmax";
protected AdaGrad paramAdaGrad;
protected int numParameters = -1;
/** Regularization cost for the applyTransformToOrigin matrix */
private double regTransformMatrix = 0.001f;
/** Regularization cost for the classification matrices */
private double regClassification = 0.0001f;
/** Regularization cost for the word vectors */
private double regWordVector = 0.0001f;
private int inputMiniBatchSize;
/**
* How many epochs between resets of the adagrad learning rates.
* Set to 0 to never reset.
*/
private int adagradResetFrequency = 1;
/** Regularization cost for the applyTransformToOrigin INDArray */
private double regTransformINDArray = 0.001f;
/**
* N x (2N + 1), where N is the size of the word vectors
*/
private MultiDimensionalMap<String, String, INDArray> binaryTransform;
/**
* 2N x 2N x N, where N is the size of the word vectors
*/
private MultiDimensionalMap<String, String, INDArray> binaryTensors;
/**
* C x (N + 1), where N = size of word vectors, C = number of classes
*/
private Map<String, INDArray> unaryClassification;
private WeightLookupTable featureVectors;
private VocabCache vocabCache;
/**
* C x (N + 1), where N = size of word vectors, C = number of classes
*/
private MultiDimensionalMap<String, String, INDArray> binaryClassification;
/**
* Cached here for easy calculation of the model size;
* MultiDimensionalMap does not return that in O(1) time
*/
private int numBinaryMatrices;
/** How many elements a transformation matrix has */
private int binaryTransformSize;
/** How many elements the binary transformation tensors have */
private int binaryINd4jize;
/** How many elements a classification matrix has */
private int binaryClassificationSize;
/**
* Cached here for easy calculation of the model size;
* MultiDimensionalMap does not return that in O(1) time
*/
private int numUnaryMatrices;
/** How many elements a classification matrix has */
private int unaryClassificationSize;
private INDArray identity;
private Map<Integer, Double> classWeights;
private static final Logger log = LoggerFactory.getLogger(RNTN.class);
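// Actor system backing the asynchronous training futures (see fit/fitAsync);
// released via shutdown()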
private transient ActorSystem rnTnActorSystem = ActorSystem.create("RNTN");
protected int index=0;
private RNTN(int numHidden,
Random rng,
boolean useDoubleTensors,
boolean combineClassification,
boolean simplifiedModel,
boolean randomFeatureVectors,
double scalingForInit,
boolean lowerCasefeatureNames,
String activationFunction,
int adagradResetFrequency,
double regTransformINDArray,
WeightLookupTable featureVectors,
VocabCache vocabCache,
int numBinaryMatrices,
int binaryTransformSize,
int binaryINd4jize,
int binaryClassificationSize,
int numUnaryMatrices,
int unaryClassificationSize,
Map<Integer, Double> classWeights) {
this.vocabCache = vocabCache;
this.numHidden = numHidden;
this.rng = rng;
this.useDoubleTensors = useDoubleTensors;
this.combineClassification = combineClassification;
this.simplifiedModel = simplifiedModel;
this.randomFeatureVectors = randomFeatureVectors;
this.scalingForInit = scalingForInit;
this.lowerCasefeatureNames = lowerCasefeatureNames;
this.activationFunction = activationFunction;
this.adagradResetFrequency = adagradResetFrequency;
this.regTransformINDArray = regTransformINDArray;
this.featureVectors = featureVectors;
this.numBinaryMatrices = numBinaryMatrices;
this.binaryTransformSize = binaryTransformSize;
this.binaryINd4jize = binaryINd4jize;
this.binaryClassificationSize = binaryClassificationSize;
this.numUnaryMatrices = numUnaryMatrices;
this.unaryClassificationSize = unaryClassificationSize;
this.classWeights = classWeights;
init();
}
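/**
* Initializes the model parameters: the identity matrix used by
* randomTransformBlock(), the binary transform/tensor/classification maps
* keyed by (left, right) production categories, the unary classification map,
* and the cached per-matrix element counts (binaryTransformSize, binaryINd4jize,
* binaryClassificationSize, unaryClassificationSize).
* Only the simplified (flat) model is currently supported.
*/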
private void init() {
if(rng == null) {
rng = Nd4j.getRandom();
}
MultiDimensionalSet<String, String> binaryProductions = MultiDimensionalSet.hashSet();
if (simplifiedModel) {
binaryProductions.add("", "");
} else {
// TODO
// figure out what binary productions we have in these trees
// Note: the current sentiment training data does not actually
// have any constituent labels
throw new UnsupportedOperationException("Not yet implemented");
}
Set<String> unaryProductions = new HashSet<>();
if (simplifiedModel) {
unaryProductions.add("");
} else {
// TODO
// figure out what unary productions we have in these trees (preterminals only, after the collapsing)
throw new UnsupportedOperationException("Not yet implemented");
}
identity = Nd4j.eye(numHidden);
binaryTransform = MultiDimensionalMap.newTreeBackedMap();
binaryTensors = MultiDimensionalMap.newTreeBackedMap();
binaryClassification = MultiDimensionalMap.newTreeBackedMap();
// When making a flat model (no semantic untying) the
// basicCategory function will return the same basic category for
// all labels, so all entries will map to the same matrix
for (Pair<String, String> binary : binaryProductions) {
String left = basicCategory(binary.getFirst());
String right = basicCategory(binary.getSecond());
if (binaryTransform.contains(left, right)) {
continue;
}
binaryTransform.put(left, right, randomTransformMatrix());
if (useDoubleTensors) {
binaryTensors.put(left, right, randomBinaryINDArray());
}
if (!combineClassification) {
binaryClassification.put(left, right, randomClassificationMatrix());
}
}
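// Cache parameter bookkeeping: each binary transform is N x (2N + 1), each
// binary tensor holds 4 * N^3 elements (shape [N, 2N, 2N]), and each
// classification matrix is C x (N + 1), where N = numHidden and C = numOuts.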
numBinaryMatrices = binaryTransform.size();
binaryTransformSize = numHidden * (2 * numHidden + 1);
if (useDoubleTensors) {
binaryINd4jize = numHidden * numHidden * numHidden * 4;
} else {
binaryINd4jize = 0;
}
binaryClassificationSize = (combineClassification) ? 0 : numOuts * (numHidden + 1);
unaryClassification = new TreeMap<>();
// When making a flat model (no semantic untying) the
// basicCategory function will return the same basic category for
// all labels, so all entries will map to the same matrix
for (String unary : unaryProductions) {
unary = basicCategory(unary);
if (unaryClassification.containsKey(unary)) {
continue;
}
unaryClassification.put(unary, randomClassificationMatrix());
}
numUnaryMatrices = unaryClassification.size();
unaryClassificationSize = numOuts * (numHidden + 1);
classWeights = new HashMap<>();
}
@Override
public int getIndex() {
return index;
}
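/**
* No-op in this implementation: inputs are supplied as {@link Tree} structures
* via fit(List) rather than as a single input array.
*/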
@Override
public void setInput(INDArray input) {
}
@Override
public void setIndex(int index) {
this.index = index;
}
public Collection<IterationListener> getListeners() {
return iterationListeners;
}
@Override
public void setListeners(IterationListener... listeners) {
}
public void setListeners(Collection<IterationListener> listeners) {
this.iterationListeners = listeners != null ? listeners : new ArrayList<>();
}
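/**
* Creates a randomly initialized binary tensor of shape
* [numHidden, 2 * numHidden, 2 * numHidden], with entries drawn uniformly from
* [-1 / (4 * numHidden), 1 / (4 * numHidden)] and scaled by scalingForInit.
*/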
INDArray randomBinaryINDArray() {
double range = 1.0f / (4.0f * numHidden);
INDArray ret = Nd4j.rand(new int[]{numHidden,numHidden * 2, numHidden * 2}, -range, range, rng);
return ret.muli(scalingForInit);
}
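/**
* Creates the N x (2N + 1) binary transform matrix: two randomly initialized
* N x N blocks (one per child) placed side by side, followed by a bias column
* that is left at zero, all scaled by scalingForInit.
*/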
public INDArray randomTransformMatrix() {
INDArray binary = Nd4j.create(numHidden, numHidden * 2 + 1);
// bias column values are initialized zero
INDArray block = randomTransformBlock();
INDArrayIndex[] indices = new INDArrayIndex[] {interval(0,block.rows()),interval(0,block.columns())};
binary.put(indices,block);
INDArrayIndex[] indices2 = new INDArrayIndex[]{interval(0,block.rows()),interval(numHidden,numHidden + block.columns())};
binary.put(indices2,randomTransformBlock());
Nd4j.getBlasWrapper().level1().scal(binary.length(),scalingForInit,binary);
return binary;
}
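/**
* Returns an N x N block initialized as the identity matrix plus uniform noise
* in [-1 / (2 * sqrt(N)), 1 / (2 * sqrt(N))], so each transform starts close to
* the identity mapping.
*/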
public INDArray randomTransformBlock() {
double range = 1.0 / (Math.sqrt((double) numHidden) * 2.0f);
INDArray ret = Nd4j.rand(numHidden,numHidden,-range,range,rng).addi(identity);
return ret;
}
/**
* Returns matrices of the right size for either binary or unary (terminal) classification
*/
INDArray randomClassificationMatrix() {
// Leave the bias column with 0 values
double range = 1.0 / (Math.sqrt((double) numHidden));
INDArray ret = Nd4j.zeros(numOuts,numHidden + 1);
INDArray insert = Nd4j.rand(numOuts, numHidden, -range, range, rng);
ret.put(new INDArrayIndex[] {interval(0,numOuts),interval(0,numHidden)},insert);
Nd4j.getBlasWrapper().level1().scal(ret.length(), scalingForInit, ret);
return ret;
}
/**
*
* Shut down this network's actor system
*/
public void shutdown() {
rnTnActorSystem.shutdown();
}
/**
* Trains the network on this mini batch and waits for the training set to complete
* @param trainingBatch the trees to iterate on
*/
public void fit(List<Tree> trainingBatch) {
final CountDownLatch c = new CountDownLatch(trainingBatch.size());
List<Future<Object>> futureBatch = fitAsync(trainingBatch);
for(Future