org.deeplearning4j.models.embeddings.reader.impl.FlatModelUtils Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of deeplearning4j-nlp Show documentation

There is a newer version: 1.0.0-M2.1

package org.deeplearning4j.models.embeddings.reader.impl;

import org.deeplearning4j.berkeley.Counter;
import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.ops.transforms.Transforms;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Model reader suited for model tests and for cases where a flat (exhaustive) scan against all
 * vocabulary elements is required.
 *
 * PLEASE NOTE: This reader does NOT normalize the underlying weights; they are left intact.
 *
 * @author [email protected]
 */
public class FlatModelUtils extends BasicModelUtils {
    // NOTE(review): the unused SequenceElement import and the diamond on Counter suggest that generic
    // type parameters were stripped from this listing - confirm the class declaration against
    // BasicModelUtils before merging.
    private static final Logger log = LoggerFactory.getLogger(FlatModelUtils.class);

    public FlatModelUtils() {

    }

    /**
     * Scans the whole vocabulary and returns the words most similar to the given word, ordered as
     * produced by {@link #wordsNearest(INDArray, int)}. The query word itself is never part of the
     * result.
     *
     * @param label the word whose neighbours are requested
     * @param n maximum number of neighbours to return
     * @return up to {@code n} words; an empty, mutable collection if {@code n} is not positive or
     *         the lookup table has no vector for {@code label}
     */
    @Override
    public Collection<String> wordsNearest(String label, int n) {
        if (n <= 0) {
            return new ArrayList<>();
        }

        INDArray vector = lookupTable.vector(label);
        if (vector == null) {
            // Presumably the lookup table yields null for out-of-vocabulary words (verify against
            // the lookup table implementation); without this guard the scan fails on vector.dup().
            return new ArrayList<>();
        }

        // The query word is its own best match, so it takes one slot in the scan result.
        // Ask for one extra candidate so that n neighbours remain once it is removed.
        int limit = n < Integer.MAX_VALUE ? n + 1 : n;
        List<String> nearest = new ArrayList<>(wordsNearest(vector, limit));
        nearest.remove(label);

        // If the query word was not among the candidates, drop the surplus tail entry instead.
        if (nearest.size() > n) {
            nearest.subList(n, nearest.size()).clear();
        }
        return nearest;
    }

    /**
     * Scans the whole vocabulary, scoring every word by the cosine similarity between its vector
     * and the given vector, and keeps the best-scoring words.
     *
     * @param words the query vector to compare every vocabulary vector against
     * @param top maximum number of words to keep
     * @return the retained words in the order produced by {@code Counter.getSortedKeys()}
     */
    @Override
    public Collection<String> wordsNearest(INDArray words, int top) {
        Counter<String> distances = new Counter<>();

        for (String s : vocabCache.words()) {
            INDArray otherVec = lookupTable.vector(s);
            // Both operands are copied before the call, presumably so that the similarity op can
            // never modify the caller's vector or the stored weights.
            double sim = Transforms.cosineSim(words.dup(), otherVec.dup());
            distances.incrementCount(s, sim);
        }

        distances.keepTopNKeys(top);
        return distances.getSortedKeys();
    }
}