All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.deeplearning4j.models.embeddings.reader.impl.FlatModelUtils Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
package org.deeplearning4j.models.embeddings.reader.impl;

import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.ops.transforms.Transforms;
import org.nd4j.linalg.primitives.Counter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;

/**
 * Model reader that answers nearest-word queries with a flat (exhaustive) scan: every word in the
 * vocabulary is compared against the query vector using cosine similarity.
 *
 * This reader is suited for model tests, and for cases where flat scan against elements is required.
 *
 * PLEASE NOTE: This reader does NOT normalize underlying weights, it stays intact. All
 * normalization is performed on copies ({@code dup()}) of the stored vectors.
 *
 * NOTE(review): the class is declared without type parameters here; confirm against
 * {@code BasicModelUtils} whether a type parameter is expected.
 *
 * @author [email protected]
 */
public class FlatModelUtils extends BasicModelUtils {
    private static final Logger log = LoggerFactory.getLogger(FlatModelUtils.class);

    /**
     * Creates a reader with no state of its own; the lookup table and vocabulary it scans are the
     * inherited {@code lookupTable} and {@code vocabCache} fields.
     */
    public FlatModelUtils() {

    }

    /**
     * This method does full scan against whole vocabulary, building descending list of similar words.
     * The query word itself is excluded from the result, so up to {@code n} OTHER words are returned.
     *
     * @param label word whose neighbours are requested
     * @param n     maximum number of words to return
     * @return up to {@code n} words ordered by descending similarity, never containing {@code label};
     *         an empty collection if no vector is available for {@code label}
     */
    @Override
    public Collection<String> wordsNearest(String label, int n) {
        INDArray labelVector = lookupTable.vector(label);
        if (labelVector == null) {
            // NOTE(review): presumably the lookup yields null for words outside the vocabulary;
            // answering with an empty result avoids a NullPointerException further down.
            return new ArrayList<>();
        }

        Counter<String> scores = scoreVocabularyAgainst(labelVector);
        // Drop the query word BEFORE trimming; removing it afterwards would leave only n - 1 words.
        scores.removeKey(label);
        scores.keepTopNElements(n);
        return scores.keySetSorted();
    }

    /**
     * This method does full scan against whole vocabulary, building descending list of similar words
     *
     * @param words query vector (for example the mean of several word vectors); it is not modified
     * @param top   maximum number of words to return
     * @return the words nearest the mean of the words, ordered by descending similarity
     */
    @Override
    public Collection<String> wordsNearest(INDArray words, int top) {
        Counter<String> scores = scoreVocabularyAgainst(words);
        scores.keepTopNElements(top);
        return scores.keySetSorted();
    }

    /**
     * Computes the cosine similarity between the given query vector and every vocabulary word.
     *
     * @param query query vector; only a copy of it is normalized
     * @return counter mapping each vocabulary word to its similarity with {@code query}
     */
    private Counter<String> scoreVocabularyAgainst(INDArray query) {
        Counter<String> scores = new Counter<>();

        // The normalized query does not change between iterations, so copy and scale it only once.
        INDArray unitQuery = Transforms.unitVec(query.dup());

        for (String word : vocabCache.words()) {
            // Normalize a copy so the stored weights stay intact.
            INDArray unitCandidate = Transforms.unitVec(lookupTable.vector(word).dup());
            double sim = Transforms.cosineSim(unitQuery, unitCandidate);
            scores.incrementCount(word, (float) sim);
        }

        return scores;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy