// org.deeplearning4j.models.embeddings.wordvectors.WordVectors (Maven / Gradle / Ivy)
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.models.embeddings.wordvectors;
import org.deeplearning4j.models.embeddings.WeightLookupTable;
import org.deeplearning4j.models.embeddings.reader.ModelUtils;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer;
import org.nd4j.linalg.api.ndarray.INDArray;
import java.io.Serializable;
import java.util.Collection;
import java.util.List;
import java.util.Map;
/**
 * Word vectors. Handles operations based on the lookup table
 * and vocab.
 *
 * <p>Words and labels are exchanged as {@link String}s, so every collection
 * of words in this API is typed as a collection of {@code String}.
 *
 * @author Adam Gibson
 */
public interface WordVectors extends Serializable, EmbeddingInitializer {

    /**
     * Returns the UNK string configured for this model.
     *
     * <p>NOTE(review): presumably the placeholder token used for unknown
     * (out-of-vocabulary) words; confirm against the implementations.
     *
     * @return the current UNK string
     */
    String getUNK();

    /**
     * Replaces the UNK string configured for this model.
     *
     * @param newUNK the new UNK string
     */
    void setUNK(String newUNK);

    /**
     * Returns true if the model has this word in the vocab.
     *
     * @param word the word to test for
     * @return true if the model has the word in the vocab
     */
    boolean hasWord(String word);

    /**
     * Get the top words nearest to the given array.
     *
     * <p>NOTE(review): {@code words} is presumably a vector in the embedding
     * space; its expected shape is not specified here.
     *
     * @param words the array to compare against
     * @param top the number of words to get
     * @return the nearest words
     */
    Collection<String> wordsNearest(INDArray words, int top);

    /**
     * "Sum" variant of {@link #wordsNearest(INDArray, int)}.
     *
     * <p>NOTE(review): how the sum-based lookup differs from the plain one is
     * defined by the implementation; confirm before relying on it.
     *
     * @param words the array to compare against
     * @param top the number of words to get
     * @return the nearest words
     */
    Collection<String> wordsNearestSum(INDArray words, int top);

    /**
     * Get the top n words most similar to the given word.
     *
     * @param word the word to compare
     * @param n the n to get
     * @return the top n words
     */
    Collection<String> wordsNearestSum(String word, int n);

    /**
     * Words nearest based on positive and negative words.
     *
     * @param positive the positive words
     * @param negative the negative words
     * @param top the top n words
     * @return the words nearest the mean of the words
     */
    Collection<String> wordsNearestSum(Collection<String> positive, Collection<String> negative, int top);

    /**
     * Accuracy based on questions which are a space separated list of strings
     * where the first word is the query word, the next 2 words are negative,
     * and the last word is the predicted word to be nearest.
     *
     * @param questions the questions to ask
     * @return the accuracy based on these questions
     */
    Map<String, Double> accuracy(List<String> questions);

    /**
     * Returns the index of the given word.
     *
     * <p>NOTE(review): presumably the word's index in the vocab; the value
     * returned for a word that is absent is not specified here.
     *
     * @param word the word to look up
     * @return the index of the word
     */
    int indexOf(String word);

    /**
     * Find all words with similar characters in the vocab.
     *
     * @param word the word to compare
     * @param accuracy the accuracy: 0 to 1
     * @return the list of words that are similar in the vocab
     */
    List<String> similarWordsInVocabTo(String word, double accuracy);

    /**
     * Get the word vector for a given word as a plain array.
     *
     * @param word the word to get the vector for
     * @return the vector for this word
     */
    double[] getWordVector(String word);

    /**
     * Returns the word vector divided by the norm2 of the array.
     *
     * @param word the word to get the matrix for
     * @return the looked up matrix
     */
    INDArray getWordVectorMatrixNormalized(String word);

    /**
     * Get the word vector for a given word as an ndarray.
     *
     * @param word the word to get the matrix for
     * @return the ndarray for this word
     */
    INDArray getWordVectorMatrix(String word);

    /**
     * This method returns 2D array, where each row represents corresponding word/label.
     *
     * @param labels the words/labels to look up
     * @return a 2D array with one row per word/label
     */
    INDArray getWordVectors(Collection<String> labels);

    /**
     * This method returns mean vector, built from words/labels passed in.
     *
     * @param labels the words/labels to average
     * @return the mean vector of the given words/labels
     */
    INDArray getWordVectorsMean(Collection<String> labels);

    /**
     * Words nearest based on positive and negative words.
     *
     * @param positive the positive words
     * @param negative the negative words
     * @param top the top n words
     * @return the words nearest the mean of the words
     */
    Collection<String> wordsNearest(Collection<String> positive, Collection<String> negative, int top);

    /**
     * Get the top n words most similar to the given word.
     *
     * @param word the word to compare
     * @param n the n to get
     * @return the top n words
     */
    Collection<String> wordsNearest(String word, int n);

    /**
     * Returns the similarity of 2 words.
     *
     * @param word the first word
     * @param word2 the second word
     * @return a normalized similarity (cosine similarity)
     */
    double similarity(String word, String word2);

    /**
     * Vocab for the vectors.
     *
     * @return the vocab cache backing these vectors
     */
    VocabCache vocab();

    /**
     * Lookup table for the vectors.
     *
     * @return the weight lookup table backing these vectors
     */
    WeightLookupTable lookupTable();

    /**
     * Specifies ModelUtils to be used to access model.
     *
     * @param utils the ModelUtils instance to use
     */
    void setModelUtils(ModelUtils utils);

    /**
     * Indicates whether the implementation vectorizes words absent in vocabulary.
     *
     * @return true if words absent in the vocabulary are vectorized
     */
    boolean outOfVocabularySupported();
}