// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download or modify it as often as you want.
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.deeplearning4j.text.invertedindex;
import org.nd4j.shade.guava.base.Function;
import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement;
import org.nd4j.common.primitives.Pair;
import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Executor;
/**
* An inverted index for mapping words to documents
* and documents to words
*/
public interface InvertedIndex extends Serializable {
/**
* Iterate over batches
* @return the batch size
*/
Iterator>> batchIter(int batchSize);
/**
* Iterate over documents
* @return
*/
Iterator> docs();
/**
* Unlock the index
*/
void unlock();
/**
* Cleanup any resources used
*/
void cleanup();
/**
* Sampling for creating mini batches
* @return the sampling for mini batches
*/
double sample();
/**
* Iterates over mini batches
* @return the mini batches created by this vectorizer
*/
Iterator> miniBatches();
/**
* Returns a list of words for a document
* @param index
* @return
*/
List document(int index);
/**
* Returns a list of words for a document
* and the associated label
* @param index
* @return
*/
Pair, String> documentWithLabel(int index);
/**
* Returns a list of words associated with the document
* and the associated labels
* @param index
* @return
*/
Pair, Collection> documentWithLabels(int index);
/**
* Returns the list of documents a vocab word is in
* @param vocabWord the vocab word to get documents for
* @return the documents for a vocab word
*/
int[] documents(T vocabWord);
/**
* Returns the number of documents
* @return
*/
int numDocuments();
/**
* Returns a list of all documents
* @return the list of all documents
*/
int[] allDocs();
/**
* Add word to a document
* @param doc the document to add to
* @param word the word to add
*/
void addWordToDoc(int doc, T word);
/**
* Adds words to the given document
* @param doc the document to add to
* @param words the words to add
*/
void addWordsToDoc(int doc, List words);
/**
* Add word to a document
* @param doc the document to add to
* @param word the word to add
*/
void addLabelForDoc(int doc, T word);
/**
* Adds words to the given document
* @param doc the document to add to
*
*/
void addLabelForDoc(int doc, String label);
/**
* Adds words to the given document
* @param doc the document to add to
* @param words the words to add
* @param label the label for the document
*/
void addWordsToDoc(int doc, List words, String label);
/**
* Adds words to the given document
* @param doc the document to add to
* @param words the words to add
* @param label the label for the document
*/
void addWordsToDoc(int doc, List words, T label);
/**
* Add word to a document
* @param doc the document to add to
* @param word the word to add
*/
void addLabelsForDoc(int doc, List word);
/**
* Adds words to the given document
* @param doc the document to add to
* @param label the labels to add
*
*/
void addLabelsForDoc(int doc, Collection label);
/**
* Adds words to the given document
* @param doc the document to add to
* @param words the words to add
* @param label the label for the document
*/
void addWordsToDoc(int doc, List words, Collection label);
/**
* Adds words to the given document
* @param doc the document to add to
* @param words the words to add
* @param label the label for the document
*/
void addWordsToDocVocabWord(int doc, List words, Collection label);
/**
* Finishes saving data
*/
void finish();
/**
* Total number of words in the index
* @return the total number of words in the index
*/
long totalWords();
/**
* For word vectors, this is the batch size for which to train on
* @return the batch size for which to train on
*/
int batchSize();
/**
* Iterate over each document with a label
* @param func the function to apply
* @param exec executor service for execution
*/
void eachDocWithLabels(Function, Collection>, Void> func, Executor exec);
/**
* Iterate over each document with a label
* @param func the function to apply
* @param exec executor service for execution
*/
void eachDocWithLabel(Function, String>, Void> func, Executor exec);
/**
* Iterate over each document
* @param func the function to apply
* @param exec executor service for execution
*/
void eachDoc(Function, Void> func, Executor exec);
}