All downloads are free. The search and download functionality uses the official Maven repository.

de.datexis.encoder.IEncoder Maven / Gradle / Ivy

package de.datexis.encoder;

import de.datexis.model.Span;
import org.nd4j.linalg.api.ndarray.INDArray;

/**
 * An Encoder converts text (Span) to embedding vectors (INDArray).
 * E.g. word embeddings, bag-of-words.
 *
 * @author Sebastian Arnold
 */
public interface IEncoder {

  /**
   * Get the size of the embedding vector.
   *
   * @return INDArray vector length
   */
  long getEmbeddingVectorSize();

  /**
   * Generate a fixed-size vector of a String.
   *
   * @param word the String to encode
   * @return Mx1 column vector (INDArray) containing the encoded String
   */
  INDArray encode(String word);

  /**
   * Generate a fixed-size vector of a single Span.
   *
   * @param span the Span to encode
   * @return Mx1 column vector (INDArray) containing the encoded Span
   */
  INDArray encode(Span span);

  /**
   * Encode a fixed-size vector from multiple Spans.
   *
   * <p>The parameter is typed as {@code Iterable<? extends Span>} rather than a raw
   * {@code Iterable}, so that collections of any Span subtype are accepted while
   * non-Span elements are rejected at compile time.
   *
   * @param spans the Spans to encode
   * @return Mx1 column vector (INDArray) containing all Spans combined (e.g. average)
   */
  INDArray encode(Iterable<? extends Span> spans);

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy