morfologik.stemming.ISequenceEncoder Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of morfologik-stemming Show documentation

Morfologik Stemming APIs.

There is a newer version: 2.1.9

package morfologik.stemming;

import java.nio.ByteBuffer;

/**
 * The logic of encoding one sequence of bytes relative to another sequence of
 * bytes. The "base" form and the "derived" form are typically the stem of
 * a word and the inflected form of a word.
 *
 * <p>Derived form encoding helps in making the data for the automaton smaller
 * and more repetitive (which results in higher compression rates).
 *
 * <p>See example implementation for details.
 */
public interface ISequenceEncoder {
  /**
   * Encodes <code>target</code> relative to <code>source</code>,
   * optionally reusing the provided {@link ByteBuffer}.
   *
   * @param reuse  A {@link ByteBuffer} to reuse for the result; a new buffer is
   *               allocated if this one does not have enough remaining space.
   * @param source The source byte sequence.
   * @param target The target byte sequence to encode relative to <code>source</code>.
   * @return Returns the {@link ByteBuffer} with encoded <code>target</code>.
   */
  ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuffer target);

  /**
   * Decodes <code>encoded</code> relative to <code>source</code>,
   * optionally reusing the provided {@link ByteBuffer}.
   *
   * @param reuse   A {@link ByteBuffer} to reuse for the result; a new buffer is
   *                allocated if this one does not have enough remaining space.
   * @param source  The source byte sequence.
   * @param encoded The {@linkplain #encode previously encoded} byte sequence.
   * @return Returns the {@link ByteBuffer} with decoded <code>target</code>.
   */
  ByteBuffer decode(ByteBuffer reuse, ByteBuffer source, ByteBuffer encoded);

  /**
   * The number of encoded form's prefix bytes that should be ignored (needed for separator lookup).
   * An ugly workaround for GH-85, should be fixed by prior knowledge of whether the dictionary
   * contains tags; then we can scan for separator right-to-left.
   *
   * @return The number of prefix bytes of the encoded form that should be ignored.
   * @deprecated This method exists only as a workaround for GH-85 (see the linked issue);
   *             it is expected to become unnecessary once that issue is properly fixed.
   * @see "https://github.com/morfologik/morfologik-stemming/issues/85"
   */
  @Deprecated
  int prefixBytes();
}