/*
* Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
*
* Smile is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Smile is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Smile. If not, see <https://www.gnu.org/licenses/>.
*/

package smile

import java.util.function.{Function, ToIntFunction}
import smile.data.Tuple
import smile.util.time

/** Sequence labeling algorithms.
*
* @author Haifeng Li
*/
package object sequence {
/** Trains a first-order Hidden Markov Model. A hidden Markov model (HMM) is a
* statistical Markov model in which the system being modeled is assumed to be
* a Markov process with unobserved (hidden) states. An HMM can be considered
* the simplest dynamic Bayesian network. In a regular Markov model, the state
* is directly visible to the observer, and therefore the state transition
* probabilities are the only parameters. In a hidden Markov model, the state is
* not directly visible, but the output, which depends on the state, is visible.
* Each state has a probability distribution over the possible output tokens.
* Therefore, the sequence of tokens generated by an HMM gives some information
* about the sequence of hidden states.
*
* @param observations the observation sequences, whose symbols take
* values in [0, n), where n is the number of unique symbols.
* @param labels the state labels of the observations, which take
* values in [0, p), where p is the number of hidden states.
* @return the fitted HMM.
*/
def hmm(observations: Array[Array[Int]], labels: Array[Array[Int]]): HMM = time("Hidden Markov Model") {
HMM.fit(observations, labels)
}
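/* A hypothetical usage sketch (not part of the library source): training an HMM on
* integer-coded observation sequences and decoding a new sequence. The toy data is
* made up, and the `predict` call is assumed to perform Viterbi decoding of the
* most likely hidden state sequence.
*
*   val observations = Array(Array(0, 1, 1, 2, 2), Array(2, 1, 0, 0, 1))
*   val labels       = Array(Array(0, 0, 1, 1, 1), Array(1, 1, 0, 0, 0))
*   val model  = hmm(observations, labels)
*   val states = model.predict(Array(0, 1, 2))   // one hidden state per observation
*/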
/** Trains a first-order Hidden Markov Model on sequences of generic symbols.
*
* @param observations the observation sequences, whose symbols take
* values in [0, n), where n is the number of unique symbols.
* @param labels the state labels of the observations, which take
* values in [0, p), where p is the number of hidden states.
* @param ordinal a function that maps each symbol to its ordinal number in [0, n).
* @return the fitted HMM labeler.
*/
def hmm[T <: AnyRef](observations: Array[Array[T]], labels: Array[Array[Int]], ordinal: ToIntFunction[T]): HMMLabeler[T] = time("Hidden Markov Model") {
HMMLabeler.fit(observations, labels, ordinal)
}
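/* A hypothetical sketch (illustrative only): labeling sequences of String symbols.
* The `ordinal` argument maps each symbol to its index in [0, n); the vocabulary
* and the `predict` call (assumed to run Viterbi decoding) are illustrative.
*
*   val vocab = Array("cold", "hot")
*   val observations = Array(Array("cold", "cold", "hot"), Array("hot", "hot", "cold"))
*   val labels       = Array(Array(0, 0, 1), Array(1, 1, 0))
*   val labeler = hmm(observations, labels, (s: String) => vocab.indexOf(s))
*   val states  = labeler.predict(Array("hot", "cold"))
*/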
/** Trains a first-order linear-chain conditional random field. A conditional
* random field (CRF) is a type of discriminative undirected probabilistic
* graphical model. It is most often used for labeling or parsing of
* sequential data.
*
* A CRF is a Markov random field that is trained discriminatively. Therefore,
* it is not necessary to model the distribution of the (always observed) input
* variables, which makes it possible to include arbitrarily complicated
* features of the observed variables in the model. This implementation learns
* the potential functions by gradient tree boosting (see the second reference),
* hence the tree-related hyperparameters below.
*
* ====References:====
* - J. Lafferty, A. McCallum and F. Pereira. Conditional random fields: Probabilistic models for segmenting and labeling sequence data. ICML, 2001.
* - Thomas G. Dietterich, Guohua Hao, and Adam Ashenfelter. Gradient Tree Boosting for Training Conditional Random Fields. JMLR, 2008.
*
* @param sequences the sequences of observation feature tuples.
* @param labels the sequence labels, one label per position.
* @param ntrees the number of trees/iterations.
* @param maxDepth the maximum depth of the tree.
* @param maxNodes the maximum number of leaf nodes in the tree.
* @param nodeSize the number of instances in a node below which the tree will
* not split. Setting nodeSize = 5 generally gives good results.
* @param shrinkage the shrinkage parameter in (0, 1] that controls the learning rate of the procedure.
* @return the fitted CRF.
*/
def crf(sequences: Array[Array[Tuple]], labels: Array[Array[Int]], ntrees: Int = 100, maxDepth: Int = 20, maxNodes: Int = 100, nodeSize: Int = 5, shrinkage: Double = 1.0): CRF = time("CRF") {
CRF.fit(sequences, labels, ntrees, maxDepth, maxNodes, nodeSize, shrinkage)
}
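/* A hypothetical sketch (illustrative only): each position of a sequence is
* described by a smile.data.Tuple of features, and `labels` carries one tag per
* position. Data construction is omitted, and the `predict` call on the fitted
* model is assumed to return one label per position.
*
*   val sequences: Array[Array[Tuple]] = ???   // per-position feature tuples
*   val labels: Array[Array[Int]]      = ???   // per-position tags
*   val model = crf(sequences, labels, ntrees = 100, maxDepth = 20, shrinkage = 1.0)
*   val tags  = model.predict(sequences(0))
*/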
/** Trains a first-order linear-chain conditional random field on generic
* observation sequences, using a feature function to map each observation to a
* feature tuple. A conditional random field (CRF) is a type of discriminative
* undirected probabilistic graphical model. It is most often used for labeling
* or parsing of sequential data.
*
* A CRF is a Markov random field that is trained discriminatively. Therefore,
* it is not necessary to model the distribution of the (always observed) input
* variables, which makes it possible to include arbitrarily complicated
* features of the observed variables in the model. This implementation learns
* the potential functions by gradient tree boosting (see the second reference),
* hence the tree-related hyperparameters below.
*
* ====References:====
* - J. Lafferty, A. McCallum and F. Pereira. Conditional random fields: Probabilistic models for segmenting and labeling sequence data. ICML, 2001.
* - Thomas G. Dietterich, Guohua Hao, and Adam Ashenfelter. Gradient Tree Boosting for Training Conditional Random Fields. JMLR, 2008.
*
* @param sequences the observation sequences.
* @param labels the sequence labels, one label per position.
* @param features the feature function that maps an observation to a feature tuple.
* @param ntrees the number of trees/iterations.
* @param maxDepth the maximum depth of the tree.
* @param maxNodes the maximum number of leaf nodes in the tree.
* @param nodeSize the number of instances in a node below which the tree will
* not split. Setting nodeSize = 5 generally gives good results.
* @param shrinkage the shrinkage parameter in (0, 1] that controls the learning rate of the procedure.
* @return the fitted CRF labeler.
*/
def gcrf[T <: AnyRef](sequences: Array[Array[T]], labels: Array[Array[Int]], features: Function[T, Tuple], ntrees: Int = 100, maxDepth: Int = 20, maxNodes: Int = 100, nodeSize: Int = 5, shrinkage: Double = 1.0): CRFLabeler[T] = time("CRF") {
CRFLabeler.fit(sequences, labels, features, ntrees, maxDepth, maxNodes, nodeSize, shrinkage)
}
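/* A hypothetical sketch (illustrative only): labeling raw token sequences by
* supplying a `features` function that turns each token into a smile.data.Tuple.
* The feature extraction is left abstract, and the `predict` call on the fitted
* labeler is assumed to return one tag per token.
*
*   val sentences: Array[Array[String]] = ???
*   val tags: Array[Array[Int]]         = ???
*   val featurize: Function[String, Tuple] = ???   // token => feature tuple
*   val labeler   = gcrf(sentences, tags, featurize)
*   val predicted = labeler.predict(sentences(0))
*/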
/** Workaround for scaladoc [[https://github.com/scala/bug/issues/8124 issue-8124]].
* Users should ignore this object. */
object $dummy
}