/*******************************************************************************
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package smile.sequence;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import smile.data.Attribute;
import smile.data.NominalAttribute;
import smile.data.NumericAttribute;
import smile.math.Math;
import smile.regression.RegressionTree;
import smile.sort.QuickSort;
import smile.util.MulticoreExecutor;
/**
* First-order linear conditional random field. A conditional random field is a
* type of discriminative undirected probabilistic graphical model. It is most
* often used for labeling or parsing of sequential data. A CRF is a Markov
* random field that is trained discriminatively. It is therefore unnecessary
* to model the distribution over the observed variables, which makes it
* possible to include arbitrarily complicated features of the observed
* variables in the model.
*
* <h2>References</h2>
* <ol>
* <li> J. Lafferty, A. McCallum and F. Pereira. Conditional random fields:
* Probabilistic models for segmenting and labeling sequence data. ICML, 2001.</li>
* <li> Thomas G. Dietterich, Guohua Hao, and Adam Ashenfelter. Gradient tree
* boosting for training conditional random fields. JMLR, 2008.</li>
* </ol>
*
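* <p>
* A minimal usage sketch (hypothetical data: the feature count, training
* sequences, labels, and test sequence {@code x} are assumed; {@code Trainer}
* is the nested class defined below):
* <pre>{@code
* // sequences[i][t] holds the sparse binary feature indices of position t
* // in the i-th training sequence; labels[i][t] is its class label.
* CRF crf = new CRF.Trainer(numFeatures, numClasses)
*         .setLearningRate(0.1)
*         .setNumIterations(50)
*         .train(sequences, labels);
* int[] prediction = crf.predict(x); // label a new observation sequence
* }</pre>
*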
* @author Haifeng Li
*/
public class CRF implements SequenceLabeler {
/**
* The number of classes.
*/
private int numClasses;
/**
* The number of sparse binary features.
*/
private int numFeatures = -1;
/**
* The potential functions for each class.
*/
private TreePotentialFunction[] potentials;
/**
* True if using Viterbi algorithm for sequence labeling.
*/
private boolean viterbi = false;
/**
* Returns the feature vector augmented with the class label of the previous
* position.
*
* @param features the feature vector of the current position.
* @param label the class label of the previous position, appended as an
* additional feature.
* @return the augmented feature vector.
*/
public double[] featureset(double[] features, int label) {
double[] fs = new double[features.length + 1];
System.arraycopy(features, 0, fs, 0, features.length);
fs[features.length] = label;
return fs;
}
/**
* Returns the sparse feature set augmented with the class label of the
* previous position.
*
* @param features the indices of the nonzero features of the current position.
* @param label the class label of the previous position, encoded as the
* feature index numFeatures + label.
* @return the augmented sparse feature set.
*/
public int[] featureset(int[] features, int label) {
int[] fs = new int[features.length + 1];
System.arraycopy(features, 0, fs, 0, features.length);
fs[features.length] = numFeatures + label;
return fs;
}
/**
* Regression tree based potential function.
*/
class TreePotentialFunction {
/**
* Constructor.
*
* @param eta the learning rate applied to each tree when computing the
* potential function.
*/
public TreePotentialFunction(double eta) {
this.eta = eta;
}
/**
* Computes the potential function score (before exponentiation).
*/
public double f(double[] features) {
double score = 0.0;
for (RegressionTree tree : trees) {
score += eta * tree.predict(features);
}
return score;
}
/**
* Computes the potential function score (before exponentiation).
*/
public double f(int[] features) {
double score = 0.0;
for (RegressionTree tree : trees) {
score += eta * tree.predict(features);
}
return score;
}
/**
* Adds another tree to the potential function.
*/
public void add(RegressionTree tree) {
trees.add(tree);
}
/**
* Scale parameter in boosting.
*/
private double eta;
/**
* The trees of the gradient boosting ensemble.
*/
private List<RegressionTree> trees = new ArrayList<>();
}
/**
* Dynamic programming table entry in the forward-backward algorithm.
*/
class TrellisNode {
/**
* Forward variable.
*/
double alpha = 1.0;
/**
* Backward variable.
*/
double beta = 1.0;
/**
* Conditional samples (dense encoding).
*/
double[][] samples;
/**
* Conditional samples (sparse encoding).
*/
int[][] sparseSamples;
/**
* Residual of conditional samples as regression tree training target.
*/
double[] target = new double[numClasses];
/**
* Potential function values: scores[k] = F(k, X).
*/
double[] scores = new double[numClasses];
/**
* Exp of the potential function values.
*/
double[] expScores = new double[numClasses];
/**
* The generation of cached score.
*/
int age = 0;
TrellisNode(boolean sparse) {
if (sparse) {
sparseSamples = new int[numClasses][];
} else {
samples = new double[numClasses][];
}
}
}
/**
* Constructor.
*
* @param numClasses the number of classes.
* @param eta the learning rate of potential function.
*/
private CRF(int numClasses, double eta) {
this.numClasses = numClasses;
potentials = new TreePotentialFunction[numClasses];
for (int i = 0; i < numClasses; i++) {
potentials[i] = new TreePotentialFunction(eta);
}
}
/**
* Constructor.
*
* @param numFeatures the number of sparse binary features.
* @param numClasses the number of classes.
* @param eta the learning rate of potential function.
*/
private CRF(int numFeatures, int numClasses, double eta) {
this.numFeatures = numFeatures;
this.numClasses = numClasses;
potentials = new TreePotentialFunction[numClasses];
for (int i = 0; i < numClasses; i++) {
potentials[i] = new TreePotentialFunction(eta);
}
}
/**
* Returns true if the Viterbi algorithm is used for sequence labeling.
*/
public boolean isViterbi() {
return viterbi;
}
/**
* Sets whether to use the Viterbi algorithm for sequence labeling. The
* Viterbi algorithm returns the whole label sequence with the maximum joint
* probability, which makes sense in applications (e.g. part-of-speech
* tagging) that require coherent sequential labeling. The forward-backward
* algorithm labels each position with its individually most likely class,
* which usually gives better per-position accuracy although the resulting
* sequence may not be coherent.
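* <p>
* For example, on a trained model (hypothetical {@code crf} and {@code x}):
* <pre>{@code
* int[] tags = crf.setViterbi(true).predict(x); // coherent whole-sequence decoding
* }</pre>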
*/
public CRF setViterbi(boolean viterbi) {
this.viterbi = viterbi;
return this;
}
@Override
public int[] predict(double[][] x) {
if (viterbi) {
return predictViterbi(x);
} else {
return predictForwardBackward(x);
}
}
/**
* Predicts the label sequence of a sequence of sparse binary features.
*
* @param x a sequence of sparse feature sets, one per position.
* @return the predicted label sequence.
*/
public int[] predict(int[][] x) {
if (viterbi) {
return predictViterbi(x);
} else {
return predictForwardBackward(x);
}
}
/**
* Labels a sequence by the forward-backward algorithm, which picks the
* individually most likely class at each position.
*
* @param x a sequence of feature vectors, one per position of the original
* sequence.
* @return the predicted label sequence.
*/
private int[] predictForwardBackward(double[][] x) {
int n = x.length; // length of sequence
TrellisNode[][] trellis = getTrellis(x);
double[] scaling = new double[n];
forward(trellis, scaling);
backward(trellis);
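// The posterior marginal of class j at position i is proportional to
// alpha * beta; choose the class with the largest product at each position.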
int[] label = new int[n];
double[] p = new double[numClasses];
for (int i = 0; i < n; i++) {
for (int j = 0; j < numClasses; j++) {
p[j] = trellis[i][j].alpha * trellis[i][j].beta;
}
double max = Double.NEGATIVE_INFINITY;
for (int j = 0; j < numClasses; j++) {
if (max < p[j]) {
max = p[j];
label[i] = j;
}
}
}
return label;
}
/**
* Returns the most likely label sequence given the feature sequence by the
* Viterbi algorithm.
*
* @param x a sequence of feature vectors, one per position of the original
* sequence.
* @return the most likely label sequence.
*/
private int[] predictViterbi(double[][] x) {
int n = x.length;
double[][] trellis = new double[n][numClasses];
int[][] psy = new int[n][numClasses];
int p = x[0].length; // dimension
// forward
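// The first position has no previous label; the sentinel value numClasses
// (an extra nominal value) stands in for it.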
double[] features = featureset(x[0], numClasses);
for (int j = 0; j < numClasses; j++) {
trellis[0][j] = potentials[j].f(features);
psy[0][j] = 0;
}
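// Viterbi recursion: trellis[t][i] = max_j (F(i, x_t, j) + trellis[t-1][j]),
// where the candidate previous label j is appended to the feature vector.
// The scores are un-exponentiated log-potentials, so they add; psy records
// the argmax for the trace-back step.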
for (int t = 1; t < n; t++) {
System.arraycopy(x[t], 0, features, 0, p);
for (int i = 0; i < numClasses; i++) {
double max = Double.NEGATIVE_INFINITY;
int maxPsy = 0;
for (int j = 0; j < numClasses; j++) {
features[p] = j;
double delta = potentials[i].f(features) + trellis[t - 1][j];
if (max < delta) {
max = delta;
maxPsy = j;
}
}
trellis[t][i] = max;
psy[t][i] = maxPsy;
}
}
// trace back
int[] label = new int[n];
double max = Double.NEGATIVE_INFINITY;
for (int i = 0; i < numClasses; i++) {
if (max < trellis[n - 1][i]) {
max = trellis[n - 1][i];
label[n - 1] = i;
}
}
for (int t = n - 1; t-- > 0;) {
label[t] = psy[t + 1][label[t + 1]];
}
return label;
}
/**
* Labels a sequence by the forward-backward algorithm, which picks the
* individually most likely class at each position.
*
* @param x a sequence of sparse features taking values in [0, p) at each
* position of the original sequence, where p is the number of features.
* @return the predicted label sequence.
*/
private int[] predictForwardBackward(int[][] x) {
int n = x.length; // length of sequence
TrellisNode[][] trellis = getTrellis(x);
double[] scaling = new double[n];
forward(trellis, scaling);
backward(trellis);
int[] label = new int[n];
double[] p = new double[numClasses];
for (int i = 0; i < n; i++) {
for (int j = 0; j < numClasses; j++) {
p[j] = trellis[i][j].alpha * trellis[i][j].beta;
}
double max = Double.NEGATIVE_INFINITY;
for (int j = 0; j < numClasses; j++) {
if (max < p[j]) {
max = p[j];
label[i] = j;
}
}
}
return label;
}
/**
* Returns the most likely label sequence given the feature sequence by the
* Viterbi algorithm.
*
* @param x a sequence of sparse features taking values in [0, p) at each
* position of the original sequence, where p is the number of features.
* @return the most likely label sequence.
*/
private int[] predictViterbi(int[][] x) {
int n = x.length;
double[][] trellis = new double[n][numClasses];
int[][] psy = new int[n][numClasses];
int p = x[0].length; // dimension
// forward
int[] features = featureset(x[0], numClasses);
for (int j = 0; j < numClasses; j++) {
trellis[0][j] = potentials[j].f(features);
psy[0][j] = 0;
}
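// Same Viterbi recursion as the dense case; here the candidate previous
// label j is encoded as the sparse feature index numFeatures + j.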
for (int t = 1; t < n; t++) {
System.arraycopy(x[t], 0, features, 0, p);
for (int i = 0; i < numClasses; i++) {
double max = Double.NEGATIVE_INFINITY;
int maxPsy = 0;
for (int j = 0; j < numClasses; j++) {
features[p] = numFeatures + j;
double delta = potentials[i].f(features) + trellis[t - 1][j];
if (max < delta) {
max = delta;
maxPsy = j;
}
}
trellis[t][i] = max;
psy[t][i] = maxPsy;
}
}
// trace back
int[] label = new int[n];
double max = Double.NEGATIVE_INFINITY;
for (int i = 0; i < numClasses; i++) {
if (max < trellis[n - 1][i]) {
max = trellis[n - 1][i];
label[n - 1] = i;
}
}
for (int t = n - 1; t-- > 0;) {
label[t] = psy[t + 1][label[t + 1]];
}
return label;
}
/**
* Trainer for CRF.
*/
public static class Trainer {
/**
* The number of classes.
*/
private int numClasses;
/**
* The number of sparse binary features.
*/
private int numFeatures = -1;
/**
* The feature attributes.
*/
private Attribute[] attributes;
/**
* The maximum number of leaf nodes in the tree.
*/
private int maxLeaves = 100;
/**
* The learning rate of potential function.
*/
private double eta = 1.0;
/**
* The number of iterations.
*/
private int iters = 100;
/**
* Constructor.
*
* @param attributes the feature attributes.
* @param numClasses the number of classes.
*/
public Trainer(Attribute[] attributes, int numClasses) {
if (numClasses < 2) {
throw new IllegalArgumentException("Invalid number of classes: " + numClasses);
}
this.numClasses = numClasses;
this.attributes = new Attribute[attributes.length + 1];
System.arraycopy(attributes, 0, this.attributes, 0, attributes.length);
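// One extra nominal value (numClasses) serves as the "no previous label"
// sentinel for the first position of a sequence.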
String[] values = new String[numClasses + 1];
for (int i = 0; i <= numClasses; i++) {
values[i] = Integer.toString(i);
}
this.attributes[attributes.length] = new NominalAttribute("Previous Position Label", values);
}
/**
* Constructor.
*
* @param numFeatures the number of sparse binary features.
* @param numClasses the number of classes.
*/
public Trainer(int numFeatures, int numClasses) {
if (numFeatures < 2) {
throw new IllegalArgumentException("Invalid number of features: " + numClasses);
}
if (numClasses < 2) {
throw new IllegalArgumentException("Invalid number of classes: " + numClasses);
}
this.numFeatures = numFeatures;
this.numClasses = numClasses;
}
/**
* Sets the maximum number of leaf nodes in the tree.
*
* @param maxLeaves the maximum number of leaf nodes in the tree.
*/
public Trainer setMaximumLeafNodes(int maxLeaves) {
if (maxLeaves < 2) {
throw new IllegalArgumentException("Invalid number of leaf nodes: " + maxLeaves);
}
this.maxLeaves = maxLeaves;
return this;
}
/**
* Sets the learning rate of the potential function.
*
* @param eta the learning rate.
*/
public Trainer setLearningRate(double eta) {
if (eta <= 0.0) {
throw new IllegalArgumentException("Invalid learning rate: " + eta);
}
this.eta = eta;
return this;
}
/**
* Sets the number of boosting iterations.
*
* @param iters the number of iterations.
*/
public Trainer setNumIterations(int iters) {
if (iters < 1) {
throw new IllegalArgumentException("Invalid number of iterations: " + iters);
}
this.iters = iters;
return this;
}
/**
* Trains a CRF on sequences of dense feature vectors.
*
* @param sequences the training sequences of feature vectors.
* @param labels the label sequences aligned with the training sequences.
* @return the trained model.
*/
public CRF train(double[][][] sequences, int[][] labels) {
CRF crf = new CRF(numClasses, eta);
double[][] scaling = new double[sequences.length][];
TrellisNode[][][] trellis = new TrellisNode[sequences.length][][];
for (int i = 0; i < sequences.length; i++) {
scaling[i] = new double[sequences[i].length];
trellis[i] = crf.getTrellis(sequences[i]);
}
List<GradientTask> gradientTasks = new ArrayList<>();
for (int i = 0; i < sequences.length; i++) {
gradientTasks.add(new GradientTask(crf, trellis[i], scaling[i], labels[i]));
}
List<BoostingTask> boostingTasks = new ArrayList<>();
for (int i = 0; i < numClasses; i++) {
boostingTasks.add(new BoostingTask(crf.potentials[i], trellis, i));
}
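// Each iteration alternates two parallel phases: the gradient tasks run
// the forward-backward algorithm to compute the functional-gradient
// residuals stored in the trellis, and the boosting tasks fit one
// regression tree per class to those residuals.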
for (int iter = 0; iter < iters; iter++) {
try {
MulticoreExecutor.run(gradientTasks);
MulticoreExecutor.run(boostingTasks);
} catch (Exception e) {
System.err.println(e.getMessage());
}
}
return crf;
}
/**
* Trains a CRF on sequences of sparse binary features.
*
* @param sequences the training sequences of sparse feature sets.
* @param labels the label sequences aligned with the training sequences.
* @return the trained model.
*/
public CRF train(int[][][] sequences, int[][] labels) {
CRF crf = new CRF(numFeatures, numClasses, eta);
double[][] scaling = new double[sequences.length][];
TrellisNode[][][] trellis = new TrellisNode[sequences.length][][];
for (int i = 0; i < sequences.length; i++) {
scaling[i] = new double[sequences[i].length];
trellis[i] = crf.getTrellis(sequences[i]);
}
List<GradientTask> gradientTasks = new ArrayList<>();
for (int i = 0; i < sequences.length; i++) {
gradientTasks.add(new GradientTask(crf, trellis[i], scaling[i], labels[i]));
}
List<BoostingTask> boostingTasks = new ArrayList<>();
for (int i = 0; i < numClasses; i++) {
boostingTasks.add(new BoostingTask(crf.potentials[i], trellis, i));
}
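// As above: alternate gradient computation (forward-backward) with fitting
// one regression tree per class to the residuals.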
for (int iter = 0; iter < iters; iter++) {
try {
MulticoreExecutor.run(gradientTasks);
MulticoreExecutor.run(boostingTasks);
} catch (Exception e) {
System.err.println(e.getMessage());
}
}
return crf;
}
/**
* Calculate gradients with forward-backward algorithm.
*/
class GradientTask implements Callable<Object> {