gate.plugin.learningframework.mallet.PipeScaleMeanVarAll Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of learningframework Show documentation
Show all versions of learningframework Show documentation
A GATE plugin that provides many different machine learning
algorithms for a wide range of NLP-related machine learning tasks like
text classification, tagging, or chunking.
/*
* Copyright (c) 2015-2016 The University Of Sheffield.
*
* This file is part of gateplugin-LearningFramework
* (see https://github.com/GateNLP/gateplugin-LearningFramework).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
package gate.plugin.learningframework.mallet;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Instance;
import gate.plugin.learningframework.mbstats.FVStatsMeanVarAll;
import gate.plugin.learningframework.mbstats.PerFeatureStats;
import gate.util.GateRuntimeException;
import java.io.Serializable;
import java.util.List;
/**
* Pipe for normalizing features so they have mean 0 and standard deviation 1.
*
* If a feature only has one value the variance is 0 so it is impossible to scale
* to variance 1.
*
*/
public class PipeScaleMeanVarAll extends Pipe implements Serializable {
protected double means[];
protected double variances[];
protected boolean normalize[];
/**
* Constructor from alphabet and stats.
* @param alphabet alphabet
* @param stats feature stats
*/
public PipeScaleMeanVarAll(Alphabet alphabet, FVStatsMeanVarAll stats) {
super(alphabet, null);
List pfss = stats.getStats();
int n = pfss.size();
means = new double[n];
variances = new double[n];
normalize = new boolean[n];
for(int i=0; i-->