cc.mallet.util.Univariate Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mallet Show documentation
Show all versions of mallet Show documentation
MALLET is a Java-based package for statistical natural language processing,
document classification, clustering, topic modeling, information extraction,
and other machine learning applications to text.
The newest version!
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Andrew McCallum [email protected]
*/
package cc.mallet.util;
import java.util.logging.*;
import cc.mallet.util.MalletLogger;
// Obtained from http://www.stat.vt.edu/~sundar/java/code/Univariate.html
// August 2002
/**
 * @(#)Univariate.java
 *
 * DAMAGE (c) 2000 by Sundar Dorai-Raj
 * @author Sundar Dorai-Raj
 * Email: [email protected]
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version,
 * provided that any use properly credits the author.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details at http://www.gnu.org
 */
public class Univariate {
private static Logger logger = MalletLogger.getLogger(Univariate.class.getName());
private double[] x,sortx;
private double[] summary=new double[6];
private boolean isSorted=false;
public double[] five=new double[5];
private int n;
private double mean,variance,stdev;
private double median,min,Q1,Q3,max;
/**
 * Creates a summary object over a private copy of {@code data}, so later
 * changes to the caller's array do not affect this instance.
 *
 * @param data the observations to summarise; a {@code null} argument
 *             results in a {@link NullPointerException}
 */
public Univariate(double[] data) {
    this.x = data.clone();
    this.n = this.x.length;
    createSummaryStats();
}
private void createSummaryStats() {
int i;
mean=0;
for(i=0;i1) variance=(sumxx-n*mean*mean)/(n-1);
stdev=Math.sqrt(variance);
}
/**
 * Fills the shared six-element summary buffer and returns it.
 *
 * Layout: [0] count, [1] mean, [2] variance, [3] standard deviation,
 * [4] sqrt(variance / n), [5] mean divided by element [4].
 *
 * @return the internal buffer itself (not a copy); every call overwrites
 *         and returns the same array
 */
public double[] summary() {
    final double standardError = Math.sqrt(variance / n);
    summary[0] = n;
    summary[1] = mean;
    summary[2] = variance;
    summary[3] = stdev;
    summary[4] = standardError;
    summary[5] = mean / standardError;
    return summary;
}
/**
 * @return the mean stored by {@code createSummaryStats()}
 */
public double mean() {
    return this.mean;
}
/**
 * @return the variance stored by {@code createSummaryStats()}
 */
public double variance() {
    return this.variance;
}
/**
 * @return the standard deviation stored by {@code createSummaryStats()},
 *         i.e. the square root of the variance
 */
public double stdev() {
    return this.stdev;
}
/**
 * Standard error of the mean.
 *
 * @return {@code sqrt(variance / n)}
 */
public double SE() {
    final double varianceOfMean = variance / n;
    return Math.sqrt(varianceOfMean);
}
/**
 * Returns the last element of the sorted working copy, calling
 * {@code sort()} first when {@code isSorted} is not set.
 *
 * @return {@code sortx[n-1]}
 */
public double max() {
    if (!isSorted) {
        sortx = sort();
    }
    final int last = n - 1;
    return sortx[last];
}
/**
 * Returns the first element of the sorted working copy, calling
 * {@code sort()} first when {@code isSorted} is not set.
 *
 * @return {@code sortx[0]}
 */
public double min() {
    if (!isSorted) {
        sortx = sort();
    }
    final int first = 0;
    return sortx[first];
}
/**
 * Median of the data, obtained as the 0.50 quantile.
 *
 * @return {@code quant(0.50)}
 */
public double median() {
    final double half = 0.50;
    return quant(half);
}
/**
 * Returns the q-th sample quantile using the rank {@code (n+1)*q}
 * (1-based) into the sorted data, interpolating linearly between the two
 * neighbouring order statistics when the rank is not a whole number.
 *
 * Fixes relative to the previous version:
 * - ranks below 1 or above n (e.g. q=0, q=1, or extreme q on small samples)
 *   used to index outside the array; they are now clamped to the smallest
 *   and largest observation respectively;
 * - the interpolation weight is the fractional part of the rank (previously
 *   q itself was used, and applied to the wrong neighbour), so e.g. the
 *   0.25 quantile of {1,2,3,4} is 1.25 rather than 1.75;
 * - NaN is treated like any other q outside [0,1].
 *
 * @param q the requested probability, expected in [0,1]
 * @return the quantile; 0 when q is NaN or outside [0,1] (kept for backward
 *         compatibility); {@code Double.NaN} when there are no observations
 */
public double quant(double q) {
    if (!isSorted) {
        sortx = sort();
    }
    // Written as a negated range test so that NaN is rejected as well.
    if (!(q >= 0 && q <= 1)) {
        return 0;
    }
    if (n == 0) {
        return Double.NaN;
    }

    double rank = (n + 1) * q;              // 1-based, may be fractional
    int lowerRank = (int) Math.floor(rank);
    double fraction = rank - lowerRank;

    // Clamp ranks that fall off either end of the sample.
    if (lowerRank < 1) {
        return sortx[0];
    }
    if (lowerRank >= n) {
        return sortx[n - 1];
    }

    double lower = sortx[lowerRank - 1];
    if (fraction == 0) {
        return lower;
    }
    double upper = sortx[lowerRank];
    return lower + fraction * (upper - lower);
}
/**
 * Starts by copying {@code x} into {@code sortx} and setting an initial gap
 * of {@code n/2} (truncated to int), then enters a loop that runs while the
 * gap is at least 1.
 *
 * NOTE(review): this span is textually damaged. The {@code for} header below
 * is cut off in the middle and is not valid Java, and the statements after it
 * ({@code index=i; found=true; ... return(index);}) do not fit a method
 * declared to return {@code double[]}. It looks like everything between a
 * '<' and a later '>' was dropped, fusing the start of sort() with the tail
 * of a different method that returns an int index - presumably the
 * {@code compare(double)} that between() calls; confirm against the pristine
 * source. Any definitions that sat in between are missing from this text and
 * must be restored from the original file rather than guessed.
 */
public double[] sort() {
// Work on a copy so the original ordering in x is preserved.
sortx=(double[])x.clone();
// Initial gap: half the number of observations, truncated toward zero.
int incr=(int)(n*.5);
while (incr >= 1) {
// NOTE(review): truncated line - the loop condition, loop body and the
// remainder of this method are missing here.
for (int i=incr;i=incr && tempt) {
index=i;
found=true;
}
return(index);
}
/**
 * Evaluates {@code compare} for each of the two thresholds, in order.
 *
 * @param t1 first threshold
 * @param t2 second threshold
 * @return a two-element array holding {@code compare(t1)} followed by
 *         {@code compare(t2)}
 */
public int[] between(double t1,double t2) {
    final int first = compare(t1);
    final int second = compare(t2);
    return new int[] {first, second};
}
public int indexOf(double element) {
int index=-1;
for(int i=0;i