All downloads are free. Search and download functionality uses the official Maven repository.

cc.mallet.util.Univariate Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */




/** 
   @author Andrew McCallum [email protected]
 */

package cc.mallet.util;

import java.util.logging.*;

import cc.mallet.util.MalletLogger;

// Obtained from http://www.stat.vt.edu/~sundar/java/code/Univariate.html
// August 2002

/** * @(#)Univariate.java * * DAMAGE (c) 2000 by Sundar Dorai-Raj
  * * @author Sundar Dorai-Raj
  * * Email: [email protected]
  * * This program is free software; you can redistribute it and/or
  * * modify it under the terms of the GNU General Public License 
  * * as published by the Free Software Foundation; either version 2 
  * * of the License, or (at your option) any later version, 
  * * provided that any use properly credits the author. 
  * * This program is distributed in the hope that it will be useful,
  * * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * * GNU General Public License for more details at http://www.gnu.org * * */

public class Univariate {
	// Class-wide logger obtained from MalletLogger; not referenced in the visible part of this class.
	private static Logger logger = MalletLogger.getLogger(Univariate.class.getName());
  // x: private clone of the constructor's input array; sortx: sorted copy of x, assigned from sort().
  private double[] x,sortx;
  // Buffer reused by summary(): {n, mean, variance, stdev, sqrt(variance/n), mean/sqrt(variance/n)}.
  private double[] summary=new double[6];
  // Checked before sortx is read; while false, the order-based accessors call sort() first.
  // NOTE(review): presumably flipped to true inside sort() -- confirm.
  private boolean isSorted=false;
  // NOTE(review): public and mutable, but never written in the visible code; presumably intended
  // for a five-number summary -- confirm before relying on its contents.
  public double[] five=new double[5];
  // Number of observations (x.length), set by the constructor.
  private int n;
  // Moments cached by createSummaryStats(); stdev is Math.sqrt(variance).
  private double mean,variance,stdev;
  // NOTE(review): not read or written in the visible code; the accessors of the same names
  // go through sortx / quant() instead.
  private double median,min,Q1,Q3,max;

  public Univariate(double[] data) {
    x=(double[])data.clone();
    n=x.length;
    createSummaryStats();
  }

  private void createSummaryStats() {
    int i;
    mean=0;
    for(i=0;i1) variance=(sumxx-n*mean*mean)/(n-1);
    stdev=Math.sqrt(variance);
  }

  /**
   * Fills the shared six-slot summary buffer and returns it.
   * Slot layout: [0] n, [1] mean, [2] variance, [3] stdev,
   * [4] sqrt(variance / n), [5] mean divided by slot 4.
   * The same array instance is returned on every call, so callers that need a stable
   * snapshot should copy it.
   *
   * @return the internal summary array, freshly populated
   */
  public double[] summary() {
    final double standardError = Math.sqrt(variance / n);
    summary[0] = n;
    summary[1] = mean;
    summary[2] = variance;
    summary[3] = stdev;
    summary[4] = standardError;
    summary[5] = mean / standardError;
    return summary;
  }


  /**
   * Returns the cached mean.
   *
   * @return the value of the mean field as set by createSummaryStats()
   */
  public double mean() {
    return this.mean;
  }

  /**
   * Returns the cached variance.
   *
   * @return the value of the variance field as set by createSummaryStats()
   */
  public double variance() {
    return this.variance;
  }

  /**
   * Returns the cached standard deviation.
   *
   * @return the value of the stdev field, i.e. the square root of the variance
   */
  public double stdev() {
    return this.stdev;
  }

  /**
   * Computes the standard error of the mean.
   *
   * @return the square root of (variance / n)
   */
  public double SE() {
    final double varianceOfMean = variance / n;
    return Math.sqrt(varianceOfMean);
  }

  /**
   * Returns the last element of the sorted copy of the data, sorting first if that has
   * not happened yet. NOTE(review): this is the maximum provided sort() orders ascending.
   *
   * @return sortx[n - 1]
   */
  public double max() {
    if (!isSorted) {
      sortx = sort();
    }
    final int last = n - 1;
    return sortx[last];
  }

  /**
   * Returns the first element of the sorted copy of the data, sorting first if that has
   * not happened yet. NOTE(review): this is the minimum provided sort() orders ascending.
   *
   * @return sortx[0]
   */
  public double min() {
    if (!isSorted) {
      sortx = sort();
    }
    final int first = 0;
    return sortx[first];
  }
  
  /**
   * Returns the median, defined as the 0.5 quantile.
   *
   * @return quant(0.50)
   */
  public double median() {
    final double half = 0.50;
    return quant(half);
  }
    
  /**
   * Estimates the q-th quantile of the data from its sorted copy.
   * The 1-based position of the quantile is (n + 1) * q. A whole-number position returns
   * that order statistic; otherwise the result is interpolated linearly between the two
   * neighbouring order statistics, weighted by the fractional part of the position.
   * Positions that fall before the first or after the last observation are clamped to the
   * first or last sorted value, so small and large q no longer index outside the array.
   *
   * @param q the requested quantile, expected in [0, 1]
   * @return the estimated quantile; 0 when q is outside [0, 1] or NaN (the historical
   *         behaviour for out-of-range q); Double.NaN when there are no observations
   */
  public double quant(double q) {
    // Sorting stays first so sortx is populated even for a rejected q, as it always was.
    if(!isSorted) sortx=sort();
    // Written as a negated range test so that NaN is rejected too.
    if (!(q >= 0 && q <= 1))
      return (0);
    if (n == 0)
      return Double.NaN;

    double position=(n+1)*q;
    // Clamp: q close to 0 or 1 yields positions outside 1..n.
    if (position <= 1)
      return sortx[0];
    if (position >= n)
      return sortx[n-1];

    int lower=(int)Math.floor(position);
    double fraction=position-lower;
    if (fraction == 0)
      return sortx[lower-1];
    // Weight by distance to each neighbour; the weighted form (rather than lo + f*(hi-lo))
    // avoids producing NaN when one neighbour is infinite.
    return (1-fraction)*sortx[lower-1]+fraction*sortx[lower];
  }

  public double[] sort() {
    sortx=(double[])x.clone();
    int incr=(int)(n*.5);
    while (incr >= 1) {
      for (int i=incr;i=incr && tempt) {
        index=i;
        found=true;
      }
    return(index);
  }

  /**
   * Looks up both thresholds with compare() and returns the two resulting indices.
   * NOTE(review): the meaning of each index is whatever compare(double) defines -- confirm
   * against that method.
   *
   * @param t1 first threshold, evaluated first
   * @param t2 second threshold, evaluated second
   * @return a new two-element array {compare(t1), compare(t2)}
   */
  public int[] between(double t1,double t2) {
    final int first = compare(t1);
    final int second = compare(t2);
    return new int[] { first, second };
  }

  public int indexOf(double element) {
    int index=-1;
    for(int i=0;i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy