All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.itc.irst.tcc.sre.util.ApproximateRandomizationProcedure Maven / Gradle / Ivy

/*
 * ApproximateRandomizationProcedure.java 1.0 01/06/2004
 *
 * Copyright 2002-2004 by  ITC-irst
 * via Sommarive 18 - Povo, 38050 Trento (I)
 * All rights reserved.
 *
 * This software is the confidential and proprietary information
 * of ITC-irst. ("Confidential Information").  You
 * shall not disclose such Confidential Information and shall use
 * it only in accordance with the terms of the license agreement
 * you entered into with ITC-irst.
 */
package org.itc.irst.tcc.sre.util;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Approximate randomization significance test between two system outputs.
 * <p>
 * The gold standard, a baseline output and a preferred output are loaded
 * through {@link Output}, which groups the values of each file by an integer
 * id. Every group of a system is compared with the gold group at the same
 * position by an <code>Evaluator</code> (a project class not defined in this
 * file; only its two-list constructor and its <code>getTP</code>,
 * <code>getFP</code> and <code>getFN</code> accessors are used here). The
 * test statistic is the absolute difference between the F1 scores of the two
 * systems.
 * <p>
 * For each of <code>n</code> iterations the per-group evaluations of the two
 * systems are exchanged with probability 0.5 and the statistic is recomputed.
 * With <code>c</code> the number of shuffles whose statistic is at least the
 * observed one, the p-value is <code>(c + 1) / (n + 1)</code>. All the work
 * is done by the constructor and the outcome is written to the log.
 *
 * @author Claudio Giuliano
 * @version %I%, %G%
 * @since 1.0
 */
public class ApproximateRandomizationProcedure {
    /**
     * Define a static logger variable so that it references the Logger instance
     * named ApproximateRandomizationProcedure.
     */
    static final Logger logger = LoggerFactory
            .getLogger(ApproximateRandomizationProcedure.class.getName());

    /** Number of random shuffles performed by the test. */
    private final int iterations;

    /** Threshold the p-value is compared against. */
    private final double confidence;

    /** Formats the scores written to the log. */
    private final DecimalFormat formatter = new DecimalFormat("###,###.####");

    /**
     * Runs the test on the given files and logs whether the null hypothesis
     * is rejected (p-value below <code>c</code>) or accepted.
     *
     * @param goldFile      gold standard, read with {@link Output#Output(File)}
     * @param baselineFile  baseline output, aligned line by line with the
     *                      gold file
     * @param preferredFile preferred output, aligned line by line with the
     *                      gold file
     * @param n             number of shuffles; must not be negative
     * @param c             threshold for the p-value
     * @throws IOException               if one of the files cannot be read
     * @throws IndexOutOfBoundsException if a system output has fewer groups
     *                                   than the gold standard, or a line of
     *                                   the gold file is malformed
     * @throws IllegalArgumentException  if <code>n</code> is negative
     */
    public ApproximateRandomizationProcedure(File goldFile, File baselineFile,
            File preferredFile, int n, double c) throws IOException,
            IndexOutOfBoundsException {
        // A negative count would produce a negative or infinite p-value and
        // therefore an arbitrary verdict.
        if (n < 0) {
            throw new IllegalArgumentException(
                    "iterations must not be negative: " + n);
        }

        logger.debug("ApproximateRandomizationProcedure.ApproximateRandomizationProcedure: ");
        logger.debug("gold:" + goldFile);
        logger.debug("baseline:" + baselineFile);
        logger.debug("preferred:" + preferredFile);

        iterations = n;
        confidence = c;

        List gold = new Output(goldFile).getList();
        List baseline = new Output(baselineFile, goldFile).getList();
        List preferred = new Output(preferredFile, goldFile).getList();

        logger.debug("gold.size:" + gold.size());
        logger.debug("baseline.size:" + baseline.size());
        logger.debug("preferred.size:" + preferred.size());

        Evaluator[] baselineEval = evalAll(gold, baseline);
        Evaluator[] preferredEval = evalAll(gold, preferred);

        logger.debug("baseline.length:" + baselineEval.length);
        logger.debug("preferred.length:" + preferredEval.length);

        double t = test(baselineEval, preferredEval);

        if (t < confidence) {
            logger.info("reject null hyphotesis (significant)");
        } else {
            logger.info("accept null hyphotesis (insignificant)");
        }
    } // end constructor

    /**
     * Performs the shuffles and returns the p-value.
     * <p>
     * The lower the p-value, the less probable it is that the null hypothesis
     * (the two systems do not differ) holds.
     *
     * @param baselineEval  per-group evaluations of the baseline system
     * @param preferredEval per-group evaluations of the preferred system
     * @return <code>(c + 1) / (iterations + 1)</code>, where <code>c</code>
     *         counts the shuffles whose score difference is greater than or
     *         equal to the observed one
     */
    private double test(Evaluator[] baselineEval, Evaluator[] preferredEval) {
        double bs = calculateScore(baselineEval);
        double ps = calculateScore(preferredEval);

        // observed (true) statistic
        double d = Math.abs(ps - bs);
        logger.info("original score bs, ps,d: " + formatter.format(bs * 100)
                + "%, " + formatter.format(ps * 100) + "%, "
                + formatter.format(d * 100) + "%");

        // running sum and sum of squares of the pseudostatistics
        double sum = 0;
        double ssum = 0;

        // c - number of times that the pseudostatistic is
        // greater or equal to the true statistic
        int c = 0;
        for (int i = 0; i < iterations; i++) {
            // shuffle copies so that the original assignment stays intact
            Evaluator[] baselineEvalCopy = new Evaluator[baselineEval.length];
            Evaluator[] preferredEvalCopy = new Evaluator[preferredEval.length];

            System.arraycopy(baselineEval, 0, baselineEvalCopy, 0,
                    baselineEval.length);
            System.arraycopy(preferredEval, 0, preferredEvalCopy, 0,
                    preferredEval.length);

            // the seed depends only on the iteration index, which makes
            // every run reproducible
            swap(baselineEvalCopy, preferredEvalCopy, new Random(i * 27));

            double di = Math.abs(calculateScore(preferredEvalCopy)
                    - calculateScore(baselineEvalCopy));
            sum += di;
            ssum += di * di;

            if (di >= d) {
                c++;
            }
        } // end for i

        double mean = (iterations > 0) ? sum / iterations : 0;

        // Sample variance: (n * sum(x^2) - sum(x)^2) / (n * (n - 1)).
        // It is computed in double arithmetic so that n * (n - 1) cannot
        // overflow, and it is undefined (reported as 0) for fewer than two
        // samples. Rounding may make the numerator slightly negative, hence
        // the clamp.
        double variance = 0;
        if (iterations > 1) {
            double n = iterations;
            variance = Math.max(0.0, (n * ssum - sum * sum) / (n * (n - 1)));
        }

        double p = (c + 1.0) / (iterations + 1.0);
        logger.info("mean " + mean + ", " + Math.sqrt(variance));
        logger.info(p + " = (" + c + " + 1) / (" + iterations + " +  1)");

        return p;
    } // end test

    /**
     * Computes the F1 score from the true positives, false positives and
     * false negatives summed over all the evaluations.
     *
     * @param m the evaluations to aggregate
     * @return the F1 score, or 0 when there is no true positive (precision,
     *         recall or F1 would otherwise be 0/0, i.e. NaN, and a NaN
     *         statistic makes every comparison in {@link #test} false)
     */
    private double calculateScore(Evaluator[] m) {
        int tp = 0, fp = 0, fn = 0;

        for (int i = 0; i < m.length; i++) {
            tp += m[i].getTP();
            fp += m[i].getFP();
            fn += m[i].getFN();
        } // end for i

        if (tp == 0) {
            return 0;
        }

        // precision
        double p = (double) tp / (tp + fp);

        // recall
        double r = (double) tp / (tp + fn);

        // f1
        return (2 * p * r) / (p + r);
    } // end calculateScore

    /**
     * Exchanges the elements at each position of the two arrays with
     * probability 0.5. Both arrays are modified in place.
     *
     * @param y   first array
     * @param z   second array; must be at least as long as <code>y</code>
     * @param rdm source of randomness; one double is drawn per position
     */
    private void swap(Evaluator[] y, Evaluator[] z, Random rdm) {
        for (int i = 0; i < y.length; i++) {
            if (rdm.nextDouble() < 0.5) {
                Evaluator t = y[i];
                y[i] = z[i];
                z[i] = t;
            }
        } // end for i
    } // end swap

    /**
     * Builds one <code>Evaluator</code> per gold group, pairing it with the
     * group at the same position in the system output.
     *
     * @param gold  list of gold groups (each a <code>List</code>)
     * @param other list of system groups (each a <code>List</code>)
     * @return the evaluations, in the order of <code>gold</code>
     * @throws IndexOutOfBoundsException if <code>other</code> has fewer
     *                                   groups than <code>gold</code>
     */
    private Evaluator[] evalAll(List gold, List other)
            throws IndexOutOfBoundsException {
        int size = gold.size();
        if (other.size() < size) {
            throw new IndexOutOfBoundsException("expected " + size
                    + " groups but found only " + other.size());
        }

        Evaluator[] eval = new Evaluator[size];
        for (int i = 0; i < size; i++) {
            List x = (List) gold.get(i);
            List y = (List) other.get(i);
            eval[i] = new Evaluator(x, y);
        } // end for i

        return eval;
    } // end evalAll

    /**
     * Command line entry point.
     *
     * @param args gold file, baseline file, preferred file, number of
     *             iterations and confidence level, in this order
     * @throws Exception if the numeric arguments cannot be parsed or the
     *                   files cannot be read
     */
    public static void main(String[] args) throws Exception {
        long begin = System.currentTimeMillis();

        if (args.length < 5) {
            System.err
                    .println("java -mx512M org.itc.irst.tcc.sre.util.ApproximateRandomizationProcedure gold baseline preferred iterations confidence-level");
            // a usage error is a failure: report it with a non-zero status
            System.exit(1);
        }

        String g = args[0];
        String b = args[1];
        String p = args[2];
        int n = Integer.parseInt(args[3]);
        double c = Double.parseDouble(args[4]);

        new ApproximateRandomizationProcedure(new File(g), new File(b),
                new File(p), n, c);

        long end = System.currentTimeMillis();
        logger.info("evaluation done in " + (end - begin) + " ms");
    } // end main
} // end class ApproximateRandomizationProcedure

/**
 * Values read from a tab-separated file and grouped by an integer id.
 * <p>
 * The id of a line is the part of its second column that precedes the first
 * <code>'-'</code>. Groups are kept in ascending id order; inside a group
 * the values keep the order in which they were read.
 */
class Output {
    /**
     * Define a static logger variable so that it references the Logger instance
     * named Output.
     */
    static final Logger logger = LoggerFactory.getLogger(Output.class.getName());

    /** Maps an id (<code>Integer</code>) to the <code>List</code> of its values (<code>Double</code>). */
    private final SortedMap map = new TreeMap();

    /**
     * Reads ids and values from the same file: the first column of each line
     * is the value, the second column carries the id.
     *
     * @param ans the file to read
     * @throws IOException if the file cannot be read
     */
    Output(File ans) throws IOException {
        read(ans);
    } // end constructor

    /**
     * Reads the values from <code>ans</code> (one number per line) and the
     * ids from the line with the same number in <code>ref</code>.
     *
     * @param ans the file holding the values
     * @param ref the file holding the ids in its second column
     * @throws IOException if one of the files cannot be read
     */
    Output(File ans, File ref) throws IOException {
        read(ans, ref);
    } // end constructor

    /**
     * Loads a file whose lines hold both the value (first column) and the id
     * (second column). The reader is closed even when a line is malformed.
     */
    private void read(File ans) throws IOException {
        LineNumberReader lr = new LineNumberReader(new FileReader(ans));
        try {
            String line;
            while ((line = lr.readLine()) != null) {
                String[] s = line.split("\t");
                put(parseId(s[1]), Double.valueOf(s[0]));
            } // end while
        } finally {
            lr.close();
        }
    } // end read

    /**
     * Loads the values from <code>ans</code> and the ids from <code>ref</code>,
     * pairing the two files line by line. Reading stops at the end of the
     * shorter file; a warning is logged when the line counts differ. Both
     * readers are closed even when a line is malformed.
     */
    private void read(File ans, File ref) throws IOException {
        LineNumberReader ar = new LineNumberReader(new FileReader(ans));
        try {
            LineNumberReader rr = new LineNumberReader(new FileReader(ref));
            try {
                String a = null, r = null;
                while (((a = ar.readLine()) != null)
                        && ((r = rr.readLine()) != null)) {
                    String[] s = r.split("\t");
                    put(parseId(s[1]), Double.valueOf(a.trim()));
                } // end while

                if (a != null) {
                    // a line was read from ans but ref was already exhausted
                    logger.warn(ans + " has more lines than " + ref
                            + ": ignoring its content from line "
                            + ar.getLineNumber());
                } else if (rr.readLine() != null) {
                    logger.warn(ans + " has fewer lines than " + ref
                            + ": no value for the reference lines from line "
                            + rr.getLineNumber());
                }
            } finally {
                rr.close();
            }
        } finally {
            ar.close();
        }
    } // end read

    /**
     * Extracts the id from a column of the form <code>id-rest</code>.
     *
     * @throws IndexOutOfBoundsException if the column contains no '-'
     * @throws NumberFormatException     if the id is not an integer
     */
    private static Integer parseId(String column) {
        return Integer.valueOf(column.substring(0, column.indexOf('-')));
    } // end parseId

    /** Appends a value to the group of the given id, creating the group on first use. */
    private void put(Integer sentID, Double pred) {
        List list = (List) map.get(sentID);
        if (list == null) {
            list = new ArrayList();
            map.put(sentID, list);
        }

        list.add(pred);
    } // end put

    /**
     * Returns the groups in ascending id order.
     *
     * @return a new list whose elements are the <code>List</code>s of values
     *         of each id; the inner lists are the ones held by this object,
     *         not copies
     */
    public List getList() {
        // the values of a sorted map are iterated in ascending key order
        return new ArrayList(map.values());
    } // end getList

} // end class Output




© 2015 - 2025 Weber Informatics LLC | Privacy Policy