All downloads are free. The search and download functionality uses the official Maven repository.

org.forester.application.goac Maven / Gradle / Ivy

Go to download

Applications and software libraries for evolutionary biology and comparative genomics research

There is a newer version: 1.039
Show newest version
// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2009 Christian M. Zmasek
// Copyright (C) 2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.application;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;

import org.forester.go.GoId;
import org.forester.go.GoTerm;
import org.forester.go.GoUtils;
import org.forester.go.OBOparser;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.CommandLineArguments;
import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterUtil;
import org.forester.util.GeneralTable;

public class goac {

    private static final String ALL           = "{ALL}";
    final static private String HELP_OPTION_1 = "help";
    final static private String HELP_OPTION_2 = "h";
    final static private String PRG_NAME      = "goac";
    final static private String PRG_VERSION   = "1.03";
    final static private String PRG_DATE      = "2010.04.21";
    final static private String E_MAIL        = "[email protected]";
    final static private String WWW           = "www.phylosoft.org/forester/goac";

    private static void addStats( final SortedMap> data_to_be_analyzed,
                                  final GeneralTable table ) {
        for( final String go : table.getColumnIdentifiers() ) {
            final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
            for( final String label : data_to_be_analyzed.keySet() ) {
                if ( !label.equals( ALL ) ) {
                    final Double value = table.getValue( go, label );
                    stats.addValue( value == null ? 0.0 : value );
                }
            }
            table.setValue( go, "{AVG}", stats.arithmeticMean() );
            table.setValue( go, "{SUM}", stats.getSum() );
            table.setValue( go, "{MED}", stats.median() );
            if ( stats.getN() > 1 ) {
                table.setValue( go, "{SD}", stats.sampleStandardDeviation() );
            }
            else {
                table.setValue( go, "{SD}", new Double( 0 ) );
            }
            table.setValue( go, "{MIN}", stats.getMin() );
            table.setValue( go, "{MAX}", stats.getMax() );
        }
    }

    public static void main( final String args[] ) {
        CommandLineArguments cla = null;
        try {
            cla = new CommandLineArguments( args );
        }
        catch ( final Exception e ) {
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
        if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length == 0 ) ) {
            printHelp();
            System.exit( 0 );
        }
        final List allowed_options = new ArrayList();
        if ( cla.getNumberOfNames() != 3 ) {
            printHelp();
            System.exit( -1 );
        }
        final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
        if ( dissallowed_options.length() > 0 ) {
            ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
        }
        final File obofile = cla.getFile( 0 );
        final File query_superterms_file = cla.getFile( 1 );
        final File exp_file = cla.getFile( 2 );
        final OBOparser parser = new OBOparser( obofile, OBOparser.ReturnType.BASIC_GO_TERM );
        List all_go_terms = null;
        try {
            all_go_terms = parser.parse();
        }
        catch ( final IOException e ) {
            ForesterUtil.fatalError( PRG_NAME, e.toString() );
        }
        final Map goid_to_term_map = GoUtils.createGoIdToGoTermMap( all_go_terms );
        final List query_superterms_ids = new ArrayList();
        SortedMap> query_superterms_id_raw = null;
        try {
            query_superterms_id_raw = GoUtils.parseGoIds( query_superterms_file, "#", "" );
        }
        catch ( final IOException e ) {
            ForesterUtil.printErrorMessage( PRG_NAME, e.getMessage() );
            System.exit( -1 );
        }
        final List queries = query_superterms_id_raw.get( "" );
        for( final GoId id : queries ) {
            if ( !goid_to_term_map.containsKey( id ) ) {
                ForesterUtil.printErrorMessage( PRG_NAME, "\"" + id + "\" not present in \"" + obofile + "\"" );
                System.exit( -1 );
            }
            query_superterms_ids.add( id );
        }
        SortedMap> data_to_be_analyzed = null;
        try {
            data_to_be_analyzed = GoUtils.parseGoIds( exp_file, "#", ">" );
        }
        catch ( final IOException e ) {
            ForesterUtil.printErrorMessage( PRG_NAME, e.getMessage() );
            System.exit( -1 );
        }
        final List all_ids = new ArrayList();
        for( final String label : data_to_be_analyzed.keySet() ) {
            final List experiment_set_ids = data_to_be_analyzed.get( label );
            for( final GoId go_id : experiment_set_ids ) {
                if ( !goid_to_term_map.containsKey( go_id ) ) {
                    ForesterUtil.printErrorMessage( PRG_NAME, "GO id [" + go_id + "] not found in GO id to term map" );
                    System.exit( -1 );
                }
                all_ids.add( go_id );
            }
        }
        if ( data_to_be_analyzed.size() > 1 ) {
            data_to_be_analyzed.put( ALL, all_ids );
        }
        final GeneralTable table_counts = new GeneralTable();
        final GeneralTable table_percentage = new GeneralTable();
        for( final String label : data_to_be_analyzed.keySet() ) {
            System.out.println();
            System.out.println( label + "\t\t\t\t" );
            final List experiment_set_ids = data_to_be_analyzed.get( label );
            Map counts_id = null;
            try {
                counts_id = GoUtils.countCategoriesId( query_superterms_ids, experiment_set_ids, goid_to_term_map );
            }
            catch ( final Exception e ) {
                ForesterUtil.printErrorMessage( PRG_NAME, e.getMessage() );
                System.exit( -1 );
            }
            int sum = 0;
            for( final GoId id : counts_id.keySet() ) {
                sum += counts_id.get( id );
            }
            if ( sum > 0 ) {
                table_counts.setValue( "{total}", label, ( double ) sum );
            }
            for( final GoId id : counts_id.keySet() ) {
                final int counts = counts_id.get( id );
                double percentage = 0.0;
                if ( sum > 0 ) {
                    percentage = ( 100.0 * counts ) / ( sum );
                }
                System.out.println( counts + "\t" + counts + "/" + sum + "\t" + percentage + "\t" + id + "\t"
                        + goid_to_term_map.get( id ).getName() );
                table_counts.setValue( goid_to_term_map.get( id ).getName(), label, ( double ) counts );
                table_percentage.setValue( goid_to_term_map.get( id ).getName(), label, percentage );
            }
        }
        addStats( data_to_be_analyzed, table_counts );
        addStats( data_to_be_analyzed, table_percentage );
        System.out.println();
        System.out.println();
        System.out.println();
        System.out.println( table_counts.toString( ForesterUtil.FORMATTER_3 ) );
        System.out.println();
        System.out.println();
        System.out.println();
        System.out.println( table_percentage.toString( ForesterUtil.FORMATTER_3 ) );
        System.out.println();
    }

    private static void printHelp() {
        ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
        System.out.println( "Usage:" );
        System.out.println();
        System.out
        .println( PRG_NAME
                  + "   " );
        System.out.println();
        System.out.println();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy