All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.bio.program.sax.HSPSummaryHelper Maven / Gradle / Ivy

There is a newer version: 1.9.7
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.bio.program.sax;

import java.util.HashMap;
import java.util.Locale;
import java.util.StringTokenizer;

import org.xml.sax.SAXException;


/**
 * A Helper class for parsing HSP summary sections...
 * e.g. things like
 *
 * Score =  326 bits (826), Expect = 2e-89
 *
 * Should be passed in a comma separated list.
 *
 * Primary author -
 *                 Simon Brocklehurst (CAT)
 * Other authors  -
 *                 Tim Dilks          (CAT)
 *                 Colin Hardman      (CAT)
 *                 Stuart Johnston    (CAT)
 *                 Mathieu Wiepert    (Mayo Foundation)
 *
 * Copyright 2000 Cambridge Antibody Technology Group plc.
 * 
 *
 * This code released to the biojava project, May 2000
 * under the LGPL license.
 *
 * @author Cambridge Antibody Technology Group plc
 * @author Greg Cox
 * @version 0.1
 *
 */
final class HSPSummaryHelper {

    private HSPSummaryHelper() {
    }

    /**
     * Takes a comma separated String of Blast-like HSP summary
     * information e.g.
     *
     * Score =  210 bits (454), Expect = 6e-53, Frame = +2 / +2
     *
     * and fills the supplied HashMap with key/value pairs, e.g.
     *
     * key: score          -     value: 210
     * key: expectValue    -     value: 6e-53
     *
     * The NCBI and GCG programs are handled by
     * {@link #parseNCBIBlast}, the WU programs by
     * {@link #parseWUBlast}.
     *
     * @param poLine     A String representation of the HSP Summary.
     * @param poMap      The HashMap to put key/value info in. Any
     *                   previous content is removed first.
     * @param poVersion  a BlastLikeVersionSupport value, used to
     *                   select the parser.
     * @exception SAXException if the program reported by
     *                   poVersion is not one of the supported ones.
     */
    public static void parse(String poLine, HashMap poMap,
                             BlastLikeVersionSupport poVersion)
        throws SAXException {

        int iProgram = poVersion.getProgram();

        if ( (iProgram == BlastLikeVersionSupport.NCBI_BLASTN) ||
             (iProgram == BlastLikeVersionSupport.NCBI_BLASTX) ||
             (iProgram == BlastLikeVersionSupport.NCBI_BLASTP) ||
             (iProgram == BlastLikeVersionSupport.NCBI_TBLASTN) ||
             (iProgram == BlastLikeVersionSupport.NCBI_TBLASTX) ) {

            parseNCBIBlast(poLine, poMap, poVersion);
            return;
        }

        if ( (iProgram == BlastLikeVersionSupport.WU_BLASTN) ||
             (iProgram == BlastLikeVersionSupport.WU_BLASTX) ||
             (iProgram == BlastLikeVersionSupport.WU_BLASTP) ||
             (iProgram == BlastLikeVersionSupport.WU_TBLASTN) ||
             (iProgram == BlastLikeVersionSupport.WU_TBLASTX) ) {

            parseWUBlast(poLine, poMap, poVersion);
            return;
        }

        if (iProgram == BlastLikeVersionSupport.GCG_BLASTN) {
            //Similar enough to use NCBI parser for now
            parseNCBIBlast(poLine, poMap, poVersion);
            return;
        }

        //If get here, then program is not supported.
        throw new SAXException(
            "Failed attempting to parse an HSP Summary because program ".
            concat(poVersion.getProgramString()).
            concat(" is not supported."));
    }

    /**
     * Parses an HSP summary line, interpreting any "frame" field
     * according to the NCBI program constants (NCBI_BLASTX,
     * NCBI_TBLASTN, NCBI_TBLASTX).
     *
     * @param poLine     A String representation of the HSP Summary.
     * @param poMap      The HashMap to put key/value info in; it is
     *                   cleared first.
     * @param poVersion  a BlastLikeVersionSupport value; only
     *                   consulted when a "frame" field is found.
     */
    static void parseNCBIBlast(String poLine, HashMap poMap,
                               BlastLikeVersionSupport poVersion) {

        parseSummary(poLine, poMap, poVersion,
                     BlastLikeVersionSupport.NCBI_BLASTX,
                     BlastLikeVersionSupport.NCBI_TBLASTN,
                     BlastLikeVersionSupport.NCBI_TBLASTX);
    }

    /**
     * Parses an HSP summary line, interpreting any "frame" field
     * according to the WU program constants (WU_BLASTX,
     * WU_TBLASTN, WU_TBLASTX).
     *
     * @param poLine     A String representation of the HSP Summary.
     * @param poMap      The HashMap to put key/value info in; it is
     *                   cleared first.
     * @param poVersion  a BlastLikeVersionSupport value; only
     *                   consulted when a "frame" field is found.
     */
    static void parseWUBlast(String poLine, HashMap poMap,
                             BlastLikeVersionSupport poVersion) {

        parseSummary(poLine, poMap, poVersion,
                     BlastLikeVersionSupport.WU_BLASTX,
                     BlastLikeVersionSupport.WU_TBLASTN,
                     BlastLikeVersionSupport.WU_TBLASTX);
    }

    /**
     * Shared implementation of the NCBI and WU parsers. The line is
     * lower-cased, split on commas into fields, and each field is
     * scanned word by word until a recognised field name has been
     * handled.
     *
     * @param poLine     the HSP summary line.
     * @param poMap      receives the key/value pairs; cleared first.
     * @param poVersion  supplies the program when a frame is parsed.
     * @param piBlastx   program constant whose frame is a query frame.
     * @param piTblastn  program constant whose frame is a hit frame.
     * @param piTblastx  program constant with "query / hit" frames.
     */
    private static void parseSummary(String poLine, HashMap poMap,
                                     BlastLikeVersionSupport poVersion,
                                     int piBlastx, int piTblastn,
                                     int piTblastx) {

        poMap.clear();

        //Lower-case with a fixed locale: the default-locale variant
        //maps 'I' to a dotless i under e.g. a Turkish locale, which
        //would stop "Identities" from ever matching "identities".
        StringTokenizer oFields =
            new StringTokenizer(poLine.toLowerCase(Locale.ENGLISH), ",");

        while (oFields.hasMoreTokens()) {

            //words of one "score = 119 bits" type field
            StringTokenizer oWords =
                new StringTokenizer(oFields.nextToken().trim());

            while (oWords.hasMoreTokens()) {
                if (parseField(oWords.nextToken().trim(), oWords, poMap,
                               poVersion, piBlastx, piTblastn, piTblastx)) {
                    //field handled, ignore whatever follows in it
                    break;
                }
            }
        }
    }

    /**
     * Examines one word of a field and, if it is a recognised field
     * name, reads the value(s) that follow it from poWords and
     * stores them in poMap. Values are assumed to follow the layout
     * "name = value ..."; a field that ends early makes the
     * underlying StringTokenizer throw NoSuchElementException.
     *
     * @param poName     the (lower-case) word to examine.
     * @param poWords    the remaining words of the same field.
     * @param poMap      receives the key/value pairs.
     * @param poVersion  supplies the program when a frame is parsed.
     * @param piBlastx   program constant whose frame is a query frame.
     * @param piTblastn  program constant whose frame is a hit frame.
     * @param piTblastx  program constant with "query / hit" frames.
     * @return true if the field has been dealt with and the rest of
     *         it should be skipped, false if scanning of the field
     *         should carry on with the next word.
     */
    private static boolean parseField(String poName,
                                      StringTokenizer poWords,
                                      HashMap poMap,
                                      BlastLikeVersionSupport poVersion,
                                      int piBlastx, int piTblastn,
                                      int piTblastx) {

        if (poName.equals("score")) {
            //assume "score = value ..."
            poWords.nextToken(); //skip =
            poMap.put("score", poWords.nextToken());
            return true;
        }

        if (poName.startsWith("expect")) {
            //could be "expect" or "expect(2)" etc.
            poWords.nextToken(); //skip =
            poMap.put("expectValue", poWords.nextToken());
            return true;
        }

        if (poName.equals("identities")) {
            //assume "identities = 129/168 (76%)"
            poWords.nextToken(); //skip =

            StringTokenizer oCounts =
                new StringTokenizer(poWords.nextToken(), "/");
            poMap.put("numberOfIdentities", oCounts.nextToken());
            poMap.put("alignmentSize", oCounts.nextToken());

            poMap.put("percentageIdentity",
                      firstToken(poWords.nextToken(), "(%)"));
            return true;
        }

        if (poName.equals("positives")) {
            //assume "positives = 129/168 (76%)"
            poWords.nextToken(); //skip =

            poMap.put("numberOfPositives",
                      firstToken(poWords.nextToken(), "/"));
            poMap.put("percentagePositives",
                      firstToken(poWords.nextToken(), "(%)"));
            return true;
        }

        if (poName.equals("strand")) {
            //assume "strand = plus / minus"
            poWords.nextToken(); //skip =
            poMap.put("queryStrand", poWords.nextToken());
            poWords.nextToken(); //skip "/"
            poMap.put("hitStrand", poWords.nextToken());
            return true;
        }

        if (poName.equals("frame")) {
            //assume "frame = +3" for blastx and tblastn
            //assume "frame = +3 / -1" for tblastx
            poWords.nextToken(); //skip =

            int iProgram = poVersion.getProgram();

            if (iProgram == piBlastx) {
                poMap.put("queryFrame", frameLabel(poWords.nextToken()));
                return true;
            }

            if (iProgram == piTblastn) {
                poMap.put("hitFrame", frameLabel(poWords.nextToken()));
                return true;
            }

            if (iProgram == piTblastx) {
                poMap.put("queryFrame", frameLabel(poWords.nextToken()));
                poWords.nextToken(); //skip "/"
                poMap.put("hitFrame", frameLabel(poWords.nextToken()));
                return true;
            }

            //Any other program: only the "=" has been consumed and
            //the remaining words of the field are still scanned.
        }

        return false;
    }

    /**
     * Returns the first token of poText when split on the given
     * delimiter characters, e.g. "76" for "(76%)" and "(%)".
     *
     * @param poText        the text to split.
     * @param poDelimiters  the delimiter characters.
     * @return the first token.
     */
    private static String firstToken(String poText, String poDelimiters) {
        return new StringTokenizer(poText, poDelimiters).nextToken();
    }

    /**
     * Converts a frame such as "+3" or "-1" into "plus3" or
     * "minus1". Any first character other than '+' is reported as
     * "minus"; only the second character is used as the frame
     * number, so a frame shorter than two characters raises an
     * ArrayIndexOutOfBoundsException.
     *
     * @param poFrame  the frame as it appears in the summary.
     * @return the frame with its sign spelled out.
     */
    private static String frameLabel(String poFrame) {
        char[] acFrame = poFrame.toCharArray();
        String oSign = (acFrame[0] == '+') ? "plus" : "minus";
        return oSign + acFrame[1];
    }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy