All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.bio.program.sax.HmmerAlignmentSAXParser Maven / Gradle / Ivy

There is a newer version: 1.9.7
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.bio.program.sax;

import java.io.BufferedReader;
import java.util.StringTokenizer;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/**
 * A reusable class for parsing the HMMER alignment section.
 *
 * Primary author -
 *                 Colin Hardman      (CAT)
 * Other authors  -
 *                 Tim Dilks          (CAT)
 *                 Simon Brocklehurst (CAT)
 *                 Stuart Johnston    (CAT)
 *                 Lawerence Bower    (CAT)
 *                 Derek Crockford    (CAT)
 *                 Neil Benn          (CAT)
 *
 * Copyright 2001 Cambridge Antibody Technology Group plc.
 * 
 *
 * This code released to the biojava project, May 2001
 * under the LGPL license.
 *
 * @author Cambridge Antibody Technology Group plc
 * @author Greg Cox
 * @version 0.1
 *
 */
class HmmerAlignmentSAXParser extends AbstractNativeAppSAXParser {

    /*
     * States of the line-driven state machine in parse().
     *
     * STATE_SUMMARY   - waiting for a non-blank line that does not start
     *                   with a space; it is parsed as the per-domain
     *                   summary line ("id: ..., from x to y: score s, E = e").
     * STATE_FIRST_ROW - current line holds the "*->" start marker; it and
     *                   the two following lines are the first row of the
     *                   alignment (hmm / markup / sequence).
     * STATE_NEXT_ROW  - a continuation row follows; repeated until a row
     *                   containing '<' has been read.
     * STATE_EMIT      - the whole alignment has been collected; emit the
     *                   HSP summary and alignment and close the hit.
     */
    private static final int STATE_SUMMARY   = 0;
    private static final int STATE_FIRST_ROW = 1;
    private static final int STATE_NEXT_ROW  = 2;
    private static final int STATE_EMIT      = 3;

    private BufferedReader       oContents;
    private final AttributesImpl oAtts      = new AttributesImpl();
    private final QName          oAttQName  = new QName(this);
    private String               oLine;

    /**
     * Creates a new alignment section parser.
     *
     * @param poVersion BlastLikeVersionSupport (not used by this
     *                  constructor)
     * @param poNamespacePrefix - the namespace prefix
     */
    HmmerAlignmentSAXParser( BlastLikeVersionSupport poVersion,
                             String poNamespacePrefix ) {
        this.setNamespacePrefix( poNamespacePrefix );
        // For XSLT parser compliance
        this.addPrefixMapping( "biojava", "http://www.biojava.org" );
    }

    /**
     * Parses the alignment section, starting at the line that follows
     * <code>poLine</code>, and emits one Hit element (with nested
     * HitId, HSPCollection, HSP, HSPSummary and BlastLikeAlignment
     * elements) per alignment found.
     *
     * Parsing stops at a line that trims to "//" or that starts (after
     * trimming) with "Histogram of all scores:", or at end of input.
     * If the first line read trims to "[no hits above thresholds]",
     * nothing is emitted and the line after it is returned.
     *
     * @param poContents BufferedReader to parse.
     * @param poLine - the value of the current line (the section
     *                 header; it is skipped).
     * @return the line on which parsing stopped, or <code>null</code>
     *         if the end of the input was reached.
     * @exception SAXException if an event cannot be delivered, if the
     *            input ends in the middle of an alignment, or if
     *            reading from <code>poContents</code> fails.
     */
    public String parse( BufferedReader poContents, String poLine )
        throws SAXException {

        oContents = poContents;
        oLine = poLine;

        try {
            // poLine is the section header; the content starts on the
            // next line.
            oLine = oContents.readLine();
            if ( oLine == null ) {
                // Input ended directly after the header: nothing to parse.
                return null;
            }

            if ( oLine.trim().equals( "[no hits above thresholds]" ) ) {
                oLine = oContents.readLine();
                return oLine;
            }

            int state = STATE_SUMMARY;
            StringBuffer oSequenceMatch = new StringBuffer();
            StringBuffer oHmmMatch = new StringBuffer();
            StringBuffer oMarkup = new StringBuffer();
            int iAlignLen = 0;
            String oScore = "";
            String oEvalue = "";
            String oRawSummary = "";
            String oIdString = "";
            String oFrom = "";
            String oTo = "";
            int index1 = 0;   // column of the "*->" start marker
            int index2 = 0;   // end column (exclusive) of the current row

            while ( oLine != null && !isAlignmentSectionEnd( oLine ) ) {

                switch ( state ) {
                case STATE_SUMMARY: {
                    if ( oLine.trim().equals( "" )
                         || oLine.startsWith( " " ) ) {
                        break;
                    }

                    oMarkup.setLength( 0 );
                    oHmmMatch.setLength( 0 );
                    oSequenceMatch.setLength( 0 );

                    oRawSummary = oLine;
                    int iColon = oLine.indexOf( ":" );
                    oIdString = oLine.substring( 0, iColon );

                    StringTokenizer st = new StringTokenizer
                        ( oLine.substring( iColon + 1 ), ",:" );
                    st.nextToken();                       // metadata
                    String lenString   = st.nextToken();  // from x to y
                    String scoreString = st.nextToken();
                    String eString     = st.nextToken();

                    st = new StringTokenizer( lenString );
                    st.nextToken();                       // from
                    oFrom = st.nextToken();
                    int iFrom = Integer.parseInt( oFrom );
                    st.nextToken();                       // to
                    oTo = st.nextToken();
                    int iTo = Integer.parseInt( oTo );
                    iAlignLen = iTo - iFrom + 1;

                    st = new StringTokenizer( scoreString );
                    st.nextToken();                       // score
                    oScore = st.nextToken();

                    // Split on '=' only, so the value keeps whatever
                    // whitespace follows the '=' sign.
                    st = new StringTokenizer( eString, "=" );
                    st.nextToken();                       // e
                    oEvalue = st.nextToken();

                    // The identity/positive counts need the complete
                    // alignment, so only the enclosing elements are
                    // opened here; the rest is emitted in STATE_EMIT.
                    openHmmerHit( oIdString );
                    state = STATE_FIRST_ROW;
                    break;
                }
                case STATE_FIRST_ROW: {
                    index1 = oLine.indexOf( "*->" );
                    index2 = oLine.indexOf( "<" );
                    if ( index2 == -1 ) {
                        // No end marker on this row: take everything up
                        // to the last non-blank column and expect more.
                        index2 = oLine.trim().length() + index1;
                        state = STATE_NEXT_ROW;
                    } else {
                        state = STATE_EMIT;
                    }
                    oHmmMatch.append
                        ( oLine.substring( index1 + 3, index2 ) );
                    oLine = readHmmerAlignmentRow();
                    oMarkup.append
                        ( oLine.substring( index1 + 3, index2 ) );
                    oLine = readHmmerAlignmentRow();
                    oSequenceMatch.append
                        ( oLine.substring( index1 + 3, index2 ) );
                    break;
                }
                case STATE_NEXT_ROW: {
                    // The current line is the separator between rows;
                    // step over it to the hmm line of the next row.
                    oLine = readHmmerAlignmentRow();
                    index2 = oLine.indexOf( "<" );
                    if ( index2 == -1 ) {
                        index2 = oLine.trim().length() + index1;
                    } else {
                        state = STATE_EMIT;
                    }
                    oHmmMatch.append( oLine.substring( index1, index2 ) );
                    oLine = readHmmerAlignmentRow();
                    oMarkup.append( oLine.substring( index1, index2 ) );
                    oLine = readHmmerAlignmentRow();
                    oSequenceMatch.append
                        ( oLine.substring( index1, index2 ) );
                    break;
                }
                case STATE_EMIT: {
                    String oMarkupString   = oMarkup.toString();
                    String oSequenceString = oSequenceMatch.toString();
                    String oHmmString      = oHmmMatch.toString();

                    emitHmmerHspSummary( oScore, oEvalue, iAlignLen,
                                         oMarkupString, oSequenceString,
                                         oRawSummary );
                    emitHmmerAlignment( oFrom, oTo, oSequenceString,
                                        oMarkupString, oHmmString );
                    closeHmmerHit();

                    state = STATE_SUMMARY;
                    break;
                }
                default:
                    // state is only ever assigned one of the four
                    // constants above.
                    throw new IllegalStateException
                        ( "Unknown parser state: " + state );
                }
                oLine = oContents.readLine();
            } // end while

            if ( oLine == null && state != STATE_SUMMARY ) {
                // Elements opened for the current hit were never closed.
                throw new SAXException
                    ( "Unexpected end of input inside a HMMER alignment" );
            }

        } catch ( java.io.IOException x ) {
            // Report read failures through the declared exception type
            // instead of printing them and returning a partial result.
            throw new SAXException
                ( "I/O error while reading HMMER alignment section", x );
        } // end try/catch

        return oLine;
    }

    /**
     * Counts the occurrences of a character in a string.
     *
     * @param poString the string to scan
     * @param pcChar the character to count
     * @return the number of times pcChar occurs in poString
     */
    int countChar( String poString, char pcChar ) {
        int iCount = 0;
        for ( int i = 0; i < poString.length(); i++ ) {
            if ( poString.charAt( i ) == pcChar ) {
                iCount++;
            }
        }
        return iCount;
    }

    /**
     * Tells whether a line terminates the alignment section: it trims
     * to "//" (hmmpfam) or starts, after trimming, with
     * "Histogram of all scores:" (hmmsearch).
     */
    private boolean isAlignmentSectionEnd( String poLine ) {
        String oTrimmed = poLine.trim();
        return oTrimmed.equals( "//" )
            || oTrimmed.startsWith( "Histogram of all scores:" );
    }

    /**
     * Reads the next line of an alignment row, failing with a
     * SAXException instead of returning null at end of input.
     */
    private String readHmmerAlignmentRow()
        throws java.io.IOException, SAXException {
        String oNext = oContents.readLine();
        if ( oNext == null ) {
            throw new SAXException
                ( "Unexpected end of input inside a HMMER alignment" );
        }
        return oNext;
    }

    /** Builds the prefixed element name for a local element name. */
    private QName hmmerElementName( String poLocalName )
        throws SAXException {
        return new QName( this, this.prefix( poLocalName ) );
    }

    /** Appends one attribute to the shared attribute list. */
    private void addHmmerAttribute( String poQName, String poType,
                                    String poValue )
        throws SAXException {
        oAttQName.setQName( poQName );
        oAtts.addAttribute( oAttQName.getURI(),
                            oAttQName.getLocalName(),
                            oAttQName.getQName(),
                            poType, poValue );
    }

    /** Sends a string as a single characters() event. */
    private void emitHmmerText( String poText ) throws SAXException {
        char[] aoChars = poText.toCharArray();
        this.characters( aoChars, 0, aoChars.length );
    }

    /** Integer percentage (truncated) of piPart relative to piWhole. */
    private int hmmerPercent( int piPart, int piWhole ) {
        return (int)( ( (double)piPart / (double)piWhole ) * 100 );
    }

    /**
     * Opens Hit, emits an empty HitId carrying the id, and opens
     * HSPCollection and HSP. No hit description is emitted.
     */
    private void openHmmerHit( String poId ) throws SAXException {
        oAtts.clear();
        this.startElement( hmmerElementName( "Hit" ), (Attributes)oAtts );

        oAtts.clear();
        addHmmerAttribute( "id", "CDATA", poId );
        addHmmerAttribute( "metaData", "CDATA", "none" );
        this.startElement( hmmerElementName( "HitId" ),
                           (Attributes)oAtts );
        this.endElement( hmmerElementName( "HitId" ) );

        oAtts.clear();
        this.startElement( hmmerElementName( "HSPCollection" ),
                           (Attributes)oAtts );
        this.startElement( hmmerElementName( "HSP" ), (Attributes)oAtts );
    }

    /**
     * Emits the HSPSummary element (with its RawOutput child) for a
     * completed alignment.
     *
     * Gaps are the '-' characters of the sequence row. The alignment
     * size is the summary-line length plus those gaps. Positives are
     * the alignment size minus the blanks in the markup row, and
     * identities are the positives minus the '+' markup characters.
     */
    private void emitHmmerHspSummary( String poScore, String poEvalue,
                                      int piAlignLen, String poMarkup,
                                      String poSequence,
                                      String poRawSummary )
        throws SAXException {

        int iNumberOfPlus   = this.countChar( poMarkup, '+' );
        int iNumberOfSpaces = this.countChar( poMarkup, ' ' );
        int iNumberOfGaps   = this.countChar( poSequence, '-' );

        int iAlignSize          = piAlignLen + iNumberOfGaps;
        int iNumberOfPositives  = iAlignSize - iNumberOfSpaces;
        int iNumberOfIdentities = iNumberOfPositives - iNumberOfPlus;

        oAtts.clear();
        addHmmerAttribute( "score", "CDATA", poScore );
        addHmmerAttribute( "expectValue", "CDATA", poEvalue );
        addHmmerAttribute( "numberOfIdentities", "CDATA",
                           Integer.toString( iNumberOfIdentities ) );
        addHmmerAttribute( "alignmentSize", "CDATA",
                           Integer.toString( iAlignSize ) );
        addHmmerAttribute( "percentageIdentity", "CDATA",
                           Integer.toString
                           ( hmmerPercent( iNumberOfIdentities,
                                           iAlignSize ) ) );
        addHmmerAttribute( "numberOfPositives", "CDATA",
                           Integer.toString( iNumberOfPositives ) );
        addHmmerAttribute( "percentagePositives", "CDATA",
                           Integer.toString
                           ( hmmerPercent( iNumberOfPositives,
                                           iAlignSize ) ) );
        addHmmerAttribute( "numberOfGaps", "CDATA",
                           Integer.toString( iNumberOfGaps ) );
        this.startElement( hmmerElementName( "HSPSummary" ),
                           (Attributes)oAtts );

        // Raw HSPSummary data: the unparsed summary line.
        oAtts.clear();
        addHmmerAttribute( "xml:space", "NMTOKEN", "preserve" );
        this.startElement( hmmerElementName( "RawOutput" ),
                           (Attributes)oAtts );
        emitHmmerText( poRawSummary );
        this.endElement( hmmerElementName( "RawOutput" ) );

        this.endElement( hmmerElementName( "HSPSummary" ) );
    }

    /**
     * Emits the BlastLikeAlignment element with its QuerySequence,
     * MatchConsensus and HitSequence children.
     */
    private void emitHmmerAlignment( String poFrom, String poTo,
                                     String poSequence, String poMarkup,
                                     String poHmm )
        throws SAXException {

        oAtts.clear();
        this.startElement( hmmerElementName( "BlastLikeAlignment" ),
                           (Attributes)oAtts );

        oAtts.clear();
        addHmmerAttribute( "startPosition", "CDATA", poFrom );
        addHmmerAttribute( "stopPosition", "CDATA", poTo );
        this.startElement( hmmerElementName( "QuerySequence" ),
                           (Attributes)oAtts );
        emitHmmerText( poSequence );
        this.endElement( hmmerElementName( "QuerySequence" ) );

        // Match consensus
        oAtts.clear();
        addHmmerAttribute( "xml:space", "NMTOKEN", "preserve" );
        this.startElement( hmmerElementName( "MatchConsensus" ),
                           (Attributes)oAtts );
        emitHmmerText( poMarkup );
        this.endElement( hmmerElementName( "MatchConsensus" ) );

        // HitSequence
        // NOTE(review): the positions are fixed literals rather than
        // values taken from the input - presumably placeholders; confirm.
        oAtts.clear();
        addHmmerAttribute( "startPosition", "CDATA", "100" );
        addHmmerAttribute( "stopPosition", "CDATA", "200" );
        this.startElement( hmmerElementName( "HitSequence" ),
                           (Attributes)oAtts );
        emitHmmerText( poHmm );
        this.endElement( hmmerElementName( "HitSequence" ) );

        // The end tag is built exactly like the start tag (a single
        // prefix) so that the two names match.
        this.endElement( hmmerElementName( "BlastLikeAlignment" ) );
    }

    /** Closes the HSP, HSPCollection and Hit elements, in that order. */
    private void closeHmmerHit() throws SAXException {
        this.endElement( hmmerElementName( "HSP" ) );
        this.endElement( hmmerElementName( "HSPCollection" ) );
        this.endElement( hmmerElementName( "Hit" ) );
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy