Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download or modify it as often as you want.
org.biojava.bio.program.sax.HmmerAlignmentSAXParser Maven / Gradle / Ivy
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.bio.program.sax;
import java.io.BufferedReader;
import java.util.StringTokenizer;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
/**
* A reusable class for parsing the HMMER alignment section
*
* Primary author -
* Colin Hardman (CAT)
* Other authors -
* Tim Dilks (CAT)
* Simon Brocklehurst (CAT)
* Stuart Johnston (CAT)
* Lawerence Bower (CAT)
* Derek Crockford (CAT)
* Neil Benn (CAT)
*
* Copyright 2001 Cambridge Antibody Technology Group plc.
*
*
* This code released to the biojava project, May 2001
* under the LGPL license.
*
* @author Cambridge Antibody Technology Group plc
* @author Greg Cox
* @version 0.1
*
*/
class HmmerAlignmentSAXParser extends AbstractNativeAppSAXParser {

    /** Parser state: waiting for a non-indented "id: ..." domain header line. */
    private static final int AWAIT_HEADER = 0;
    /** Parser state: the current line is the first alignment row (contains "*->"). */
    private static final int FIRST_BLOCK = 1;
    /** Parser state: the current line precedes a continuation alignment block. */
    private static final int NEXT_BLOCK = 2;
    /** Parser state: the alignment is complete and must be emitted as SAX events. */
    private static final int EMIT_HIT = 3;

    private BufferedReader oContents;
    private AttributesImpl oAtts = new AttributesImpl();
    private QName oAttQName = new QName(this);
    private String oLine;

    /**
     * Creates a new alignment section parser.
     *
     * @param poVersion BlastLikeVersionSupport (not used by this constructor)
     * @param poNamespacePrefix - the namespace prefix
     */
    HmmerAlignmentSAXParser( BlastLikeVersionSupport poVersion,
                             String poNamespacePrefix ) {
        this.setNamespacePrefix(poNamespacePrefix);
        //For XSLT Parser Compliance
        this.addPrefixMapping("biojava","http://www.biojava.org");
    }

    /**
     * Parses the buffer from the current position until the alignments
     * section has been consumed, emitting one <code>Hit</code> element
     * (with nested <code>HitId</code>, <code>HSPCollection</code>,
     * <code>HSP</code>, <code>HSPSummary</code> and
     * <code>BlastLikeAlignment</code>) per domain alignment.
     *
     * The section is processed by a small state machine:
     * <ul>
     * <li>state 0 - skip blank/indented lines; a non-indented line is a
     *     domain header of the form
     *     <code>id: ..., from X to Y: score S, E = V</code></li>
     * <li>state 1 - the line holding <code>*-&gt;</code> starts the
     *     alignment; it and the next two lines give the HMM row, the
     *     markup row and the sequence row</li>
     * <li>state 2 - continuation block (HMM, markup, sequence rows)
     *     read until a row containing <code>&lt;</code> is seen</li>
     * <li>state 3 - the collected rows are turned into SAX events</li>
     * </ul>
     * Parsing stops at a line equal to <code>//</code> or starting with
     * <code>Histogram of all scores:</code>, or at end of input.
     *
     * @param poContents BufferedReader to parse.
     * @param poLine - the value of the current line (the line actually
     *        examined first is the next one read from the reader).
     * @return the line that terminated the section, or <code>null</code>
     *         if the end of the input was reached between alignments.
     * @exception SAXException if the input ends in the middle of an
     *            alignment, if reading fails, or if an event cannot be
     *            delivered
     */
    public String parse( BufferedReader poContents, String poLine )
        throws SAXException {

        oContents = poContents;
        oLine = poLine;

        int state = AWAIT_HEADER;
        StringBuffer oSequenceMatch = new StringBuffer();
        StringBuffer oHmmMatch = new StringBuffer();
        StringBuffer oMarkup = new StringBuffer();
        int iAlignLen = 0;
        String oScore = "";
        String oEvalue = "";
        String oRawSummary = "";
        String oFrom = "";
        String oTo = "";
        int index1 = 0;
        int index2 = 0;

        try {
            oLine = oContents.readLine(); // skip 'Alignments...'
            if ( oLine == null ) {
                // readLine() signals end of input with null; nothing to parse.
                return null;
            }
            if ( oLine.trim().equals( "[no hits above thresholds]" ) ) {
                oLine = oContents.readLine();
                return oLine;
            }

            while ( oLine != null && !isAlignmentSectionEnd( oLine ) ) {
                switch ( state ) {
                case AWAIT_HEADER: {
                    if ( oLine.trim().equals("") || oLine.startsWith(" ") ) {
                        break; // not a domain header
                    }
                    oMarkup.setLength( 0 );
                    oHmmMatch.setLength( 0 );
                    oSequenceMatch.setLength( 0 );
                    oRawSummary = oLine;

                    int iColon = oLine.indexOf(":");
                    String oIdString = oLine.substring( 0, iColon );
                    StringTokenizer st = new StringTokenizer
                        ( oLine.substring( iColon + 1 ), ",:" );
                    st.nextToken(); // metadata
                    String lenString = st.nextToken(); // from x to y
                    String scoreString = st.nextToken();
                    String eString = st.nextToken();

                    st = new StringTokenizer( lenString );
                    st.nextToken(); // from
                    oFrom = st.nextToken();
                    int iFrom = Integer.parseInt( oFrom );
                    st.nextToken(); // to
                    oTo = st.nextToken();
                    int iTo = Integer.parseInt( oTo );

                    st = new StringTokenizer( scoreString );
                    st.nextToken(); // score
                    oScore = st.nextToken();

                    st = new StringTokenizer( eString, "=" );
                    st.nextToken(); // e
                    // Kept exactly as tokenized (not trimmed).
                    oEvalue = st.nextToken();

                    iAlignLen = (iTo-iFrom+1);

                    // The alignment must be parsed before the numbers of
                    // positives and identities are known, so only the
                    // enclosing elements are opened here.
                    emitHitOpening( oIdString );
                    state = FIRST_BLOCK;
                    break;
                }
                case FIRST_BLOCK: {
                    index1 = oLine.indexOf("*->");
                    index2 = oLine.indexOf("<");
                    if ( index2 == -1 ) {
                        // No end marker on this row: take everything up to
                        // the last non-whitespace character.
                        index2 = oLine.trim().length() + index1;
                        state = NEXT_BLOCK;
                    } else {
                        state = EMIT_HIT;
                    }
                    oHmmMatch.append( oLine.substring(index1+3, index2) );
                    oLine = requireAlignmentLine( oContents.readLine() );
                    oMarkup.append( oLine.substring(index1+3, index2) );
                    oLine = requireAlignmentLine( oContents.readLine() );
                    oSequenceMatch.append( oLine.substring(index1+3, index2) );
                    break;
                }
                case NEXT_BLOCK: {
                    // The current line is the separator before the block.
                    oLine = requireAlignmentLine( oContents.readLine() );
                    index2 = oLine.indexOf("<");
                    if ( index2 == -1 ) {
                        index2 = oLine.trim().length() + index1;
                    } else {
                        state = EMIT_HIT;
                    }
                    oHmmMatch.append( oLine.substring( index1, index2 ) );
                    oLine = requireAlignmentLine( oContents.readLine() );
                    oMarkup.append( oLine.substring(index1, index2) );
                    oLine = requireAlignmentLine( oContents.readLine() );
                    oSequenceMatch.append( oLine.substring(index1, index2) );
                    break;
                }
                case EMIT_HIT: {
                    emitHspAndCloseHit( oRawSummary, oFrom, oTo,
                                        oScore, oEvalue, iAlignLen,
                                        oHmmMatch.toString(),
                                        oMarkup.toString(),
                                        oSequenceMatch.toString() );
                    state = AWAIT_HEADER;
                    break;
                }
                default:
                    System.out.println("Can't reach here");
                    break;
                }
                oLine = oContents.readLine();
            } // end while
        } catch (java.io.IOException x) {
            // Report the failure to the caller instead of printing it and
            // returning as if the section had been parsed successfully.
            throw new SAXException
                ( "File read interrupted while parsing HMMER alignments", x );
        } // end try/catch

        if ( state == EMIT_HIT ) {
            // The section ended directly after a complete alignment:
            // emit it so the elements opened for this hit are closed.
            emitHspAndCloseHit( oRawSummary, oFrom, oTo,
                                oScore, oEvalue, iAlignLen,
                                oHmmMatch.toString(),
                                oMarkup.toString(),
                                oSequenceMatch.toString() );
        } else if ( oLine == null && state != AWAIT_HEADER ) {
            throw new SAXException
                ( "Unexpected end of input inside a HMMER alignment" );
        }
        return oLine;
    }

    /**
     * Counts the occurrences of a character in a string.
     *
     * @param poString the string to search
     * @param pcChar the character to count
     * @return the number of times <code>pcChar</code> occurs
     */
    int countChar( String poString, char pcChar ) {
        int iCount = 0;
        int index = poString.indexOf( pcChar );
        while ( index != -1 ) {
            iCount++;
            index = poString.indexOf( pcChar, index + 1 );
        }
        return iCount;
    }

    /**
     * Tests whether a line ends the alignments section: a line equal to
     * "//" (hmmpfam) or starting with "Histogram of all scores:"
     * (hmmsearch), ignoring surrounding whitespace.
     */
    private boolean isAlignmentSectionEnd( String poCandidate ) {
        String oTrimmed = poCandidate.trim();
        return oTrimmed.equals("//")
            || oTrimmed.startsWith("Histogram of all scores:");
    }

    /**
     * Returns the given line, or fails if it is <code>null</code>, i.e.
     * the reader ran out of input while alignment rows were expected.
     */
    private String requireAlignmentLine( String poCandidate )
        throws SAXException {
        if ( poCandidate == null ) {
            throw new SAXException
                ( "Unexpected end of input inside a HMMER alignment" );
        }
        return poCandidate;
    }

    /** Adds one attribute to the shared attribute list. */
    private void addAlignmentAttribute( String poName, String poType,
                                        String poValue )
        throws SAXException {
        oAttQName.setQName( poName );
        oAtts.addAttribute( oAttQName.getURI(),
                            oAttQName.getLocalName(),
                            oAttQName.getQName(),
                            poType, poValue );
    }

    /** Formats part/whole as a truncated integer percentage string. */
    private String percentageOf( int piPart, int piWhole ) {
        return Integer.toString
            ( (int)(((double)piPart/(double)piWhole)*100) );
    }

    /**
     * Opens the Hit, HSPCollection and HSP elements for one domain and
     * emits its (empty) HitId element.
     */
    private void emitHitOpening( String poIdString ) throws SAXException {
        oAtts.clear();
        this.startElement( new QName(this,this.prefix("Hit")),
                           (Attributes)oAtts );

        oAtts.clear();
        addAlignmentAttribute( "id", "CDATA", poIdString );
        addAlignmentAttribute( "metaData", "CDATA", "none" );
        this.startElement( new QName(this,this.prefix("HitId")),
                           (Attributes)oAtts );
        this.endElement( new QName(this,this.prefix("HitId")) );

        // no hit description
        oAtts.clear();
        this.startElement( new QName(this,this.prefix("HSPCollection")),
                           (Attributes)oAtts );
        this.startElement( new QName(this,this.prefix("HSP")),
                           (Attributes)oAtts );
    }

    /**
     * Emits the HSPSummary and BlastLikeAlignment elements for a fully
     * collected alignment and closes the HSP, HSPCollection and Hit
     * elements opened for it.
     */
    private void emitHspAndCloseHit( String poRawSummary,
                                     String poFrom, String poTo,
                                     String poScore, String poEvalue,
                                     int piAlignLen,
                                     String poHmmString,
                                     String poMarkupString,
                                     String poSequenceString )
        throws SAXException {

        int iNumberOfPlus = this.countChar( poMarkupString, '+' );
        int iNumberOfSpaces = this.countChar( poMarkupString, ' ' );
        // Only gaps in the sequence row ('-') are counted.
        int iNumberOfGaps = this.countChar( poSequenceString, '-' );
        int iAlignSize = piAlignLen + iNumberOfGaps;
        int iNumberOfPositives = iAlignSize - iNumberOfSpaces;
        int iNumberOfIdentities = iNumberOfPositives - iNumberOfPlus;

        oAtts.clear();
        addAlignmentAttribute( "score", "CDATA", poScore );
        addAlignmentAttribute( "expectValue", "CDATA", poEvalue );
        addAlignmentAttribute( "numberOfIdentities", "CDATA",
                               Integer.toString( iNumberOfIdentities ) );
        addAlignmentAttribute( "alignmentSize", "CDATA",
                               Integer.toString( iAlignSize ) );
        addAlignmentAttribute( "percentageIdentity", "CDATA",
                               percentageOf( iNumberOfIdentities,
                                             iAlignSize ) );
        addAlignmentAttribute( "numberOfPositives", "CDATA",
                               Integer.toString( iNumberOfPositives ) );
        addAlignmentAttribute( "percentagePositives", "CDATA",
                               percentageOf( iNumberOfPositives,
                                             iAlignSize ) );
        addAlignmentAttribute( "numberOfGaps", "CDATA",
                               Integer.toString( iNumberOfGaps ) );
        this.startElement( new QName(this,this.prefix("HSPSummary")),
                           (Attributes)oAtts );

        //Raw HSPSummary Data
        oAtts.clear();
        addAlignmentAttribute( "xml:space", "NMTOKEN", "preserve" );
        this.startElement( new QName(this,this.prefix("RawOutput")),
                           (Attributes)oAtts );
        char[] aoChars = poRawSummary.toCharArray();
        this.characters( aoChars, 0, aoChars.length );
        this.endElement( new QName(this,this.prefix("RawOutput")) );
        this.endElement( new QName(this,this.prefix("HSPSummary")) );

        // ALIGNMENT
        oAtts.clear();
        this.startElement( new QName(this,this.prefix("BlastLikeAlignment")),
                           (Attributes)oAtts );

        oAtts.clear();
        addAlignmentAttribute( "startPosition", "CDATA", poFrom );
        addAlignmentAttribute( "stopPosition", "CDATA", poTo );
        this.startElement( new QName(this,this.prefix("QuerySequence")),
                           (Attributes)oAtts );
        aoChars = poSequenceString.toCharArray();
        this.characters( aoChars, 0, aoChars.length );
        this.endElement( new QName(this,this.prefix("QuerySequence")) );

        //Match consensus
        oAtts.clear();
        addAlignmentAttribute( "xml:space", "NMTOKEN", "preserve" );
        this.startElement( new QName(this,this.prefix("MatchConsensus")),
                           (Attributes)oAtts );
        aoChars = poMarkupString.toCharArray();
        this.characters( aoChars, 0, aoChars.length );
        this.endElement( new QName(this,this.prefix("MatchConsensus")) );

        //HitSequence
        // NOTE(review): the positions below are fixed literals, not values
        // read from the input; they look like placeholders - confirm.
        oAtts.clear();
        addAlignmentAttribute( "startPosition", "CDATA", "100" );
        addAlignmentAttribute( "stopPosition", "CDATA", "200" );
        this.startElement( new QName(this,this.prefix("HitSequence")),
                           (Attributes)oAtts );
        aoChars = poHmmString.toCharArray();
        this.characters( aoChars, 0, aoChars.length );
        this.endElement( new QName(this,this.prefix("HitSequence")) );

        //end Alignment
        // The end tag name is built exactly like the start tag name
        // (prefix applied once) so the two always match.
        this.endElement( new QName(this,this.prefix("BlastLikeAlignment")) );
        this.endElement( new QName(this,this.prefix("HSP")) );
        this.endElement( new QName(this,this.prefix("HSPCollection")) );
        this.endElement( new QName(this,this.prefix("Hit")) );
    }
}