// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.bio.program.sax;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.StringTokenizer;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
/**
* A reusable class for SAX parsing Blast-alignments...
*
* Primary author -
* Simon Brocklehurst (CAT)
* Other authors -
* Tim Dilks (CAT)
* Colin Hardman (CAT)
* Stuart Johnston (CAT)
* Mathieu Wiepert (Mayo Foundation)
*
* Copyright 2000 Cambridge Antibody Technology Group plc.
*
*
* This code released to the biojava project, May 2000
*
* @author Cambridge Antibody Technology Group plc
* @author Greg Cox
* @version 0.1
*
*/
final class BlastLikeAlignmentSAXParser extends AbstractNativeAppSAXParser {

    /** The alignment has not been started yet. */
    private static final int STARTUP            = 0;
    /** Currently inside the first Query/consensus/Sbjct block. */
    private static final int ON_FIRST_SEGMENT   = 1;
    /** The first block is complete; later blocks only extend the data. */
    private static final int DONE_FIRST_SEGMENT = 2;

    /** Delimiters used to isolate the numeric start/stop ids on a line. */
    private static final String NON_NUMERIC_DELIMS =
        " ABCDEFGHIJKLMNOPQRSTUVWXYZ-*";
    /** Delimiters used to isolate the sequence text on a line. */
    private static final String NUMERIC_DELIMS = "0123456789";

    // Reused between elements to avoid re-allocating on every event.
    private AttributesImpl oAtts     = new AttributesImpl();
    private QName          oAttQName = new QName(this);

    /** The list of lines handed to the most recent parse() call. */
    private ArrayList      oAlignment;

    // Data accumulated over all blocks of the current alignment.
    private StringBuffer   oQuery          = new StringBuffer();
    private StringBuffer   oHit            = new StringBuffer();
    private StringBuffer   oMatchConsensus = new StringBuffer();
    private StringBuffer   oHitStartId     = new StringBuffer();
    private StringBuffer   oHitStopId      = new StringBuffer();
    private StringBuffer   oQueryStartId   = new StringBuffer();
    private StringBuffer   oQueryStopId    = new StringBuffer();

    // Results of the most recently parsed Query:/Sbjct: line. These are
    // fields because a following consensus line needs the column range
    // (iOffset..iEnd) established by the line before it.
    private StringBuffer   oStartId = new StringBuffer();
    private StringBuffer   oStopId  = new StringBuffer();
    private String         oParsedSeq;
    private int            iOffset;
    private int            iEnd;

    /** True when the line handled immediately before was a consensus line. */
    private boolean        tJustDoneConsensus;

    /**
     * Creates a parser in the STARTUP state.
     *
     * @param poNamespacePrefix namespace prefix passed on to
     *                          setNamespacePrefix
     */
    public BlastLikeAlignmentSAXParser(String poNamespacePrefix) {
        this.changeState(STARTUP);
        this.setNamespacePrefix(poNamespacePrefix);
        this.addPrefixMapping("biojava", "http://www.biojava.org");
    }

    /**
     * Parses the lines of one alignment and emits a BlastLikeAlignment
     * element containing QuerySequence, MatchConsensus and HitSequence
     * child elements.
     *
     * @param poAlignment the alignment lines; every entry is cast to String
     * @throws SAXException if a Query:/Sbjct: line does not have the
     *                      expected layout, or if event emission fails
     */
    public void parse(ArrayList poAlignment)
        throws SAXException {

        oAlignment = poAlignment;

        oAtts.clear();
        this.startElement(new QName(this, this.prefix("BlastLikeAlignment")),
                          (Attributes) oAtts);

        this.changeState(ON_FIRST_SEGMENT);

        // Start every alignment from a clean slate.
        oQuery.setLength(0);
        oQueryStartId.setLength(0);
        oQueryStopId.setLength(0);
        oHit.setLength(0);
        oHitStartId.setLength(0);
        oHitStopId.setLength(0);
        oMatchConsensus.setLength(0);
        tJustDoneConsensus = false;
        // Do not let the column range of a previous alignment leak into
        // a consensus line that precedes the first Query: line.
        iOffset = 0;
        iEnd    = 0;

        int iAlSize = oAlignment.size();
        for (int i = 0; i < iAlSize; i++) {
            this.parseLine((String) oAlignment.get(i));
        }

        // The whole alignment has been collected; emit the three children.
        this.emitSequenceElement("QuerySequence",
                                 oQueryStartId, oQueryStopId, oQuery);

        oAtts.clear();
        this.addAlignmentAttribute("xml:space", "NMTOKEN", "preserve");
        this.emitAlignmentElement("MatchConsensus", oMatchConsensus);

        this.emitSequenceElement("HitSequence",
                                 oHitStartId, oHitStopId, oHit);

        // Close with the same qualified name that was used to open the
        // element (the name must be prefixed once, not twice).
        this.endElement(new QName(this, this.prefix("BlastLikeAlignment")));
    }

    /**
     * Emits a sequence element carrying startPosition and stopPosition
     * attributes and the residues as character content.
     *
     * @param poName     unprefixed element name
     * @param poStart    value for the startPosition attribute
     * @param poStop     value for the stopPosition attribute
     * @param poResidues character content of the element
     * @throws SAXException if event emission fails
     */
    private void emitSequenceElement(String poName,
                                     StringBuffer poStart,
                                     StringBuffer poStop,
                                     StringBuffer poResidues)
        throws SAXException {

        oAtts.clear();
        this.addAlignmentAttribute("startPosition", "CDATA",
                                   poStart.toString());
        this.addAlignmentAttribute("stopPosition", "CDATA",
                                   poStop.toString());
        this.emitAlignmentElement(poName, poResidues);
    }

    /**
     * Adds one attribute to the shared attribute list, resolving its
     * URI and local name through the shared QName.
     *
     * @param poQName attribute qualified name
     * @param poType  attribute type, e.g. "CDATA"
     * @param poValue attribute value
     * @throws SAXException declared for safety around the QName calls
     */
    private void addAlignmentAttribute(String poQName,
                                       String poType,
                                       String poValue)
        throws SAXException {

        oAttQName.setQName(poQName);
        oAtts.addAttribute(oAttQName.getURI(),
                           oAttQName.getLocalName(),
                           oAttQName.getQName(),
                           poType, poValue);
    }

    /**
     * Emits start tag (with the attributes currently held in oAtts),
     * character content and end tag for one child element.
     *
     * @param poName    unprefixed element name
     * @param poContent character content of the element
     * @throws SAXException if event emission fails
     */
    private void emitAlignmentElement(String poName, StringBuffer poContent)
        throws SAXException {

        this.startElement(new QName(this, this.prefix(poName)),
                          (Attributes) oAtts);
        char[] aoChars = poContent.toString().toCharArray();
        this.characters(aoChars, 0, aoChars.length);
        this.endElement(new QName(this, this.prefix(poName)));
    }

    /**
     * Handles a single line of the alignment. Lines starting with
     * "Query:" or "Sbjct:" (case-insensitive) contribute sequence data
     * and positions; any other line is treated as the match-consensus
     * line belonging to the previously parsed sequence line.
     *
     * @param poLine the raw line
     * @throws SAXException if a Query:/Sbjct: line has an unexpected layout
     */
    private void parseLine(String poLine) throws SAXException {

        // Locale-independent upper-casing: under e.g. a Turkish default
        // locale 'i' would otherwise become a dotted capital that is not
        // in the delimiter set used below.
        poLine = poLine.toUpperCase(java.util.Locale.ROOT);

        boolean tIsQuery = poLine.startsWith("QUERY:");
        boolean tIsSbjct = poLine.startsWith("SBJCT:");

        if (tIsQuery || tIsSbjct) {
            this.parseSequenceLine(poLine);
        } else {
            oParsedSeq = this.extractConsensus(poLine);
        }

        boolean tOnFirstSegment = (iState == ON_FIRST_SEGMENT);
        if (!tOnFirstSegment && iState != DONE_FIRST_SEGMENT) {
            return;
        }

        if (tIsQuery) {
            if (tOnFirstSegment) {
                // only the first block supplies the start position
                oQueryStartId.append(oStartId);
            } else {
                // later blocks replace the stop position so that it is
                // correct for multi-block alignments
                oQueryStopId.setLength(0);
            }
            oQueryStopId.append(oStopId);
            oQuery.append(oParsedSeq);
            tJustDoneConsensus = false;
            return;
        }

        if (tIsSbjct) {
            if (tOnFirstSegment) {
                oHitStartId.append(oStartId);
            } else {
                oHitStopId.setLength(0);
            }
            oHitStopId.append(oStopId);
            oHit.append(oParsedSeq);
            if (!tJustDoneConsensus) {
                // rare case: no consensus line was seen for this block,
                // so keep the consensus aligned by padding with blanks
                oMatchConsensus.append(blankPadding(iEnd - iOffset));
            }
            tJustDoneConsensus = false;
            if (tOnFirstSegment) {
                // the first block is now complete
                this.changeState(DONE_FIRST_SEGMENT);
            }
            return;
        }

        // consensus line
        oMatchConsensus.append(oParsedSeq);
        tJustDoneConsensus = true;
    }

    /**
     * Parses an upper-cased "QUERY:" or "SBJCT:" line, storing the start
     * id, stop id, sequence text and the column range of the sequence in
     * the corresponding fields.
     *
     * @param poLine the upper-cased line
     * @throws SAXException if the line does not consist of a label, a
     *                      start number, a sequence and a stop number
     */
    private void parseSequenceLine(String poLine) throws SAXException {

        StringTokenizer oSt = new StringTokenizer(poLine, ":");
        // expect exactly the label and the remainder
        if (oSt.countTokens() != 2) {
            throw (new SAXException(
            "Failed to parse a line in a BlastLikeAlignment" +
            " due it having an unexpected format." +
            " The line is shown below.\n" +
            poLine));
        }
        oSt.nextToken(); // skip the label
        String oSeq = oSt.nextToken().trim();

        // To get the numbers robustly, tokenize on letters, gaps and
        // unknowns; exactly two tokens (start and stop) must remain.
        oSt = new StringTokenizer(oSeq, NON_NUMERIC_DELIMS);
        if (oSt.countTokens() != 2) {
            throw (new SAXException(
            "Failed to parse a line of an alignment due to it having" +
            " an unexpected character."));
        }
        oStartId.setLength(0);
        oStartId.append(oSt.nextToken().trim());
        oStopId.setLength(0);
        oStopId.append(oSt.nextToken().trim());

        // To get the sequence robustly, tokenize on digits only.
        oSt = new StringTokenizer(oSeq, NUMERIC_DELIMS);
        if (oSt.countTokens() != 1) {
            throw (new SAXException(
            "Failed to parse a line of an alignment due to it having" +
            " an unexpected character."));
        }
        oParsedSeq = oSt.nextToken().trim();

        // Column range of the sequence, needed to cut the consensus line.
        // Search only after the label so that a short sequence such as
        // "Q" or "CT" cannot match inside "QUERY:" / "SBJCT:".
        iOffset = poLine.indexOf(oParsedSeq, poLine.indexOf(':') + 1);
        iEnd    = iOffset + oParsedSeq.length();
    }

    /**
     * Cuts the consensus out of a consensus line using the column range
     * of the preceding sequence line. Lines that stop short of the range
     * (software that does not pad with trailing spaces) are padded with
     * blanks so the result always has length iEnd - iOffset.
     *
     * @param poLine the upper-cased consensus line
     * @return the consensus text for the current block
     */
    private String extractConsensus(String poLine) {
        int iLineLength = poLine.length();
        if (iEnd <= iLineLength) {
            return poLine.substring(iOffset, iEnd);
        }
        if (iOffset >= iLineLength) {
            // the line ends before the sequence columns even begin
            return blankPadding(iEnd - iOffset);
        }
        return poLine.substring(iOffset)
                     .concat(blankPadding(iEnd - iLineLength));
    }

    /**
     * Builds a string consisting of the given number of spaces.
     *
     * @param piCount number of spaces; must not be negative
     * @return the padding string
     */
    private static String blankPadding(int piCount) {
        char[] aoPadding = new char[piCount];
        Arrays.fill(aoPadding, ' ');
        return new String(aoPadding);
    }
}