// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.bio.program.sax;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
/**
* A SAX-like parser for dealing with native NCBI-Blast, Wu-Blast,
* and HMMER output (a single dataset). That is, this class allows
* native BLAST-like format files to be processed as if they were in
* an XML format i.e. it sends messages to a user-written XML Handler.
*
* This class has package-level visibility, and is used
* by generic BlastLikeSAXParser objects.
*
* Some functionality is delegated to Part objects within the class
* whose implementations are selected on-the-fly according
* to the program/version that produced the output
* (see SummaryLineHelperIF).
*
* NB Support for HMMER is currently only partial and likely to change
* without notice as more functionality is added.
*
* Copyright 2000 Cambridge Antibody Technology Group plc.
*
*
* This code released to the biojava project, May 2000
* under the LGPL license.
*
* @author Simon Brocklehurst (CAT)
* @author Tim Dilks (CAT)
* @author Colin Hardman (CAT)
* @author Stuart Johnston (CAT)
* @author Mathieu Wiepert (Mayo Foundation)
* @author Keith James (Sanger Institute)
* @author Mark Schreiber (NITD)
* @author Travis Banks (AAFC)
* @version 0.2
*
* @see BlastLikeSAXParser
*/
final class BlastSAXParser extends AbstractNativeAppSAXParser {
// Reader supplying the BLAST-like output; assigned at the start of parse().
private BufferedReader oContents;
// Reusable attribute list; cleared and refilled before elements are started.
private AttributesImpl oAtts = new AttributesImpl();
// Reusable qualified name used when adding attributes to oAtts.
private QName oAttQName = new QName(this);
// Raw lines collected while in the header or trailer; emitted as RawOutput.
private ArrayList oBuffer = new ArrayList();
// Scratch array holding the characters of the text currently being emitted.
private char[] aoChars;
// Characters of the "line.separator" system property; set in the constructor.
private char[] aoLineSeparator;
// Keys of oMap, extracted while emitting a HitSummary element.
private String[] aoKeys;
// Typed array passed to toArray() so that oMap's keys come back as String[].
private String[] aoArrayType = new String[1];
// Name/value pairs filled in by the summary-line helper for one summary line.
private HashMap oMap = new HashMap();
// Program code obtained from oVersion.getProgram() in choosePartImplementations().
private int iVer;
// Program/version description supplied to the constructor.
private BlastLikeVersionSupport oVersion;
// Delegate that parses the hits (detail) section of the output.
private HitSectionSAXParser oHits;
// Program-specific summary-line parser chosen in choosePartImplementations().
private SummaryLineHelperIF oSummaryLineHelper;
// Second token of the "Query=" header line, or "" if none was seen.
private String oQueryId;
// Text following "Database:" in the header (plus continuation lines), or "".
private String oDatabaseId;
// Digits taken from the "(N letters)" header line; stays null if no such line is seen.
private String oQueryLength; // patch from Michael Gang
// Parser states compared against iState (iState is not declared in this
// class; presumably it is inherited from the superclass).
// STARTUP: state selected by the constructor.
private static final int STARTUP = 0;
// IN_TRAILER: a Trailer element is open and lines are being buffered.
private static final int IN_TRAILER = 1;
// AT_END: the Trailer element has been closed by parse().
private static final int AT_END = 2;
// IN_HEADER: a Header element is open and lines are being buffered.
private static final int IN_HEADER = 3;
// IN_SUMMARY: a Summary element has been started.
private static final int IN_SUMMARY = 4;
// FINISHED_HITS: the hits section has been handed off and has returned.
private static final int FINISHED_HITS = 5;
// True once the end of the Summary element has been emitted.
private boolean tDoneSummary = false;
/**
* Creates a new BlastSAXParser instance.
* @param poNamespacePrefix the namespace prefix to use
* @param poVersion a BlastLikeVersionSupport value.
* @exception SAXException if an error occurs
*/
BlastSAXParser(BlastLikeVersionSupport poVersion,
String poNamespacePrefix) throws SAXException {
oVersion = poVersion;
this.setNamespacePrefix(poNamespacePrefix);
this.addPrefixMapping("biojava","http://www.biojava.org");
oHits = new HitSectionSAXParser(oVersion,this.getNamespacePrefix());
this.changeState(STARTUP);
aoLineSeparator = System.getProperty("line.separator").toCharArray();
//Beginnings of using a Builder type pattern to create
//parser from part objects. Only for done
//Summary sections at present according to program type
//at present. Significant benefit would be
//for detail, allowing choice of part object optimised
//for speed of processing etc.
this.choosePartImplementations();
}
/**
 * Parses one BLAST-like dataset and emits SAX events for it.
 *
 * All real parsing of the data happens here, i.e. the creation of the
 * BlastLikeDataSet element and everything nested inside it. Every call
 * starts from a clean per-dataset state, so the same parser instance
 * can be used for several consecutive datasets.
 *
 * @param poContents The BufferedReader that will read the BLAST
 * output
 * @param poLine The first line of the BLAST record
 * @throws org.xml.sax.SAXException If poLine is not the start of a
 * dataset, if the program/version is unsupported, or if event
 * emission fails
 * @return The line that starts the next dataset, or null when the
 * end of the input was reached. If reading was interrupted by an
 * I/O error, the last line that was read successfully (or null).
 */
public String parse(BufferedReader poContents, String poLine)
        throws SAXException {
    oHits = new HitSectionSAXParser(oVersion,this.getNamespacePrefix());
    String oLine = null;

    // Reset ALL per-dataset state. tDoneSummary and oQueryLength used to
    // survive from a previous call, which made a second dataset skip
    // every summary line and report the previous query length.
    oQueryId = "";
    oDatabaseId = "";
    oQueryLength = null;
    tDoneSummary = false;
    oContents = poContents;

    // The first line must be the start of a new BLAST-like dataset.
    if (!oVersion.isStartOfDataSet(poLine)) {
        throw new SAXException("unexpected poLine parameter, expecting start of BLAST like record");
    }
    // onNewBlastDataSet opens the dataset and header elements.
    this.onNewBlastDataSet(poLine);

    // Now parse the stream until EOF or the start of the next dataset.
    try {
        oLine = oContents.readLine();
        while ((oLine != null) &&
               (!checkNewBlastLikeDataSet(oLine))) {
            // interpret line and send messages accordingly
            this.interpret(oLine);
            oLine = oContents.readLine();
        }
    } catch (java.io.IOException x) {
        // Best effort: report the problem and close what has been opened.
        System.out.println(x.getMessage());
        System.out.println("File read interrupted");
    }

    // Now close open elements...
    // NOTE(review): only the trailer is closed here; input that ends
    // while still in the header, summary or detail section leaves those
    // elements open.
    if (iState == IN_TRAILER) {
        this.emitRawOutput(oBuffer);
        this.endElement(new QName(this,this.prefix("Trailer")));
        this.changeState(AT_END);
    }
    this.endElement(new QName(this,this.prefix("BlastLikeDataSet")));
    return oLine;
}
/**
 * Deals with a line according to the state the parser is in.
 *
 * In the header state, lines are buffered and scanned for the query
 * id, query length and database description until a line that starts
 * the summary (or a "no hits" line) is found. In the summary state,
 * lines are parsed as summary lines until the summary ends or the first
 * hit (a line starting with ">") is reached. Once the hits are finished
 * the Detail element is closed and the remaining lines are buffered as
 * the trailer.
 *
 * This method may read further lines from the underlying reader. End of
 * input and read errors encountered while doing so end the processing
 * of the current line instead of failing with a NullPointerException or
 * looping forever.
 *
 * @param poLine A line of Blast output (must not be null)
 * @throws SAXException if emitting an event fails
 */
private void interpret(String poLine) throws SAXException {
    if (iState == IN_HEADER) {
        // Accumulate header information. Things that can end the header:
        // start of the summary section, start of the detail section, or
        // start of the trailer when there are no hits.
        if (poLine.startsWith("Query=")) {
            StringTokenizer st = new StringTokenizer(poLine);
            // Skip the first token
            st.nextToken();
            if (st.hasMoreTokens()) {
                oQueryId = st.nextToken();
            }
        }
        if (poLine.matches("^\\s+\\(\\d+\\sletters\\)\\s*$")) {
            // First token is "(N"; drop the opening parenthesis.
            StringTokenizer st = new StringTokenizer(poLine);
            oQueryLength = st.nextToken().substring(1);
        }
        if (poLine.startsWith("Database:")) {
            int i = poLine.indexOf(":");
            oDatabaseId = poLine.substring(i + 1);
            // The description may span several lines; collect them up to
            // the "Searching" (or PSI-blast "Results of") line.
            while (true) {
                String oNext;
                try {
                    oNext = oContents.readLine();
                } catch (java.io.IOException x) {
                    System.err.println(x.getMessage());
                    System.err.println("File read interrupted");
                    // Give up on this line; retrying could loop forever
                    // on a persistent read error.
                    return;
                }
                if (oNext == null) {
                    // End of input inside the database description.
                    return;
                }
                poLine = oNext;
                if (poLine.startsWith("Searching")) {
                    break;
                } else if (poLine.startsWith("Results of")) {
                    // in PSI-blast is this line...
                    System.err.println("this looks like a PSI-blast file, this is currently not supported, yet!");
                    break;
                } else {
                    oDatabaseId = oDatabaseId.concat(poLine);
                }
            }
        }
        if ((poLine.startsWith("Sequences producing significant alignments")) ||
            (poLine.startsWith("Sequences producing High-scoring Segment Pairs")) ||
            (poLine.startsWith(" ***** No hits found ******")) ||
            (poLine.startsWith("-------- "))) {
            // End of header: flush the buffered lines and the ids.
            this.emitRawOutput(oBuffer);
            this.emitHeaderIds();
            oAtts.clear();
            this.endElement(new QName(this,this.prefix("Header")));
            if (poLine.startsWith(" ***** No hits found ******")) {
                // No summary or detail: go straight to the trailer.
                oAtts.clear();
                this.startElement(new QName(this,this.prefix("Trailer")),
                                  (Attributes) oAtts);
                this.changeState(IN_TRAILER);
                oBuffer.clear();
                return;
            }
            //change state
            this.changeState(IN_SUMMARY);
            oAtts.clear();
            this.startElement(new QName(this,this.prefix("Summary")),
                              (Attributes)oAtts);
            // Eat a blank line if there is one; otherwise the line just
            // read is already part of the summary.
            String oAfterHeading;
            try {
                oAfterHeading = oContents.readLine();
            } catch (java.io.IOException x) {
                System.err.println(x.getMessage());
                System.err.println("File read interrupted");
                // Do not re-interpret the heading line as a summary line.
                return;
            }
            if ((oAfterHeading != null) &&
                (!oAfterHeading.trim().equals(""))) {
                //recursively interpret it...
                this.interpret(oAfterHeading);
            }
            return;
        } //end check start of summary
        //Currently doesn't handle output that starts
        //with a detail section i.e. no summary.
        //This is a BUG/FEATURE.
        oBuffer.add(poLine);
    } //end if inHeader state

    //Deal with summary state
    if (iState == IN_SUMMARY) {
        // Check whether the end of the summary has been reached. The
        // signal is a blank line for NCBI-Blast and Wu-Blast, and
        // additionally "\\End of List" style lines for GCG.
        // HMMER has a longer summary section that includes the domain
        // summary, and a blank-line check would end the summary
        // prematurely, so the check is skipped for HMMER.
        int iProgram = oVersion.getProgram();
        if (iProgram == BlastLikeVersionSupport.HMMER) {
            if (poLine.trim().equals("")) {
                return; // skip
            }
            //HMMER-specific
            if (poLine.startsWith("Parsed for domains:")) {
                //signifies domain summary info
                return;
            }
        } else if ((poLine.trim().equals("")) ||
                   (poLine.trim().startsWith("[no more scores")) ||
                   (poLine.trim().startsWith("\\")) ) {
            // Can't change state, because we still want to check for the
            // start of detail. Forgive non-standard white-space between
            // the end of summary and the start of detail.
            if (!tDoneSummary) {
                tDoneSummary = true;
                this.endElement(new QName(this,this.prefix("Summary")));
            }
            return; //return before attempting to parse Summary Line
        }
        if (poLine.startsWith(">")) {
            // Start of the detail section. If no terminating line was
            // seen, the Summary element is still open and must be closed
            // first so that the emitted elements stay properly nested.
            if (!tDoneSummary) {
                tDoneSummary = true;
                this.endElement(new QName(this,this.prefix("Summary")));
            }
            this.hitsSectionReached(poLine);
            return;
        }
        // Only parse summary lines while the summary is still open:
        // there may be spurious data between the end of the summary and
        // the start of detail, e.g. multi-line WARNINGs in Wu-Blast.
        if (!tDoneSummary) {
            this.parseSummaryLine(poLine);
        }
        return;
    }

    //State set to this when parsing of Hits finished
    if (iState == FINISHED_HITS) {
        // Close the detail section and open the trailer. The line that
        // triggers this transition is not added to the trailer buffer.
        this.endElement(new QName(this,this.prefix("Detail")));
        oAtts.clear();
        this.startElement(new QName(this,this.prefix("Trailer")),
                          (Attributes)oAtts);
        //change state to Trailer and initialise Buffer
        this.changeState(IN_TRAILER);
        oBuffer.clear();
        return;
    } //end if finishedHits

    if (iState == IN_TRAILER) {
        oBuffer.add(poLine);
    }
}
/**
 * Called when a line indicating the start of a new BlastLikeDataSet
 * has been reached.
 *
 * NB This class deals with NCBI-BLAST, WU-BLAST and HMMER:
 *
 * o flavours of NCBI Blast (e.g. blastn, blastp etc)
 * o flavours of WU Blast (e.g. blastn, blastp etc)
 *
 * When this method is called, the line will look something like:
 *
 * BLASTN 2.0.11 [Jan-20-2000] for NCBI Blast
 *
 * The above would be parsed to program ncbi-blastn, and version number
 * 2.0.11
 *
 * The method starts the BlastLikeDataSet element (carrying "program"
 * and "version" attributes taken from the version object), switches to
 * the header state, empties the line buffer and starts the Header
 * element.
 *
 * @param poLine the first line of the dataset (not inspected here)
 * @throws SAXException if the program/version is not supported or an
 * event cannot be emitted
 */
private void onNewBlastDataSet(String poLine) throws SAXException {
    final boolean tSupported = oVersion.isSupported();
    if (!tSupported) {
        final String oMessage =
            "Program " +
            oVersion.getProgramString() + " Version " +
            oVersion.getVersionString() +
            " is not supported by the biojava blast-like parsing framework";
        throw new SAXException(oMessage);
    }

    // Attributes of the dataset element, in emission order.
    oAtts.clear();
    final String[][] aoDataSetAtts = {
        { "program", oVersion.getProgramString() },
        { "version", oVersion.getVersionString() }
    };
    for (int iAtt = 0; iAtt < aoDataSetAtts.length; iAtt++) {
        oAttQName.setQName(aoDataSetAtts[iAtt][0]);
        oAtts.addAttribute(oAttQName.getURI(),
                           oAttQName.getLocalName(),
                           oAttQName.getQName(),
                           "CDATA", aoDataSetAtts[iAtt][1]);
    }
    final QName oDataSetName =
        new QName(this, this.prefix("BlastLikeDataSet"));
    this.startElement(oDataSetName, (Attributes) oAtts);

    // From here on we are in the header: start with an empty buffer
    // and open the (attribute-less) Header element.
    iState = IN_HEADER;
    oBuffer.clear();
    oAtts.clear();
    final QName oHeaderName = new QName(this, this.prefix("Header"));
    this.startElement(oHeaderName, (Attributes) oAtts);
}
/**
 * Emits the given lines as the character content of a RawOutput
 * element carrying an xml:space="preserve" attribute.
 *
 * Each line is preceded by the platform line separator. The complete
 * separator is written, so a two-character separator such as "\r\n" is
 * no longer truncated to its first character.
 *
 * @param poList the lines to emit; every element must be a String
 * @throws SAXException if an event cannot be emitted
 */
private void emitRawOutput(ArrayList poList) throws SAXException {
    oAtts.clear();
    oAttQName.setQName("xml:space");
    oAtts.addAttribute(oAttQName.getURI(),
                       oAttQName.getLocalName(),
                       oAttQName.getQName(),
                       "NMTOKEN","preserve");
    this.startElement(new QName (this,this.prefix("RawOutput")),
                      (Attributes)oAtts);
    // Cycle through the list and send character data to the handler.
    int iTmpListSize = poList.size();
    for (int i = 0; i < iTmpListSize; i++) {
        aoChars = ((String)poList.get(i)).toCharArray();
        // Write the whole separator, not just its first character.
        this.characters(aoLineSeparator,0,aoLineSeparator.length);
        this.characters(aoChars,0,aoChars.length);
    }
    this.endElement(new QName(this,this.prefix("RawOutput")));
}
/**
 * Parses a summary line and emits it as a HitSummary element. The
 * actual parsing is delegated to the program-specific summary line
 * helper, which fills a map of name/value pairs.
 *
 * For NCBI Blast, a summary line looks something like:
 *
 * U00431 Mus musculus HMG-1 mRNA, complete cds. 353 7e-95
 *
 * U00431 is typically a database accession code
 * Mus musculus.. is a description of the hit (this is optional)
 *
 * 353 is a bit score
 *
 * 7e-95 is an E Value
 *
 * Map entries other than "hitId" and "hitDescription" become
 * attributes of HitSummary; "hitId" becomes a nested HitId element and
 * "hitDescription" a nested HitDescription element with text content.
 *
 * @param poLine the summary line to parse
 * @throws SAXException if an event cannot be emitted
 */
private void parseSummaryLine(String poLine) throws SAXException {
    oSummaryLineHelper.parse(poLine, oMap, oVersion);

    // GCG uses two lines per summary entry: the first one becomes the
    // beginning of the description, so read and parse one more line.
    final boolean tTwoLineSummary =
        (iVer == BlastLikeVersionSupport.GCG_BLASTN);
    if (tTwoLineSummary) {
        try {
            final String oSecondLine = oContents.readLine();
            oSummaryLineHelper.parse(oSecondLine, oMap, oVersion);
        } catch (IOException x) {
            System.out.println(x.getMessage());
            System.out.println("GCG File read interrupted");
        }
    }

    // HMMER can have empty summary sections. Bail out early, because
    // toArray on an empty key set would hand back a one-element array
    // whose only entry is null.
    if (oMap.isEmpty()) {
        return;
    }
    aoKeys = (String[]) (oMap.keySet().toArray(aoArrayType));

    // Pass 1: everything except id and description is an attribute of
    // the HitSummary element.
    oAtts.clear();
    for (int iKey = 0; iKey < aoKeys.length; iKey++) {
        final String oKey = aoKeys[iKey];
        if (oKey.equals("hitId") || oKey.equals("hitDescription")) {
            continue;
        }
        oAttQName.setQName(oKey);
        oAtts.addAttribute(oAttQName.getURI(),
                           oAttQName.getLocalName(),
                           oAttQName.getQName(),
                           "CDATA", (String) oMap.get(oKey));
    }
    this.startElement(new QName(this, this.prefix("HitSummary")),
                      (Attributes) oAtts);

    // Pass 2: id and description become child elements.
    for (int iKey = 0; iKey < aoKeys.length; iKey++) {
        final String oKey = aoKeys[iKey];
        final boolean tIsId = oKey.equals("hitId");
        if (tIsId) {
            oAtts.clear();
            oAttQName.setQName("id");
            oAtts.addAttribute(oAttQName.getURI(),
                               oAttQName.getLocalName(),
                               oAttQName.getQName(),
                               "CDATA", (String) oMap.get(oKey));
            oAttQName.setQName("metaData");
            oAtts.addAttribute(oAttQName.getURI(),
                               oAttQName.getLocalName(),
                               oAttQName.getQName(),
                               "CDATA", "none");
            this.startElement(new QName(this, this.prefix("HitId")),
                              (Attributes) oAtts);
            this.endElement(new QName(this, this.prefix("HitId")));
        } else if (oKey.equals("hitDescription")) {
            oAtts.clear();
            this.startElement(
                new QName(this, this.prefix("HitDescription")),
                (Attributes) oAtts);
            aoChars = ((String) oMap.get(oKey)).toCharArray();
            this.characters(aoChars, 0, aoChars.length);
            this.endElement(
                new QName(this, this.prefix("HitDescription")));
        }
    }

    this.endElement(new QName(this, this.prefix("HitSummary")));
    // Leave the map empty for the next summary line.
    oMap.clear();
}
/**
 * Fires the QueryId and DatabaseId events.
 *
 * QueryId carries the attributes "id", "metaData" (always "none") and
 * "queryLength"; DatabaseId carries "id" and "metaData" (always
 * "none"). Both elements are empty. The query length is passed on as
 * stored, so it is null when no query-length line was seen.
 *
 * @throws SAXException if an event cannot be emitted
 */
private void emitHeaderIds() throws SAXException {
    // --- QueryId -----------------------------------------------------
    final String[] aoQueryAttNames = { "id", "metaData", "queryLength" };
    final String[] aoQueryAttValues = { oQueryId, "none", oQueryLength };
    oAtts.clear();
    for (int iAtt = 0; iAtt < aoQueryAttNames.length; iAtt++) {
        oAttQName.setQName(aoQueryAttNames[iAtt]);
        oAtts.addAttribute(oAttQName.getURI(),
                           oAttQName.getLocalName(),
                           oAttQName.getQName(),
                           "CDATA", aoQueryAttValues[iAtt]);
    }
    this.startElement(new QName(this, this.prefix("QueryId")),
                      (Attributes) oAtts);
    this.endElement(new QName(this, this.prefix("QueryId")));

    // --- DatabaseId --------------------------------------------------
    final String[] aoDatabaseAttNames = { "id", "metaData" };
    final String[] aoDatabaseAttValues = { oDatabaseId, "none" };
    oAtts.clear();
    for (int iAtt = 0; iAtt < aoDatabaseAttNames.length; iAtt++) {
        oAttQName.setQName(aoDatabaseAttNames[iAtt]);
        oAtts.addAttribute(oAttQName.getURI(),
                           oAttQName.getLocalName(),
                           oAttQName.getQName(),
                           "CDATA", aoDatabaseAttValues[iAtt]);
    }
    this.startElement(new QName(this, this.prefix("DatabaseId")),
                      (Attributes) oAtts);
    this.endElement(new QName(this, this.prefix("DatabaseId")));
}
/**
 * Starts the Detail element and, from the specified line, hands over
 * parsing of the stream to the hits-section helper. Which lines mark
 * the end of the hits section depends on the program that produced the
 * output. Afterwards the parser is put into the "finished hits" state.
 *
 * For programs that belong to none of the NCBI, WU or GCG groups
 * handled below, nothing is handed to the helper.
 *
 * @param poLine the first line of the hits section
 * @exception SAXException thrown if an event cannot be emitted or the
 * helper fails
 */
private void hitsSectionReached(String poLine)
        throws SAXException {
    // The helper reports to the same handler as this parser.
    oHits.setContentHandler(oHandler);

    oAtts.clear();
    this.startElement(new QName(this, this.prefix("Detail")),
                      (Attributes) oAtts);

    final int iProgram = oVersion.getProgram();
    final boolean tNcbi =
        (iProgram == BlastLikeVersionSupport.NCBI_BLASTN)
        || (iProgram == BlastLikeVersionSupport.NCBI_BLASTX)
        || (iProgram == BlastLikeVersionSupport.NCBI_BLASTP)
        || (iProgram == BlastLikeVersionSupport.NCBI_TBLASTN)
        || (iProgram == BlastLikeVersionSupport.NCBI_TBLASTX);
    final boolean tWu =
        (iProgram == BlastLikeVersionSupport.WU_BLASTN)
        || (iProgram == BlastLikeVersionSupport.WU_BLASTX)
        || (iProgram == BlastLikeVersionSupport.WU_BLASTP)
        || (iProgram == BlastLikeVersionSupport.WU_TBLASTN)
        || (iProgram == BlastLikeVersionSupport.WU_TBLASTX);
    final boolean tGcg =
        (iProgram == BlastLikeVersionSupport.GCG_BLASTN);

    // Each call below returns when the end of the hits section is
    // reached.
    if (tNcbi) {
        final ArrayList oEndMarkers = new ArrayList();
        oEndMarkers.add("Database");
        oEndMarkers.add("TBLAST");
        oEndMarkers.add("BLAST");
        oHits.parse(oContents, poLine, oEndMarkers);
    }
    if (tWu) {
        final ArrayList oEndMarkers = new ArrayList();
        oEndMarkers.add("Parameters:");
        oHits.parse(oContents, poLine, oEndMarkers);
    }
    // Much like NCBI, kept separate for organisation.
    if (tGcg) {
        final ArrayList oEndMarkers = new ArrayList();
        oEndMarkers.add("Database:");
        oHits.parse(oContents, poLine, oEndMarkers);
    }

    this.changeState(FINISHED_HITS);
}
/**
 * Tells whether a line of Blast-like output represents the start of a
 * new BlastLike data set, i.e. whether it begins with "BLAST" or
 * "TBLAST".
 * Currently supports:
 * o ncbi-blast all variants
 * o wu-blast all variants
 *
 * @param poLine A String representation of the line (must not be null)
 * @return boolean true if it is a new dataset, false if not.
 */
private boolean checkNewBlastLikeDataSet(String poLine) {
    final boolean tBlast = poLine.startsWith("BLAST");
    return tBlast || poLine.startsWith("TBLAST");
}
/**
 * Chooses the particular implementations of the Part objects that
 * make up the aggregate parser, according to the program that produced
 * the output. At present this selects the summary line helper only.
 *
 * The program code is read from the version object and remembered for
 * later use.
 *
 * @throws SAXException if no summary line helper is available for the
 * program
 */
private void choosePartImplementations() throws SAXException {
    iVer = oVersion.getProgram();

    final boolean tNcbi =
        (iVer == BlastLikeVersionSupport.NCBI_BLASTN)
        || (iVer == BlastLikeVersionSupport.NCBI_BLASTX)
        || (iVer == BlastLikeVersionSupport.NCBI_BLASTP)
        || (iVer == BlastLikeVersionSupport.NCBI_TBLASTN)
        || (iVer == BlastLikeVersionSupport.NCBI_TBLASTX);
    final boolean tWu =
        (iVer == BlastLikeVersionSupport.WU_BLASTN)
        || (iVer == BlastLikeVersionSupport.WU_BLASTX)
        || (iVer == BlastLikeVersionSupport.WU_BLASTP)
        || (iVer == BlastLikeVersionSupport.WU_TBLASTN)
        || (iVer == BlastLikeVersionSupport.WU_TBLASTX);

    // The first matching program family wins.
    SummaryLineHelperIF oChosen = null;
    if (tNcbi) {
        oChosen = (SummaryLineHelperIF) new NcbiBlastSummaryLineHelper();
    } else if (tWu) {
        oChosen = (SummaryLineHelperIF) new WuBlastSummaryLineHelper();
    } else if (iVer == BlastLikeVersionSupport.HMMER) {
        oChosen = (SummaryLineHelperIF) new HmmerSummaryLineHelper();
    } else if (iVer == BlastLikeVersionSupport.GCG_BLASTN) {
        oChosen = (SummaryLineHelperIF) new GCGBlastSummaryLineHelper();
    }

    if (oChosen == null) {
        // No implementation available. The exception helps SAX parser
        // writers track down problems when extending the framework.
        String oMessage =
            "Could not choose a suitable implementation of the ".
            concat("SummaryLineHelperIF for program ");
        oMessage = oMessage.concat(oVersion.getProgramString());
        oMessage = oMessage.concat(" version ");
        oMessage = oMessage.concat(oVersion.getVersionString());
        throw new SAXException(oMessage);
    }
    oSummaryLineHelper = oChosen;
}
}