Maven / Gradle / Ivy
* BioJava development code
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
import org.biojava.utils.stax.DelegationManager;
import org.biojava.utils.stax.StAXContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;
* This class parses NCBI Blast XML output.
* It has two modes:-
* i) single output document mode: this takes a document
* containing a single BlastOutput element and parses it.
* This is generated when a single query is searched against
* a sequence database.
* ii) multiple query document mode: unfortunately, NCBI
* BLAST concatenates the results of multiple searches in
* one file. This leads to an ill-formed document that violates
* every XML format known to the human race and other nearby
* civilisations. This parser will take a bowdlerised version of
* this output that is wrapped in a blast_aggregate element.
* The massaged form is generated by stripping the XML declaration and
* DOCTYPE declarations and wrapping all the BlastOutput elements in a single
* blast_aggregate element. In Linux, this can be done with:-
* #!/bin/sh
* # Converts a Blast XML output to something vaguely well-formed
* # for parsing.
* # Use: blast_aggregate <XML output> <edited file>
* # strips all <?xml> and <!DOCTYPE> tags
* # encapsulates the multiple <BlastOutput> elements into <blast_aggregate>
* sed '/<?xml/d' $1 | sed '/<!DOCTYPE/d' | sed '1i\
* <blast_aggregate>
* $a\
* </blast_aggregate>' > $2
* @author David Huen
public class BlastXMLParser
extends StAXFeatureHandler
// Set until startElement has handled its first start tag: startElement
// tests this flag to decide whether to run its one-off setup and then
// assigns false to it.
boolean firstTime = true;
// Constructor: points the handler environment reference at this object
// and installs a placeholder listener.
public BlastXMLParser()
// this is the base element class
this.staxenv = this;
// System.out.println("staxenv " + staxenv);
// just set a DefaultHandler: does nothing worthwhile.
// It stays in place until setContentHandler(...) assigns another listener.
this.listener = new DefaultHandler();
* sets the ContentHandler for this object
// Stores the given ContentHandler as the listener on which this parser
// calls startElement/endElement for its output.
// The argument is stored as given; no null check is made here.
public void setContentHandler(org.xml.sax.ContentHandler listener)
this.listener = listener;
* we override the superclass startElement method so we can determine
* the start tag type and use it to set up delegation for the superclass.
// Handles a start tag.
// While firstTime is set, the tag's local name selects which handler factory
// is registered: "BlastOutput" (single search result) or "blast_aggregate"
// (wrapper around several results). Any other name raises a SAXException.
// The BlastLikeDataSetCollection root element is then started on the listener
// (presumably as part of the same first-call setup; block boundaries are
// not visible in this view, so confirm).
// At level 1 the registered handlers are scanned for one whose recognizer
// accepts this tag, and at level 1 startElementHandler is called.
// NOTE(review): this method appears to be missing lines in this view (the
// opening of the first handler registration and the body of the recognizer
// match) - check the complete file before editing.
public void startElement(
String nsURI,
String localName,
String qName,
Attributes attrs,
DelegationManager dm)
throws SAXException
// System.out.println("localName is " + localName);
if (firstTime) {
// what kind of tag do we have?
if (localName.equals("BlastOutput")) {
// this is a well-formed XML document from NCBI BLAST
// pertaining to one search result
// recognizer + factory pair: the factory produces a BlastOutputHandler
new ElementRecognizer.ByLocalName("BlastOutput"),
new StAXHandlerFactory() {
public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
return new BlastOutputHandler(staxenv);
else if (localName.equals("blast_aggregate")) {
// this is my phony aggregate document that exists to
// legitimise otherwise ill-formed output from NCBI Blast
// recognizer + factory pair: the factory produces a BlastAggregator
super.addHandler(new ElementRecognizer.ByLocalName("blast_aggregate"),
new StAXHandlerFactory() {
public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
return new BlastAggregator(staxenv);
else {
// neither of the two supported root elements
throw new SAXException("illegal element " + localName);
firstTime = false;
// setup the root element of the output
AttributesImpl bldscAttrs = new AttributesImpl();
// add an xmlns attribute and an xmlns:biojava attribute, both with an empty value
bldscAttrs.addAttribute("", "xmlns", "xmlns", CDATA, "");
bldscAttrs.addAttribute(biojavaUri, "biojava", "xmlns:biojava", CDATA, "");
// NOTE(review): the qualified name is built as biojavaUri + ":" + local name;
// biojavaUri is defined outside this view - confirm consumers expect this form.
listener.startElement(biojavaUri, "BlastLikeDataSetCollection", biojavaUri + ":BlastLikeDataSetCollection", bldscAttrs);
// now invoke delegation
// super.startElement(nsURI, localName, qName, attrs, dm);
// perform delegation
// we must delegate only on features that are directly attached.
// if I do not check that that's so, any element of a kind I delegate
// on will be detected at any depth within unrecognized tags.
if (level == 1) {
// System.out.println("StaxFeaturehandler.startElement starting. localName: " + localName + " " + level);
// scan the bindings from the last entry in handlers down to the first
for (int i = handlers.size() - 1; i >= 0; --i) {
Binding b = (Binding) handlers.get(i);
if (b.recognizer.filterStartElement(nsURI, localName, qName, attrs)) {
// call the element specific handler now.
// remember that if we have a delegation failure we pass here too!
if (level == 1) {
startElementHandler(nsURI, localName, qName, attrs);
// Ends the BlastLikeDataSetCollection element on the listener, passing the
// same URI, local name and qualified name that startElement uses when it
// starts that element.
// None of the four parameters is read in this body.
// Throws SAXException if the listener's endElement call does.
public void endElementHandler(
String nsURI,
String localName,
String qName,
StAXContentHandler handler)
throws SAXException
listener.endElement(biojavaUri, "BlastLikeDataSetCollection", biojavaUri + ":BlastLikeDataSetCollection");