All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.bio.program.sax.blastxml.HspHandler Maven / Gradle / Ivy

There is a newer version: 1.9.7
Show newest version
/**
 *  BioJava development code This code may be freely distributed and modified
 *  under the terms of the GNU Lesser General Public Licence. This should be
 *  distributed with the code. If you do not have a copy, see:
 *  http://www.gnu.org/copyleft/lesser.html Copyright for this code is held
 *  jointly by the individual authors. These should be listed in @author doc
 *  comments.
 *
 *  For more information on the BioJava project and its
 *  aims, or to join the biojava-l mailing list, visit the home page at:
 *  http://www.biojava.org/
 */
package org.biojava.bio.program.sax.blastxml;

import org.biojava.bio.seq.io.game.ElementRecognizer;
import org.biojava.utils.stax.StAXContentHandler;
import org.biojava.utils.stax.StringElementHandlerBase;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/**
 * This class handles the &lt;Hsp&gt; element in NCBI Blast XML.
 * It generates events of the type handled by SearchContentHandler.
 * Most events will be generated just using the element local name
 * as the key and the CDATA as the value.
 *
 * The events do not conform to the BlastLikeDataSetCollection DTD.
 * Conformity is achieved with an adaptor class that intercepts
 * certain event types and translates them to those used in the
 * above DTD.  This was done to focus all the changes necessary
 * to achieve conformity in the adaptor class.
 */
class HspHandler
    extends StAXFeatureHandler
{
    /**
     * Factory that creates a new {@code HspHandler} for the supplied
     * handler environment.
     */
    public final static StAXHandlerFactory HSP_HANDLER_FACTORY
             =
        new StAXHandlerFactory() {
            public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                return new HspHandler(staxenv);
            }
        };

    // names of the attributes placed on the generated HSPSummary element
    private static final String bitScore = "bitScore";
    private static final String score = "score";
    private static final String expectValue = "expectValue";
    private static final String numberOfIdentities = "numberOfIdentities";
    private static final String numberOfPositives = "numberOfPositives";
    private static final String alignmentSize = "alignmentSize";
    private static final String queryFrame = "queryFrame";
    private static final String hitFrame = "hitFrame";
    private static final String queryStrand = "queryStrand";
    private static final String hitStrand = "hitStrand";
    private static final String percentageIdentity = "percentageIdentity";

    // attributes collected for the HSPSummary element; (re)created by
    // startElementHandler and filled in by the child element handlers
    AttributesImpl hspAttrs;
    // not referenced anywhere in this class
    AttributesImpl alignAttrs;

    // values of child elements, held until the end of <Hsp>.
    // Integer.MIN_VALUE / null mean "child element not seen".
    int iNumberOfIdentities = Integer.MIN_VALUE;
    int iAlignmentSize = Integer.MIN_VALUE;
    String sHsp_qseq = null;
    String sHsp_hseq = null;
    String sHsp_midline = null;
    String sHsp_hit_from = null;
    String sHsp_hit_to = null;
    String sHsp_query_from = null;
    String sHsp_query_to = null;


    /**
     * Creates the handler and registers one delegate per child element of
     * {@code <Hsp>} that this class consumes.  No delegate is registered
     * for {@code <Hsp_num>}.
     *
     * @param staxenv the handler environment, passed on to the superclass
     */
    public HspHandler(StAXFeatureHandler staxenv)
    {
        super(staxenv);

        // <Hsp_bit-score>, <Hsp_score>, <Hsp_evalue>: text copied unchanged
        // into the summary attributes
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_bit-score"),
            summaryAttributeFactory(bitScore));
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_score"),
            summaryAttributeFactory(score));
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_evalue"),
            summaryAttributeFactory(expectValue));

        // <Hsp_query-from>: kept (trimmed) for the QuerySequence element
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_query-from"),
            new StAXHandlerFactory() {
                public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                    return new StringElementHandlerBase() {
                        public void setStringValue(String s) {
                            sHsp_query_from = s.trim();
                        }
                    };
                }
            }
        );

        // <Hsp_query-to>: kept (trimmed) for the QuerySequence element
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_query-to"),
            new StAXHandlerFactory() {
                public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                    return new StringElementHandlerBase() {
                        public void setStringValue(String s) {
                            sHsp_query_to = s.trim();
                        }
                    };
                }
            }
        );

        // <Hsp_hit-from>: kept (trimmed) for the HitSequence element
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_hit-from"),
            new StAXHandlerFactory() {
                public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                    return new StringElementHandlerBase() {
                        public void setStringValue(String s) {
                            sHsp_hit_from = s.trim();
                        }
                    };
                }
            }
        );

        // <Hsp_hit-to>: kept (trimmed) for the HitSequence element
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_hit-to"),
            new StAXHandlerFactory() {
                public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                    return new StringElementHandlerBase() {
                        public void setStringValue(String s) {
                            sHsp_hit_to = s.trim();
                        }
                    };
                }
            }
        );

        // <Hsp_query-frame>: queryFrame for protein hits.  For DNA hits the
        // value is reported as the *hit* strand: when Hsp_hit-frame is
        // reversed it is the query sequence that is depicted inverted, and
        // the reverse is assumed to hold as well.
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_query-frame"),
            frameHandlerFactory("Hsp_query-frame", queryFrame, hitStrand));

        // <Hsp_hit-frame>: hitFrame for protein hits, *query* strand for
        // DNA hits (same swap as above).
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_hit-frame"),
            frameHandlerFactory("Hsp_hit-frame", hitFrame, queryStrand));

        // <Hsp_identity>: reported as an attribute and kept as a number so
        // the percentage identity can be computed at the end of <Hsp>
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_identity"),
            new StAXHandlerFactory() {
                public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                    return new StringElementHandlerBase() {
                        public void setStringValue(String s) throws SAXException {
                            iNumberOfIdentities = parseIntValue("Hsp_identity", s);
                            hspAttrs.addAttribute(biojavaUri, numberOfIdentities, numberOfIdentities, CDATA, s);
                        }
                    };
                }
            }
        );

        // <Hsp_positive>: text copied unchanged into the summary attributes
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_positive"),
            summaryAttributeFactory(numberOfPositives));

        // <Hsp_align-len>: reported as an attribute and kept as a number so
        // the percentage identity can be computed at the end of <Hsp>
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_align-len"),
            new StAXHandlerFactory() {
                public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                    return new StringElementHandlerBase() {
                        public void setStringValue(String s) throws SAXException {
                            iAlignmentSize = parseIntValue("Hsp_align-len", s);
                            hspAttrs.addAttribute(biojavaUri, alignmentSize, alignmentSize, CDATA, s);
                        }
                    };
                }
            }
        );

        // <Hsp_qseq>: kept untrimmed as the QuerySequence text
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_qseq"),
            new StAXHandlerFactory() {
                public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                    return new StringElementHandlerBase() {
                        public void setStringValue(String s) {
                            sHsp_qseq = s;
                        }
                    };
                }
            }
        );

        // <Hsp_hseq>: kept untrimmed as the HitSequence text
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_hseq"),
            new StAXHandlerFactory() {
                public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                    return new StringElementHandlerBase() {
                        public void setStringValue(String s) {
                            sHsp_hseq = s;
                        }
                    };
                }
            }
        );

        // <Hsp_midline>: kept untrimmed as the MatchConsensus text
        super.addHandler(new ElementRecognizer.ByLocalName("Hsp_midline"),
            new StAXHandlerFactory() {
                public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                    return new StringElementHandlerBase() {
                        public void setStringValue(String s) {
                            sHsp_midline = s;
                        }
                    };
                }
            }
        );
    }

    /**
     * Builds a factory whose handlers copy the element text, unchanged,
     * into {@link #hspAttrs} under the given attribute name.
     *
     * @param attributeName local and qualified name of the attribute to add
     * @return the factory to register for the element
     */
    private StAXHandlerFactory summaryAttributeFactory(final String attributeName) {
        return new StAXHandlerFactory() {
            public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                return new StringElementHandlerBase() {
                    public void setStringValue(String s) {
                        hspAttrs.addAttribute(biojavaUri, attributeName, attributeName, CDATA, s);
                    }
                };
            }
        };
    }

    /**
     * Builds a factory for the frame elements.  The element text is parsed
     * as an integer; for a "protein" hit sequence type it is reported as a
     * frame under {@code proteinAttribute}, for "dna" as a strand under
     * {@code dnaAttribute}.  Any other (or unset) sequence type is rejected
     * with a {@code SAXException}.
     *
     * @param elementName      element name, used only in error messages
     * @param proteinAttribute attribute receiving the stringified frame
     * @param dnaAttribute     attribute receiving the stringified strand
     * @return the factory to register for the element
     */
    private StAXHandlerFactory frameHandlerFactory(
            final String elementName,
            final String proteinAttribute,
            final String dnaAttribute)
    {
        return new StAXHandlerFactory() {
            public StAXContentHandler getHandler(StAXFeatureHandler staxenv) {
                return new StringElementHandlerBase() {
                    public void setStringValue(String s) throws SAXException {
                        int frameNo = parseIntValue(elementName, s);

                        // constant-first equals: an unset sequence type falls
                        // through to the SAXException below instead of an NPE
                        if ("protein".equals(hitSequenceType)) {
                            hspAttrs.addAttribute(biojavaUri, proteinAttribute, proteinAttribute, CDATA,
                                stringifyFrame(frameNo));
                        }
                        else if ("dna".equals(hitSequenceType)) {
                            hspAttrs.addAttribute(biojavaUri, dnaAttribute, dnaAttribute, CDATA,
                                stringifyStrand(frameNo));
                        }
                        else throw new SAXException("illegal sequence type");
                    }
                };
            }
        };
    }

    /**
     * Parses the trimmed text of an element as an integer.
     *
     * @param elementName element name, used only in the error message
     * @param s           raw element text
     * @return the parsed value
     * @throws SAXException if the text is not a valid integer; the
     *                      {@code NumberFormatException} is kept as the cause
     */
    private static int parseIntValue(String elementName, String s) throws SAXException {
        try {
            return Integer.parseInt(s.trim());
        }
        catch (NumberFormatException nfe) {
            throw new SAXException(
                "<" + elementName + "> does not contain a valid integer: \"" + s + "\"", nfe);
        }
    }

    /**
     * Converts a frame number in -3..-1 or 1..3 to "minus3".."plus3".
     *
     * @throws SAXException for any other frame number
     */
    private String stringifyFrame(int frame) throws SAXException {
        switch (frame) {
            case -3: return "minus3";
            case -2: return "minus2";
            case -1: return "minus1";
            case 1: return "plus1";
            case 2: return "plus2";
            case 3: return "plus3";
            default: throw new SAXException("illegal frame number encountered. ("+frame+")");
        }
    }

    /**
     * Converts a signed strand number to "plus" (positive) or "minus"
     * (negative).
     *
     * @throws SAXException if the strand number is zero
     */
    private String stringifyStrand(int strand) throws SAXException {
        if (strand > 0) return "plus";
        else if (strand < 0) return "minus";
        else throw new SAXException("illegal strand number encountered.");
    }

    /**
     * Called at the start of {@code <Hsp>}: creates an empty attribute
     * list for the child element handlers to fill in.
     */
    public void startElementHandler(
            String nsURI,
            String localName,
            String qName,
            Attributes attrs)
             throws SAXException
    {
        hspAttrs = new AttributesImpl();
    }


    /**
     * Called at the end of {@code <Hsp>}: checks that all required child
     * elements were seen, adds the percentage identity to the summary
     * attributes and sends the HSP / HSPSummary / BlastLikeAlignment
     * events to the listener.
     *
     * @throws SAXException if a required child element was missing, or if
     *                      the listener throws
     */
    public void endElementHandler(
            String nsURI,
            String localName,
            String qName,
            StAXContentHandler handler)
             throws SAXException
    {
        // only generate the element if key parts are present
        if ((iNumberOfIdentities == Integer.MIN_VALUE)
            || (iAlignmentSize == Integer.MIN_VALUE)
            || (sHsp_qseq == null)
            || (sHsp_hseq == null)
            || (sHsp_midline == null)
            || (sHsp_hit_from == null)
            || (sHsp_hit_to == null)
            || (sHsp_query_from == null)
            || (sHsp_query_to == null) ) {
            throw new SAXException("<Hsp> is non-compliant.");
        }

        // compute percentage identity and report it; the product is formed
        // as a long so that very large identity counts cannot overflow
        hspAttrs.addAttribute(biojavaUri,
            percentageIdentity,
            percentageIdentity,
            CDATA,
            Float.toString( ((float) (100L * iNumberOfIdentities)) / ((float) iAlignmentSize))
            );

        // <HSP>
        staxenv.listener.startElement(biojavaUri, "HSP", biojavaUri + ":HSP", new AttributesImpl());

            // <HSPSummary/> carries every collected attribute and no text
            emitHspChild("HSPSummary", hspAttrs, null);

            // <BlastLikeAlignment>
            staxenv.listener.startElement(biojavaUri, "BlastLikeAlignment", biojavaUri + ":BlastLikeAlignment", new AttributesImpl());

                // <QuerySequence> with the query symbol tokens
                emitHspChild("QuerySequence",
                    newPositionAttrs(sHsp_query_from, sHsp_query_to), sHsp_qseq);

                // <MatchConsensus> with the midline; xml:space is set to preserve
                AttributesImpl matchAttrs = new AttributesImpl();
                matchAttrs.addAttribute("xml", "space", "xml:space", CDATA, "preserve");
                emitHspChild("MatchConsensus", matchAttrs, sHsp_midline);

                // <HitSequence> with the hit symbol tokens.  The start tag
                // uses the same prefixed qName as its end tag.
                emitHspChild("HitSequence",
                    newPositionAttrs(sHsp_hit_from, sHsp_hit_to), sHsp_hseq);

            // </BlastLikeAlignment>
            staxenv.listener.endElement(biojavaUri, "BlastLikeAlignment", biojavaUri + ":BlastLikeAlignment");

        // </HSP>
        staxenv.listener.endElement(biojavaUri, "HSP", biojavaUri + ":HSP");
    }

    /**
     * Builds the startPosition / stopPosition attribute pair used by the
     * QuerySequence and HitSequence elements.
     */
    private AttributesImpl newPositionAttrs(String from, String to) {
        AttributesImpl positions = new AttributesImpl();
        positions.addAttribute(biojavaUri, "startPosition", "startPosition", CDATA, from);
        positions.addAttribute(biojavaUri, "stopPosition", "stopPosition", CDATA, to);
        return positions;
    }

    /**
     * Sends a complete element (start, optional text, end) to the listener,
     * using {@code biojavaUri + ":" + name} as the qualified name for both
     * the start and the end event.
     *
     * @param name  local name of the element
     * @param attrs attributes for the start event
     * @param text  character content, or {@code null} for an empty element
     */
    private void emitHspChild(String name, AttributesImpl attrs, String text)
        throws SAXException
    {
        String prefixedName = biojavaUri + ":" + name;
        staxenv.listener.startElement(biojavaUri, name, prefixedName, attrs);
        if (text != null) {
            staxenv.listener.characters(text.toCharArray(), 0, text.length());
        }
        staxenv.listener.endElement(biojavaUri, name, prefixedName);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy