// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// org.biojava.nbio.genome.query.BlastXMLQuery Maven / Gradle / Ivy
//
// There is a newer version: 7.1.3
// Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.genome.query;


import org.biojava.nbio.core.util.XMLHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import java.util.ArrayList;
import java.util.LinkedHashMap;

/**
 *
 * @author Scooter Willis 
 */
public class BlastXMLQuery {

	private static final Logger logger = LoggerFactory.getLogger(BlastXMLQuery.class);

	Document blastDoc = null;

	public BlastXMLQuery(String blastFile) throws Exception {
		logger.info("Start read of {}", blastFile);
		blastDoc = XMLHelper.loadXML(blastFile);
		logger.info("Read finished");
	}

	public LinkedHashMap> getHitsQueryDef(double maxEScore) throws Exception {
		LinkedHashMap> hitsHashMap = new LinkedHashMap>();
		logger.info("Query for hits");
		ArrayList elementList = XMLHelper.selectElements(blastDoc.getDocumentElement(), "BlastOutput_iterations/Iteration[Iteration_hits]");
		logger.info("{} hits", elementList.size());

		for (Element element : elementList) {
			Element iterationquerydefElement = XMLHelper.selectSingleElement(element, "Iteration_query-def");
			String querydef = iterationquerydefElement.getTextContent();
			Element iterationHitsElement = XMLHelper.selectSingleElement(element, "Iteration_hits");
			ArrayList hitList = XMLHelper.selectElements(iterationHitsElement, "Hit");
			for (Element hitElement : hitList) {
				Element hitaccessionElement = XMLHelper.selectSingleElement(hitElement, "Hit_accession");
				String hitaccession = hitaccessionElement.getTextContent();
				Element hithspsElement = XMLHelper.selectSingleElement(hitElement, "Hit_hsps");
				ArrayList hspList = XMLHelper.selectElements(hithspsElement, "Hsp");
				for (Element hspElement : hspList) {
					Element evalueElement = XMLHelper.selectSingleElement(hspElement, "Hsp_evalue");
					String value = evalueElement.getTextContent();
					double evalue = Double.parseDouble(value);
					if (evalue <= maxEScore) {
						ArrayList hits = hitsHashMap.get(querydef);
						if (hits == null) {
							hits = new ArrayList();
							hitsHashMap.put(querydef, hits);
						}
						hits.add(hitaccession);
					}
				}
			}
		}

		return hitsHashMap;
	}

	public static void main(String[] args) {
		try {
			BlastXMLQuery blastXMLQuery = new BlastXMLQuery("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/c1-454Scaffolds-hits-uniprot_fungi.xml");
			LinkedHashMap> hits = blastXMLQuery.getHitsQueryDef(1E-10);
			logger.info("Hits: {}", hits);
		} catch (Exception e) {
			logger.error("Execution: ", e);
		}
	}
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy