All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.protmod.io.ProteinModificationXmlReader Maven / Gradle / Ivy

/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on Jun 1, 2010
 * Author: Jianjiong Gao
 *
 */

package org.biojava.nbio.protmod.io;

import org.biojava.nbio.protmod.*;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;

/**
 *
 * @author Jianjiong Gao
 * @since 3.0
 */
public final class ProteinModificationXmlReader {
	/**
	 * This is a utility class and thus cannot be instantialized.
	 */
	private ProteinModificationXmlReader() {}

	/**
	 * Read protein modifications from XML file and register them.
	 * @param isXml {@link InputStream} of the XML file.
	 * @throws IOException if failed to read the XML file.
	 * @throws ParserConfigurationException if parse errors occur.
	 * @throws SAXException the {@link DocumentBuilder} cannot be created.
	 */
	public static void registerProteinModificationFromXml(InputStream isXml)
			throws IOException, ParserConfigurationException, SAXException {
		if (isXml==null) {
			throw new IllegalArgumentException("Null argument.");
		}

		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		DocumentBuilder builder = factory.newDocumentBuilder();
		Document doc = builder.parse(isXml);

		NodeList modNodes = doc.getElementsByTagName("Entry");
		int modSize = modNodes.getLength();
		List nodes;
		for (int iMod=0; iMod> infoNodes = getChildNodes(modNode);

			// ID
			nodes = infoNodes.get("Id");
			if (nodes==null || nodes.size()!=1) {
				throw new RuntimeException("Each modification must have exact " +
						"one  field.");
			}
			String id = nodes.get(0).getTextContent();

			// modification category
			nodes = infoNodes.get("Category");
			if (nodes==null || nodes.size()!=1) {
				throw new RuntimeException("Each modification must have exact " +
						"one  field. See Modification "+id+".");
			}
			ModificationCategory cat = ModificationCategory.getByLabel(
					nodes.get(0).getTextContent());
			if (cat==null) {
				throw new RuntimeException(nodes.get(0).getTextContent()+
					" is not defined as an modification category." +
					" See Modification "+id+".");
			}

			// occurrence type
			nodes = infoNodes.get("Occurrence");
			if (nodes==null || nodes.size()!=1) {
				throw new RuntimeException("Each modification must have exact " +
						"one  field. See Modification "+id+".");
			}
			ModificationOccurrenceType occType = ModificationOccurrenceType
				.getByLabel(nodes.get(0).getTextContent());
			if (occType==null) {
				throw new RuntimeException(nodes.get(0).getTextContent()+
					" is not defined as an modification occurence type." +
					" See Modification "+id+".");
			}

			// condition
			ModificationCondition condition = null;
			{
				nodes = infoNodes.get("Condition");
				if (nodes==null || nodes.size()!=1) {
					throw new RuntimeException("Each modification must have exact " +
							"one  field. See Modification "+id+".");
				}

				Node compsNode = nodes.get(0);

				// keep track of the labels of component indices
				Map mapLabelComp = new HashMap<>();

				Map> compInfoNodes = getChildNodes(compsNode);

				// components
				List compNodes = compInfoNodes.get("Component");
				int sizeComp = compNodes.size();
				List comps = new ArrayList<>(sizeComp);
				for (int iComp=0; iComp compIds = new HashSet<>();
					List compIdNodes = getChildNodes(compNode).get("Id");
					if (compIdNodes!=null) {
						for (Node compIdNode : compIdNodes) {
							NamedNodeMap compIdNodeAttr = compIdNode.getAttributes();
							Node compIdSource = compIdNodeAttr.getNamedItem("source");
							if (compIdSource!=null && "PDBCC".equals(compIdSource.getTextContent())) {
								String strComps = compIdNode.getTextContent();
								if (strComps.isEmpty()) {
									throw new RuntimeException("Empty component." +
											" See Modification "+id+".");
								}
								compIds.addAll(Arrays.asList(strComps.split(",")));
							}
						}
					}

					if (compIds.isEmpty()) {
						throw new RuntimeException("Each component must have a PDBCC ID." +
								" See Modification "+id+".");
					}

					// terminal
					boolean nTerminal = false;
					boolean cTerminal = false;
					List compTermNode = getChildNodes(compNode).get("Terminal");
					if (compTermNode!=null) {
						if (compTermNode.size()!=1) {
							throw new RuntimeException("Only one  condition is allowed for " +
									"each component. See Modification "+id+".");
						}
						String nc = compTermNode.get(0).getTextContent();
						if ("N".equals(nc)) {
							nTerminal = true;
						} else if ("C".equals(nc)) {
							cTerminal = true;
						} else {
							throw new RuntimeException("Only N or C is allowed for ." +
									" See Modification "+id+".");
						}
					}

					// register
					Component comp = Component.of(compIds, nTerminal, cTerminal);
					comps.add(comp);
					mapLabelComp.put(label, comps.size()-1);
				}

				// bonds
				List bondNodes = compInfoNodes.get("Bond");
				List linkages = null;
				if (bondNodes!=null) {
					int sizeBonds = bondNodes.size();
					linkages = new ArrayList<>(sizeBonds);
					for (int iBond=0; iBond> bondChildNodes = getChildNodes(bondNode);
						if (bondChildNodes==null) {
							throw new RuntimeException("Each bond must contain two atoms" +
									" See Modification "+id+".");
						}

						List atomNodes = bondChildNodes.get("Atom");
						if (atomNodes==null || atomNodes.size()!=2) {
							throw new RuntimeException("Each bond must contain two atoms" +
									" See Modification "+id+".");
						}

						// atom 1
						NamedNodeMap atomNodeAttrs = atomNodes.get(0).getAttributes();
						Node compNode = atomNodeAttrs.getNamedItem("component");
						if (compNode==null) {
							throw new RuntimeException("Each atom must on a component." +
									" See Modification "+id+".");
						}
						String labelComp1 = compNode.getTextContent();
						int iComp1 = mapLabelComp.get(labelComp1);

						Node labelNode = atomNodeAttrs.getNamedItem("atom");
						String labelAtom1 = labelNode==null?null:labelNode.getTextContent();

						String atom1 = atomNodes.get(0).getTextContent();
						if (atom1.isEmpty()) {
							throw new RuntimeException("Each atom must have a name. Please use wildcard * if unknown." +
									" See Modification "+id+".");
						}
						List potentialAtoms1 = Arrays.asList(atom1.split(","));

						// atom 2
						atomNodeAttrs = atomNodes.get(1).getAttributes();
						compNode = atomNodeAttrs.getNamedItem("component");
						if (compNode==null) {
							throw new RuntimeException("Each atom must on a component." +
									" See Modification "+id+".");
						}
						String labelComp2 = compNode.getTextContent();
						int iComp2 = mapLabelComp.get(labelComp2);

						labelNode = atomNodeAttrs.getNamedItem("atom");
						String labelAtom2 = labelNode==null?null:labelNode.getTextContent();

						String atom2 = atomNodes.get(1).getTextContent();
						if (atom2.isEmpty()) {
							throw new RuntimeException("Each atom must have a name. Please use wildcard * if unknown." +
									" See Modification "+id+".");
						}
						List potentialAtoms2 = Arrays.asList(atom2.split(","));

						// add linkage
						ModificationLinkage linkage = new ModificationLinkage(comps,
								iComp1, potentialAtoms1, labelAtom1,
								iComp2, potentialAtoms2, labelAtom2);
						linkages.add(linkage);
					}
				}

				condition = new ModificationConditionImpl(comps, linkages);
			} // end of condition

			ProteinModificationImpl.Builder modBuilder =
				new ProteinModificationImpl.Builder(id, cat, occType, condition);

			// description
			nodes = infoNodes.get("Description");
			if (nodes!=null && !nodes.isEmpty()) {
				modBuilder.setDescription(nodes.get(0).getTextContent());
			}

			// cross references
			nodes = infoNodes.get("CrossReference");
			if (nodes!=null) {
				for (Node node:nodes) {
					Map> xrefInfoNodes = getChildNodes(node);

					// source
					List xrefNode = xrefInfoNodes.get("Source");
					if (xrefNode==null || xrefNode.size()!=1) {
						throw new RuntimeException("Error in XML file: " +
							"a cross reference must contain exactly one  field." +
							" See Modification "+id+".");
					}
					String xrefDb = xrefNode.get(0).getTextContent();

					// id
					xrefNode = xrefInfoNodes.get("Id");
					if (xrefNode==null || xrefNode.size()!=1) {
						throw new RuntimeException("Error in XML file: " +
							"a cross reference must contain exactly one  field." +
							" See Modification "+id+".");
					}
					String xrefId = xrefNode.get(0).getTextContent();

					// name
					String xrefName = null;
					xrefNode = xrefInfoNodes.get("Name");
					if (xrefNode!=null && !xrefNode.isEmpty()) {
						xrefName = xrefNode.get(0).getTextContent();
					}

					if ("PDBCC".equals(xrefDb)) {
						modBuilder.setPdbccId(xrefId).setPdbccName(xrefName);
					} else if ("RESID".equals(xrefDb)) {
						modBuilder.setResidId(xrefId).setResidName(xrefName);
					} else if ("PSI-MOD".equals(xrefDb)) {
						modBuilder.setPsimodId(xrefId).setPsimodName(xrefName);
					}
				}
			} // end of cross references

			// formula
			nodes = infoNodes.get("Formula");
			if (nodes!=null && !nodes.isEmpty()) {
				modBuilder.setFormula(nodes.get(0).getTextContent());
			}

			// keywords
			nodes = infoNodes.get("Keyword");
			if (nodes!=null && !nodes.isEmpty()) {
				for (Node node : nodes) {
					modBuilder.addKeyword(node.getTextContent());
				}
			}

			ProteinModificationRegistry.register(modBuilder.build());
		}
	}

	/**
	 * Utility method to group child nodes by their names.
	 * @param parent parent node.
	 * @return Map from name to child nodes.
	 */
	private static Map> getChildNodes(Node parent) {
		if (parent==null)
			return Collections.emptyMap();

		Map> children = new HashMap<>();

		NodeList nodes = parent.getChildNodes();
		int nNodes = nodes.getLength();
		for (int i=0; i namesakes = children.get(name);
			if (namesakes==null) {
				namesakes = new ArrayList<>();
				children.put(name, namesakes);
			}
			namesakes.add(node);
		}

		return children;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy