org.biojava.nbio.protmod.io.ProteinModificationXmlReader Maven / Gradle / Ivy
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* Created on Jun 1, 2010
* Author: Jianjiong Gao
*
*/
package org.biojava.nbio.protmod.io;
import org.biojava.nbio.protmod.*;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
/**
*
* @author Jianjiong Gao
* @since 3.0
*/
public final class ProteinModificationXmlReader {
/**
* Private constructor: this utility class is not meant to be
* instantiated, so no instance can be created from outside.
*/
private ProteinModificationXmlReader() {
	// intentionally empty
}
/**
* Read protein modifications from XML file and register them.
* @param isXml {@link InputStream} of the XML file.
* @throws IOException if failed to read the XML file.
* @throws ParserConfigurationException if the {@link DocumentBuilder} cannot be created.
* @throws SAXException if parse errors occur.
*/
public static void registerProteinModificationFromXml(InputStream isXml)
throws IOException, ParserConfigurationException, SAXException {
if (isXml==null) {
throw new IllegalArgumentException("Null argument.");
}
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(isXml);
NodeList modNodes = doc.getElementsByTagName("Entry");
int modSize = modNodes.getLength();
List nodes;
for (int iMod=0; iMod> infoNodes = getChildNodes(modNode);
// ID
nodes = infoNodes.get("Id");
if (nodes==null || nodes.size()!=1) {
throw new RuntimeException("Each modification must have exact " +
"one field.");
}
String id = nodes.get(0).getTextContent();
// modification category
nodes = infoNodes.get("Category");
if (nodes==null || nodes.size()!=1) {
throw new RuntimeException("Each modification must have exact " +
"one field. See Modification "+id+".");
}
ModificationCategory cat = ModificationCategory.getByLabel(
nodes.get(0).getTextContent());
if (cat==null) {
throw new RuntimeException(nodes.get(0).getTextContent()+
" is not defined as an modification category." +
" See Modification "+id+".");
}
// occurrence type
nodes = infoNodes.get("Occurrence");
if (nodes==null || nodes.size()!=1) {
throw new RuntimeException("Each modification must have exact " +
"one field. See Modification "+id+".");
}
ModificationOccurrenceType occType = ModificationOccurrenceType
.getByLabel(nodes.get(0).getTextContent());
if (occType==null) {
throw new RuntimeException(nodes.get(0).getTextContent()+
" is not defined as an modification occurence type." +
" See Modification "+id+".");
}
// condition
ModificationCondition condition = null;
{
nodes = infoNodes.get("Condition");
if (nodes==null || nodes.size()!=1) {
throw new RuntimeException("Each modification must have exact " +
"one field. See Modification "+id+".");
}
Node compsNode = nodes.get(0);
// keep track of the labels of component indices
Map mapLabelComp = new HashMap<>();
Map> compInfoNodes = getChildNodes(compsNode);
// components
List compNodes = compInfoNodes.get("Component");
int sizeComp = compNodes.size();
List comps = new ArrayList<>(sizeComp);
for (int iComp=0; iComp compIds = new HashSet<>();
List compIdNodes = getChildNodes(compNode).get("Id");
if (compIdNodes!=null) {
for (Node compIdNode : compIdNodes) {
NamedNodeMap compIdNodeAttr = compIdNode.getAttributes();
Node compIdSource = compIdNodeAttr.getNamedItem("source");
if (compIdSource!=null && "PDBCC".equals(compIdSource.getTextContent())) {
String strComps = compIdNode.getTextContent();
if (strComps.isEmpty()) {
throw new RuntimeException("Empty component." +
" See Modification "+id+".");
}
compIds.addAll(Arrays.asList(strComps.split(",")));
}
}
}
if (compIds.isEmpty()) {
throw new RuntimeException("Each component must have a PDBCC ID." +
" See Modification "+id+".");
}
// terminal
boolean nTerminal = false;
boolean cTerminal = false;
List compTermNode = getChildNodes(compNode).get("Terminal");
if (compTermNode!=null) {
if (compTermNode.size()!=1) {
throw new RuntimeException("Only one condition is allowed for " +
"each component. See Modification "+id+".");
}
String nc = compTermNode.get(0).getTextContent();
if ("N".equals(nc)) {
nTerminal = true;
} else if ("C".equals(nc)) {
cTerminal = true;
} else {
throw new RuntimeException("Only N or C is allowed for ." +
" See Modification "+id+".");
}
}
// register
Component comp = Component.of(compIds, nTerminal, cTerminal);
comps.add(comp);
mapLabelComp.put(label, comps.size()-1);
}
// bonds
List bondNodes = compInfoNodes.get("Bond");
List linkages = null;
if (bondNodes!=null) {
int sizeBonds = bondNodes.size();
linkages = new ArrayList<>(sizeBonds);
for (int iBond=0; iBond> bondChildNodes = getChildNodes(bondNode);
if (bondChildNodes==null) {
throw new RuntimeException("Each bond must contain two atoms" +
" See Modification "+id+".");
}
List atomNodes = bondChildNodes.get("Atom");
if (atomNodes==null || atomNodes.size()!=2) {
throw new RuntimeException("Each bond must contain two atoms" +
" See Modification "+id+".");
}
// atom 1
NamedNodeMap atomNodeAttrs = atomNodes.get(0).getAttributes();
Node compNode = atomNodeAttrs.getNamedItem("component");
if (compNode==null) {
throw new RuntimeException("Each atom must on a component." +
" See Modification "+id+".");
}
String labelComp1 = compNode.getTextContent();
int iComp1 = mapLabelComp.get(labelComp1);
Node labelNode = atomNodeAttrs.getNamedItem("atom");
String labelAtom1 = labelNode==null?null:labelNode.getTextContent();
String atom1 = atomNodes.get(0).getTextContent();
if (atom1.isEmpty()) {
throw new RuntimeException("Each atom must have a name. Please use wildcard * if unknown." +
" See Modification "+id+".");
}
List potentialAtoms1 = Arrays.asList(atom1.split(","));
// atom 2
atomNodeAttrs = atomNodes.get(1).getAttributes();
compNode = atomNodeAttrs.getNamedItem("component");
if (compNode==null) {
throw new RuntimeException("Each atom must on a component." +
" See Modification "+id+".");
}
String labelComp2 = compNode.getTextContent();
int iComp2 = mapLabelComp.get(labelComp2);
labelNode = atomNodeAttrs.getNamedItem("atom");
String labelAtom2 = labelNode==null?null:labelNode.getTextContent();
String atom2 = atomNodes.get(1).getTextContent();
if (atom2.isEmpty()) {
throw new RuntimeException("Each atom must have a name. Please use wildcard * if unknown." +
" See Modification "+id+".");
}
List potentialAtoms2 = Arrays.asList(atom2.split(","));
// add linkage
ModificationLinkage linkage = new ModificationLinkage(comps,
iComp1, potentialAtoms1, labelAtom1,
iComp2, potentialAtoms2, labelAtom2);
linkages.add(linkage);
}
}
condition = new ModificationConditionImpl(comps, linkages);
} // end of condition
ProteinModificationImpl.Builder modBuilder =
new ProteinModificationImpl.Builder(id, cat, occType, condition);
// description
nodes = infoNodes.get("Description");
if (nodes!=null && !nodes.isEmpty()) {
modBuilder.setDescription(nodes.get(0).getTextContent());
}
// cross references
nodes = infoNodes.get("CrossReference");
if (nodes!=null) {
for (Node node:nodes) {
Map> xrefInfoNodes = getChildNodes(node);
// source
List xrefNode = xrefInfoNodes.get("Source");
if (xrefNode==null || xrefNode.size()!=1) {
throw new RuntimeException("Error in XML file: " +
"a cross reference must contain exactly one
© 2015 - 2025 Weber Informatics LLC | Privacy Policy