data:image/s3,"s3://crabby-images/02ace/02ace956f9868cf2a1a780bd2c0a517cd3a46077" alt="JAR search and dependency download from the Maven repository"
org.biojava.nbio.ontology.obo.OboFileParser Maven / Gradle / Ivy
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* Created on Jan 18, 2008
*
*/
package org.biojava.nbio.ontology.obo;
import org.biojava.nbio.ontology.Synonym;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.*;
/** A class to parse the content of an OBO file. It delegates handling of the
* content to the registered {@link OboFileEventListener} implementations.
*
* This file contains parts of the OBO-Edit file OBOParseEngine, (particularly the encoding and decoding part)
*
* http://geneontology.cvs.sourceforge.net/geneontology/go-dev/java/oboedit/sources/org/geneontology/oboedit/dataadapter/OBOParseEngine.java?revision=1.10&view=markup
* Thanks to the OboEdit developers for giving permission to release this in BioJava.
*
*
* @author Andreas Prlic
* @author John Day Richter
* @since 1.6
*/
public class OboFileParser {
// Class-wide SLF4J logger.
private static final Logger logger = LoggerFactory.getLogger(OboFileParser.class);
// Registered listeners; created in the constructor and extended through
// addOboFileEventListener(). Package-private, no access modifier.
List listeners;
// Text of a line of the OBO input; parseSynonym() appends it to its error messages.
protected String line;
// Line counter; parseSynonym() appends it to its error messages.
protected int linenum = 0;
// Size / progress counters, both starting at zero.
// NOTE(review): presumably meant for progress reporting - confirm against the rest of the class.
protected int totalSize = 0;
protected int bytesRead = 0;
// Scratch buffer available to subclasses and helper methods.
protected StringBuffer tempBuffer = new StringBuffer();
// Formatter for the pattern "dd:MM:yyyy HH:mm" using the US locale.
// SimpleDateFormat is not thread-safe, so an instance of this parser should not be shared between threads.
protected SimpleDateFormat dateFormat = new SimpleDateFormat("dd:MM:yyyy HH:mm", Locale.US);
/**
 * Maps an escape letter to the literal character it stands for
 * (for example {@code 'n'} to a newline and {@code 'W'} to a space).
 * Presumably the key is the character that follows a backslash in OBO
 * text - confirm against the unescape helpers of this class.
 */
protected static final Map<Character, Character> escapeChars =
        new HashMap<Character, Character>();
/**
 * Inverse of {@link #escapeChars}: maps a literal character to its escape
 * letter. Filled once by the static initializer below.
 */
protected static final Map<Character, Character> unescapeChars =
        new HashMap<Character, Character>();
static {
    // Autoboxing (Character.valueOf) replaces the deprecated Character constructor.
    escapeChars.put('n', '\n');
    escapeChars.put('W', ' ');
    escapeChars.put('t', '\t');
    escapeChars.put(':', ':');
    escapeChars.put(',', ',');
    escapeChars.put('"', '"');
    escapeChars.put('\'', '\'');
    escapeChars.put('\\', '\\');
    escapeChars.put('{', '{');
    escapeChars.put('}', '}');
    escapeChars.put('(', '(');
    escapeChars.put(')', ')');
    escapeChars.put('[', '[');
    escapeChars.put(']', ']');
    escapeChars.put('!', '!');
    // Every value above is distinct, so swapping key and value loses no entry.
    for (Map.Entry<Character, Character> entry : escapeChars.entrySet()) {
        unescapeChars.put(entry.getValue(), entry.getKey());
    }
}
/**
 * Plain holder for a string together with up to two index positions.
 * <p>
 * In this file the parser reads {@link #str} as the extracted text and uses
 * {@link #index} as the position in the scanned string from which the
 * remainder continues (callers slice from {@code index + 1}).
 */
public static class SOPair {
    /** The string half of the pair; may be whatever the creator passed in. */
    public String str;
    /** First index position. */
    public int index;
    /** Second index position; {@code -1} when created through the two-argument constructor. */
    public int endIndex;

    /**
     * Creates a pair with both index positions given explicitly.
     *
     * @param str the string to hold
     * @param index the first index position
     * @param endIndex the second index position
     */
    public SOPair(String str, int index, int endIndex) {
        this.endIndex = endIndex;
        this.index = index;
        this.str = str;
    }

    /**
     * Creates a pair whose {@link #endIndex} is {@code -1}.
     *
     * @param str the string to hold
     * @param index the first index position
     */
    public SOPair(String str, int index) {
        this(str, index, -1);
    }
}
/**
 * Creates a parser that starts out with an empty listener list.
 */
public OboFileParser(){
    this.listeners = new ArrayList();
}
/**
 * Registers a listener by appending it to the listener list.
 *
 * @param listener the listener to append; no null or duplicate check is made
 */
public void addOboFileEventListener(OboFileEventListener listener){
    this.listeners.add(listener);
}
/**
 * Gives access to the registered listeners.
 *
 * @return the parser's own listener list (not a copy), so changes made to
 *         the returned list affect this parser
 */
public List getOboFileEventListener(){
    return this.listeners;
}
/**
 * Parses an OBO ontology file line by line and reports what it finds to the
 * event callbacks of this class.
 * <ul>
 * <li>Empty lines and lines starting with the comment marker {@code '!'} are skipped.</li>
 * <li>A line starting with {@code '['} is a stanza header; its name is passed
 *     to {@code triggerNewStanza}.</li>
 * <li>Any other line is a {@code tag: value} line. Text from the first
 *     unescaped {@code '!'} on is ignored, a trailing {@code {...}} modifier is
 *     split off, and the result is passed to {@code triggerNewSynonym} (for
 *     synonym tags) or {@code triggerNewKey}.</li>
 * </ul>
 * While parsing, the {@code line} and {@code linenum} fields are kept in step
 * with the input so that error messages built from them (see
 * {@code parseSynonym}) name the offending line.
 *
 * @param oboFile the reader to consume; it is read until exhausted and is not closed here
 * @throws IOException if reading fails, or if a line has an unclosed or empty
 *         stanza header, an unterminated trailing modifier, or a tag without a value
 */
public void parseOBO(BufferedReader oboFile) throws IOException{
    // Start counting afresh so that a parser instance can be reused.
    this.linenum = 0;
    String currentLine;
    while ((currentLine = oboFile.readLine()) != null) {
        // Publish the position through the fields; a local named "line" used
        // to shadow the field, leaving it null for parseSynonym's messages.
        this.line = currentLine;
        this.linenum++;
        if (currentLine.length() == 0)
            continue;
        // A line that is a comment from its first character has no tag or value;
        // without this guard the value slice below would end at index 0 and fail.
        if (currentLine.charAt(0) == '!')
            continue;
        if (currentLine.charAt(0) == '[') {
            // stanza header, e.g. [Term]
            if (currentLine.charAt(currentLine.length() - 1) != ']')
                throw new IOException("Unclosed stanza: \"" + currentLine + "\"" );
            String stanzaname = currentLine.substring(1, currentLine.length() - 1);
            if (stanzaname.length() < 1)
                throw new IOException("Empty stanza: \"" +currentLine+"\"");
            triggerNewStanza(stanzaname);
        } else {
            // a content line: tag name up to the ':' delimiter
            SOPair pair = unescape(currentLine, ':', 0, true);
            String name = pair.str;
            // everything from the first unescaped '!' is a comment
            int lineEnd = findUnescaped(currentLine, '!', 0, currentLine.length(), true);
            if (lineEnd == -1)
                lineEnd = currentLine.length();
            // find nested values
            NestedValue nv = null;
            int trailingStartIndex = -1;
            int trailingEndIndex = -1;
            // Walk backwards from the end of the payload, skipping whitespace.
            for (int i = lineEnd - 1; i >= 0; i--) {
                char c = currentLine.charAt(i);
                if (Character.isWhitespace(c)) {
                    // keep going until we see non-whitespace
                    continue;
                }
                if (c == '}') {
                    // an escaped brace does not close a modifier; keep scanning
                    if (i >= 1 && currentLine.charAt(i - 1) == '\\')
                        continue;
                    // the first thing we see is a closing brace:
                    // we have a trailing modifier
                    trailingEndIndex = i;
                }
                break;
            }
            if (trailingEndIndex != -1) {
                // No break here: the scan runs to the start of the line, so the
                // left-most unescaped '{' becomes the start of the modifier.
                for (int i = trailingEndIndex - 1; i >= 0; i--) {
                    if (currentLine.charAt(i) == '{') {
                        if (i >= 1 && currentLine.charAt(i - 1) == '\\')
                            continue;
                        trailingStartIndex = i + 1;
                    }
                }
            }
            int valueStopIndex;
            if (trailingStartIndex == -1 && trailingEndIndex != -1)
                throw new IOException("Unterminated trailing modifier. " + currentLine);
            else if (trailingStartIndex != -1) {
                // the value ends right before the opening brace
                valueStopIndex = trailingStartIndex - 1;
                String trailing = currentLine.substring(trailingStartIndex,
                        trailingEndIndex).trim();
                // The nested value is parsed (and may be rejected) here, but it
                // is not handed on to the listeners.
                nv = new NestedValue();
                getNestedValue(nv, trailing, 0);
            } else
                valueStopIndex = lineEnd;
            String value = currentLine.substring(pair.index + 1, valueStopIndex).trim();
            if (value.length() == 0)
                throw new IOException("Tag found with no value "+ currentLine);
            if ( isSynonym(name)){
                Synonym synonym = parseSynonym(name,value);
                triggerNewSynonym(synonym);
            } else {
                triggerNewKey(name,value);
            }
        }
    }
}
/**
 * Tells whether a tag is handled as a synonym, i.e. whether it equals
 * {@code OboFileHandler.SYNONYM} or {@code OboFileHandler.EXACT_SYNONYM}.
 * <p>
 * NOTE(review): parseSynonym also compares its key against
 * {@code BROAD_SYNONYM} and {@code NARROW_SYNONYM}, which are not accepted
 * here - confirm whether that is intended.
 *
 * @param key the tag name; must not be null
 * @return true for the two synonym tags named above, false otherwise
 */
private boolean isSynonym(String key){
    return key.equals(OboFileHandler.SYNONYM)
            || key.equals(OboFileHandler.EXACT_SYNONYM);
}
/**
 * Builds a {@link Synonym} from the value of a synonym tag.
 * <p>
 * The value is expected to look like
 * {@code "ca_bind" RELATED [uniprot:curation]}: a quoted name, then
 * optionally a scope keyword and a category id, then a dbxref list in
 * square brackets.
 * <p>
 * The tag name supplies the default scope (related, unless the key is the
 * exact, broad or narrow synonym tag); a scope keyword in the value replaces
 * that default. The dbxref list is parsed but its entries are not yet stored
 * in the returned synonym.
 *
 * @param key the tag name
 * @param value the tag value
 * @return a synonym carrying the parsed scope, category and name
 * @throws IOException if the opening quote or the dbxref list is missing, the
 *         scope keyword is unknown, or more than two tokens precede the list
 */
private Synonym parseSynonym(String key, String value) throws IOException{
    int quoteStart = findUnescaped(value, '"', 0, value.length());
    if (quoteStart == -1) {
        throw new IOException("Expected \"" + line + " " + linenum);
    }
    // text between the quotes, plus where the quoted part stops
    SOPair quoted = unescape(value, '"', quoteStart + 1, value.length(),
            true);
    int xrefStart = findUnescaped(value, '[', quoted.index, value.length());
    if (xrefStart == -1) {
        throw new IOException("Badly formatted synonym. "
                + "No dbxref list found." + line + " " + linenum );
    }
    // whatever sits between the quoted name and the dbxref list
    String between = value.substring(quoted.index + 1, xrefStart).trim();
    StringTokenizer words = new StringTokenizer(between, " \t");

    // default scope, derived from the tag name
    int scope = Synonym.RELATED_SYNONYM;
    if (key.equals(OboFileHandler.EXACT_SYNONYM)) {
        scope = Synonym.EXACT_SYNONYM;
    } else if (key.equals(OboFileHandler.BROAD_SYNONYM)) {
        scope = Synonym.BROAD_SYNONYM;
    } else if (key.equals(OboFileHandler.NARROW_SYNONYM)) {
        scope = Synonym.NARROW_SYNONYM;
    }

    String catID = null;
    int position = 0;
    while (words.hasMoreTokens()) {
        String token = words.nextToken();
        if (position == 0) {
            // first token: scope keyword
            if (token.equals("RELATED") || token.equals("UNSPECIFIED")) {
                scope = Synonym.RELATED_SYNONYM;
            } else if (token.equals("EXACT")) {
                scope = Synonym.EXACT_SYNONYM;
            } else if (token.equals("BROAD")) {
                scope = Synonym.BROAD_SYNONYM;
            } else if (token.equals("NARROW")) {
                scope = Synonym.NARROW_SYNONYM;
            } else {
                throw new IOException("Found unexpected scope "
                        + "identifier " + token + line);
            }
        } else if (position == 1) {
            // second token: category id
            catID = token;
        } else {
            throw new IOException("Expected dbxref list,"
                    + " instead found " + token + line );
        }
        position++;
    }

    Synonym result = new Synonym();
    result.setScope(scope);
    result.setCategory(catID);
    result.setName(quoted.str);

    Map[] refs = getDbxrefList(value, xrefStart + 1, value.length());
    // The references are read out here but not attached to the synonym yet.
    for (int r = 0; r < refs.length; r++) {
        Map ref = refs[r];
        @SuppressWarnings("unused")
        String xref = (String) ref.get("xref");
        @SuppressWarnings("unused")
        String desc = (String) ref.get("desc");
        @SuppressWarnings("unused")
        NestedValue nv = (NestedValue) ref.get("nv");
        //TODO: add implementation for this...
    }
    return result;
}
protected Map[] getDbxrefList(String line, int startoffset, int endoffset) throws IOException {
Vector
© 2015 - 2025 Weber Informatics LLC | Privacy Policy