org.obolibrary.oboformat.parser.OBOFormatParser Maven / Gradle / Ivy
package org.obolibrary.oboformat.parser;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.obolibrary.oboformat.model.*;
import org.obolibrary.oboformat.model.Frame.FrameType;
import org.obolibrary.oboformat.parser.OBOFormatConstants.OboFormatTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.Weigher;
/** implements the OBO Format 1.4 specification. */
public class OBOFormatParser {
static final Logger LOG = LoggerFactory.getLogger(OBOFormatParser.class);
// TODO use this to validate date strings for OboFormatTag.TAG_CREATION_DATE
@Nonnull
protected static SimpleDateFormat getISODateFormat() {
return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
}
private boolean followImport;
private Object location;
protected final MyStream stream;
public final com.google.common.cache.LoadingCache stringCache;
/**
*
*/
public OBOFormatParser() {
this(new MyStream());
}
/**
* @param s
*/
protected OBOFormatParser(MyStream s) {
stream = s;
Weigher stringWeigher = new Weigher() {
@Override
public int weigh(String key, String value) {
return key.length();
}
};
CacheLoader loader = new CacheLoader() {
@Override
public String load(String key) throws Exception {
return key;
}
};
if (LOG.isDebugEnabled()) {
stringCache = CacheBuilder.newBuilder().recordStats().maximumWeight(8192 * 1024).weigher(stringWeigher)
.build(loader);
} else {
stringCache = CacheBuilder.newBuilder().maximumWeight(8192 * 1024).weigher(stringWeigher).build(loader);
}
}
protected static class MyStream {
int pos = 0;
String line;
int lineNo = 0;
BufferedReader reader;
public MyStream() {
pos = 0;
}
public MyStream(BufferedReader r) {
reader = r;
}
protected char peekChar() {
prepare();
return line.charAt(pos);
}
public char nextChar() {
pos++;
return line.charAt(pos - 1);
}
@Nullable
public String rest() {
prepare();
if (line == null) {
return null;
}
if (pos >= line.length()) {
return "";
}
return line.substring(pos);
}
public void advance(int dist) {
pos += dist;
}
public void prepare() {
if (line == null) {
advanceLine();
}
}
public void advanceLine() {
try {
line = reader.readLine();
lineNo++;
pos = 0;
} catch (IOException e) {
throw new OBOFormatParserException(e, lineNo, "Error reading from input.");
}
}
public void forceEol() {
if (line == null) {
return;
}
pos = line.length();
}
public boolean eol() {
prepare();
if (line == null) {
return false;
}
return pos >= line.length();
}
public boolean eof() {
prepare();
return line == null;
}
@Nonnull
public static String getTag() {
return "";
}
public boolean consume(@Nonnull String s) {
String r = rest();
if (r == null) {
return false;
}
if (r.startsWith(s)) {
pos += s.length();
return true;
}
return false;
}
public int indexOf(char c) {
prepare();
if (line == null) {
return -1;
}
return line.substring(pos).indexOf(c);
}
@Nonnull
@Override
public String toString() {
return line + "//" + pos + " LINE:" + lineNo;
}
public boolean peekCharIs(char c) {
if (eol() || eof()) {
return false;
}
return peekChar() == c;
}
public int getLineNo() {
return lineNo;
}
}
/**
* @param r
* r
*/
public void setReader(BufferedReader r) {
stream.reader = r;
}
/**
* @param followImports
* followImports
*/
public void setFollowImports(boolean followImports) {
followImport = followImports;
}
/**
* @return follow imports
*/
public boolean getFollowImports() {
return followImport;
}
/**
* Parses a local file or URL to an OBODoc.
*
* @param fn
* fn
* @return parsed obo document
* @throws IOException
* io exception
* @throws OBOFormatParserException
* parser exception
*/
@Nonnull
public OBODoc parse(@Nonnull String fn) throws IOException {
if (fn.startsWith("http:")) {
return parse(new URL(fn));
}
return parse(new File(fn));
}
/**
* Parses a local file to an OBODoc.
*
* @param file
* file
* @return parsed obo document
* @throws IOException
* io exception
* @throws OBOFormatParserException
* parser exception
*/
@Nonnull
public OBODoc parse(File file) throws IOException {
location = file;
BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file),
OBOFormatConstants.DEFAULT_CHARACTER_ENCODING));
try {
return parse(in);
} finally {
in.close();
}
}
/**
* Parses a remote URL to an OBODoc.
*
* @param url
* url
* @return parsed obo document
* @throws IOException
* io exception
* @throws OBOFormatParserException
* parser exception
*/
@Nonnull
public OBODoc parse(@Nonnull URL url) throws IOException {
location = url;
BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(),
OBOFormatConstants.DEFAULT_CHARACTER_ENCODING));
return parse(in);
}
/**
* Parses a remote URL to an OBODoc.
*
* @param urlstr
* urlstr
* @return parsed obo document
* @throws IOException
* io exception
* @throws OBOFormatParserException
* parser exception
*/
@Nonnull
public OBODoc parseURL(String urlstr) throws IOException {
URL url = new URL(urlstr);
return parse(url);
}
@Nonnull
private String resolvePath(@Nonnull String inputPath) {
String path = inputPath;
if (!(path.startsWith("http:") || path.startsWith("file:") || path.startsWith("https:"))) {
// path is not absolue then guess it.
if (location != null) {
if (location instanceof URL) {
URL url = (URL) location;
String p = url.toString();
int index = p.lastIndexOf('/');
path = p.substring(0, index + 1) + path;
} else {
File f = new File(location.toString());
f = new File(f.getParent(), path);
path = f.toURI().toString();
}
}
}
return path;
}
/**
* @param reader
* reader
* @return parsed obo document
* @throws IOException
* io exception
* @throws OBOFormatParserException
* parser exception
*/
@Nonnull
public OBODoc parse(BufferedReader reader) throws IOException {
setReader(reader);
OBODoc obodoc = new OBODoc();
parseOBODoc(obodoc);
// handle imports
Frame hf = obodoc.getHeaderFrame();
List imports = new LinkedList<>();
if (hf != null) {
for (Clause cl : hf.getClauses(OboFormatTag.TAG_IMPORT)) {
@SuppressWarnings("null")
String path = resolvePath(cl.getValue(String.class));
// TBD -- changing the relative path to absolute
cl.setValue(path);
if (followImport) {
// resolve OboDoc documents from import paths.
OBOFormatParser parser = new OBOFormatParser();
OBODoc doc = parser.parseURL(path);
imports.add(doc);
} /*
* else{ //build a proxy document which reference import path
* as ontology id Frame importHeaer = new
* Frame(FrameType.HEADER); Clause ontologyCl = new Clause();
* ontologyCl.setTag(OboFormatTag.TAG_ONTOLOGY.getTag());
* ontologyCl.setValue(path);
* importHeaer.addClause(ontologyCl);
* doc.setHeaderFrame(importHeaer); }
*/
}
obodoc.setImportedOBODocs(imports);
}
return obodoc;
}
// ----------------------------------------
// GRAMMAR
// ----------------------------------------
/**
* @param obodoc
* obodoc
* @throws OBOFormatParserException
* parser exception
*/
public void parseOBODoc(@Nonnull OBODoc obodoc) {
Frame h = new Frame(FrameType.HEADER);
obodoc.setHeaderFrame(h);
parseHeaderFrame(h);
h.freeze();
parseZeroOrMoreWsOptCmtNl();
while (!stream.eof()) {
parseEntityFrame(obodoc);
parseZeroOrMoreWsOptCmtNl();
}
// set OBO namespace in frames
String defaultOboNamespace = h.getTagValue(OboFormatTag.TAG_DEFAULT_NAMESPACE, String.class);
if (defaultOboNamespace != null) {
addOboNamespace(obodoc.getTermFrames(), defaultOboNamespace);
addOboNamespace(obodoc.getTypedefFrames(), defaultOboNamespace);
addOboNamespace(obodoc.getInstanceFrames(), defaultOboNamespace);
}
}
private static void addOboNamespace(@Nullable Collection frames, String defaultOboNamespace) {
if (frames != null && !frames.isEmpty()) {
for (Frame termFrame : frames) {
Clause clause = termFrame.getClause(OboFormatTag.TAG_NAMESPACE);
if (clause == null) {
clause = new Clause(OboFormatTag.TAG_NAMESPACE, defaultOboNamespace);
termFrame.addClause(clause);
}
}
}
}
/**
* @param doc
* doc
* @return list of references
* @throws OBOFormatDanglingReferenceException
* dangling reference error
*/
@SuppressWarnings("null")
@Nonnull
public List checkDanglingReferences(@Nonnull OBODoc doc) {
List danglingReferences = new ArrayList<>();
// check term frames
for (Frame f : doc.getTermFrames()) {
for (String tag : f.getTags()) {
OboFormatTag tagconstant = OBOFormatConstants.getTag(tag);
Clause c = f.getClause(tag);
if (tagconstant == OboFormatTag.TAG_INTERSECTION_OF || tagconstant == OboFormatTag.TAG_UNION_OF
|| tagconstant == OboFormatTag.TAG_EQUIVALENT_TO || tagconstant == OboFormatTag.TAG_DISJOINT_FROM
|| tagconstant == OboFormatTag.TAG_RELATIONSHIP || tagconstant == OboFormatTag.TAG_IS_A) {
if (c.getValues().size() > 1) {
String error = checkRelation(c.getValue(String.class), tag, f.getId(), doc);
if (error != null) {
danglingReferences.add(error);
}
error = checkClassReference(c.getValue2(String.class), tag, f.getId(), doc);
if (error != null) {
danglingReferences.add(error);
}
} else {
String error = checkClassReference(c.getValue(String.class), tag, f.getId(), doc);
if (error != null) {
danglingReferences.add(error);
}
}
}
}
}
// check typedef frames
for (Frame f : doc.getTypedefFrames()) {
for (String tag : f.getTags()) {
OboFormatTag tagConstant = OBOFormatConstants.getTag(tag);
Clause c = f.getClause(tag);
assert c != null;
if (tagConstant == OboFormatTag.TAG_IS_A || tagConstant == OboFormatTag.TAG_INTERSECTION_OF
|| tagConstant == OboFormatTag.TAG_UNION_OF || tagConstant == OboFormatTag.TAG_EQUIVALENT_TO
|| tagConstant == OboFormatTag.TAG_DISJOINT_FROM || tagConstant == OboFormatTag.TAG_INVERSE_OF
|| tagConstant == OboFormatTag.TAG_TRANSITIVE_OVER
|| tagConstant == OboFormatTag.TAG_DISJOINT_OVER) {
String error = checkRelation(c.getValue(String.class), tag, f.getId(), doc);
if (error != null) {
danglingReferences.add(error);
}
} else if (tagConstant == OboFormatTag.TAG_HOLDS_OVER_CHAIN
|| tagConstant == OboFormatTag.TAG_EQUIVALENT_TO_CHAIN
|| tagConstant == OboFormatTag.TAG_RELATIONSHIP) {
String error = checkRelation(c.getValue().toString(), tag, f.getId(), doc);
if (error != null) {
danglingReferences.add(error);
}
error = checkRelation(c.getValue2().toString(), tag, f.getId(), doc);
if (error != null) {
danglingReferences.add(error);
}
} else if (tagConstant == OboFormatTag.TAG_DOMAIN || tagConstant == OboFormatTag.TAG_RANGE) {
String error = checkClassReference(c.getValue().toString(), tag, f.getId(), doc);
if (error != null) {
danglingReferences.add(error);
}
}
}
}
return danglingReferences;
}
@Nullable
private String checkRelation(String relId, String tag, String frameId, @Nonnull OBODoc doc) {
if (doc.getTypedefFrame(relId, followImport) == null) {
return "The relation '" + relId + "' reference in" + " the tag '" + tag + " ' in the frame of id '"
+ frameId + "' is not declared";
}
return null;
}
@Nullable
private String checkClassReference(String classId, String tag, String frameId, @Nonnull OBODoc doc) {
if (doc.getTermFrame(classId, followImport) == null) {
return "The class '" + classId + "' reference in" + " the tag '" + tag + " ' in the frame of id '" + frameId
+ "'is not declared";
}
return null;
}
/**
* @param h
* h
* @throws OBOFormatParserException
* parser exception
*/
public void parseHeaderFrame(@Nonnull Frame h) {
while (parseHeaderClauseNl(h)) {}
}
/**
* header-clause ::= format-version-TVP | ... | ...
*
* @param h
* header frame
* @return false if there are no more header clauses, other wise true
* @throws OBOFormatParserException
* parser exception
*/
protected boolean parseHeaderClauseNl(@Nonnull Frame h) {
parseZeroOrMoreWsOptCmtNl();
if (stream.peekCharIs('[') || stream.eof()) {
return false;
}
parseHeaderClause(h);
parseHiddenComment();
forceParseNlOrEof();
return true;
}
protected void parseHeaderClause(@Nonnull Frame h) {
String t = getParseTag();
Clause cl = new Clause(t);
OboFormatTag tag = OBOFormatConstants.getTag(t);
h.addClause(cl);
if (tag == OboFormatTag.TAG_DATA_VERSION) {
parseUnquotedString(cl);
} else if (tag == OboFormatTag.TAG_FORMAT_VERSION) {
parseUnquotedString(cl);
} else if (tag == OboFormatTag.TAG_SYNONYMTYPEDEF) {
parseSynonymTypedef(cl);
} else if (tag == OboFormatTag.TAG_SUBSETDEF) {
parseSubsetdef(cl);
} else if (tag == OboFormatTag.TAG_DATE) {
parseHeaderDate(cl);
} else if (tag == OboFormatTag.TAG_PROPERTY_VALUE) {
parsePropertyValue(cl);
parseZeroOrMoreWs();
parseQualifierBlock(cl);
parseHiddenComment();
} else if (tag == OboFormatTag.TAG_IMPORT) {
parseImport(cl);
} else if (tag == OboFormatTag.TAG_IDSPACE) {
parseIdSpace(cl);
} else {
parseUnquotedString(cl);
}
}
/**
* @param obodoc
* obodoc
* @throws OBOFormatParserException
* parser exception
*/
public void parseEntityFrame(@Nonnull OBODoc obodoc) {
parseZeroOrMoreWsOptCmtNl();
String rest = stream.rest();
if (rest != null && rest.startsWith("[Term]")) {
parseTermFrame(obodoc);
} else if (rest != null && rest.startsWith("[Instance]")) {
LOG.error("Error: Instance frames are not supported yet. Parsing stopped at line: " + stream.getLineNo());
while (!stream.eof()) {
stream.advanceLine();
}
} else {
parseTypedefFrame(obodoc);
}
}
// ----------------------------------------
// [Term] Frames
// ----------------------------------------
/**
* term-frame ::= nl* '[Term]' nl id-Tag Class-ID EOL { term-frame-clause
* EOL }.
*
* @param obodoc
* obodoc
* @throws OBOFormatParserException
* parser exception
*/
public void parseTermFrame(@Nonnull OBODoc obodoc) {
Frame f = new Frame(FrameType.TERM);
parseZeroOrMoreWsOptCmtNl();
if (stream.consume("[Term]")) {
forceParseNlOrEof();
parseIdLine(f);
parseZeroOrMoreWsOptCmtNl();
while (true) {
if (stream.eof() || stream.peekCharIs('[')) {
// reached end of file or new stanza
break;
}
parseTermFrameClauseEOL(f);
parseZeroOrMoreWsOptCmtNl();
}
try {
f.freeze();
obodoc.addFrame(f);
} catch (FrameMergeException e) {
throw new OBOFormatParserException("Could not add frame " + f
+ " to document, duplicate frame definition?", e, stream.lineNo, stream.line);
}
} else {
error("Expected a [Term] frame, but found unknown stanza type.");
}
}
/**
* @param f
* f
* @throws OBOFormatParserException
* parser exception
*/
protected void parseTermFrameClauseEOL(@Nonnull Frame f) {
// comment line:
if (stream.peekCharIs('!')) {
parseHiddenComment();
forceParseNlOrEof();
} else {
Clause cl = parseTermFrameClause();
parseEOL(cl);
f.addClause(cl);
}
}
/**
* @throws OBOFormatParserException
* parser exception
* @return parsed clause
*/
@Nonnull
public Clause parseTermFrameClause() {
String t = getParseTag();
Clause cl = new Clause(t);
if (parseDeprecatedSynonym(t, cl)) {
return cl;
}
OboFormatTag tag = OBOFormatConstants.getTag(t);
if (tag == null) {
error("Could not find tag for: " + t);
}
if (tag == OboFormatTag.TAG_IS_ANONYMOUS) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_NAME) {
parseUnquotedString(cl);
} else if (tag == OboFormatTag.TAG_NAMESPACE) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_ALT_ID) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_DEF) {
parseDef(cl);
} else if (tag == OboFormatTag.TAG_COMMENT) {
parseUnquotedString(cl);
} else if (tag == OboFormatTag.TAG_SUBSET) {
// in the obof1.4 spec, subsets may not contain spaces.
// unfortunately OE does not prohibit this, so subsets with spaces
// frequently escape. We should either allow spaces in the spec
// (with complicates parsing) or forbid them and reject all obo
// documents
// that do not conform. Unfortunately that would limit the utility
// of
// this parser, so for now we allow spaces. We may make it strict
// again
// when community is sufficiently forewarned.
// (alternatively we may add smarts to OE to translate the spaces to
// underscores,
// so it's a one-off translation)
//
// return parseIdRef(cl);
parseUnquotedString(cl);
} else if (tag == OboFormatTag.TAG_SYNONYM) {
parseSynonym(cl);
} else if (tag == OboFormatTag.TAG_XREF) {
parseDirectXref(cl);
} else if (tag == OboFormatTag.TAG_BUILTIN) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_PROPERTY_VALUE) {
parsePropertyValue(cl);
} else if (tag == OboFormatTag.TAG_IS_A) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_INTERSECTION_OF) {
parseTermIntersectionOf(cl);
} else if (tag == OboFormatTag.TAG_UNION_OF) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_EQUIVALENT_TO) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_DISJOINT_FROM) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_RELATIONSHIP) {
parseRelationship(cl);
} else if (tag == OboFormatTag.TAG_CREATED_BY) {
parsePerson(cl);
} else if (tag == OboFormatTag.TAG_CREATION_DATE) {
parseISODate(cl);
} else if (tag == OboFormatTag.TAG_IS_OBSELETE) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_REPLACED_BY) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_CONSIDER) {
parseIdRef(cl);
} else {
error("Unexpected tag " + tag + " in term frame.");
}
return cl;
}
// ----------------------------------------
// [Typedef] Frames
// ----------------------------------------
/**
* Typedef-frame ::= nl* '[Typedef]' nl id-Tag Class-ID EOL {
* Typedef-frame-clause EOL }.
*
* @param obodoc
* obodoc
* @throws OBOFormatParserException
* parser exception
*/
public void parseTypedefFrame(@Nonnull OBODoc obodoc) {
Frame f = new Frame(FrameType.TYPEDEF);
parseZeroOrMoreWsOptCmtNl();
if (stream.consume("[Typedef]")) {
forceParseNlOrEof();
parseIdLine(f);
parseZeroOrMoreWsOptCmtNl();
while (true) {
if (stream.eof() || stream.peekCharIs('[')) {
// reached end of file or new stanza
break;
}
parseTypedefFrameClauseEOL(f);
parseZeroOrMoreWsOptCmtNl();
}
try {
f.freeze();
obodoc.addFrame(f);
} catch (FrameMergeException e) {
throw new OBOFormatParserException("Could not add frame " + f
+ " to document, duplicate frame definition?", e, stream.lineNo, stream.line);
}
} else {
error("Expected a [Typedef] frame, but found unknown stanza type.");
}
}
/**
* @param f
* f
* @throws OBOFormatParserException
* parser exception
*/
protected void parseTypedefFrameClauseEOL(@Nonnull Frame f) {
// comment line:
if (stream.peekCharIs('!')) {
parseHiddenComment();
forceParseNlOrEof();
} else {
Clause cl = parseTypedefFrameClause();
parseEOL(cl);
f.addClause(cl);
}
}
/**
* @return parsed clause
* @throws OBOFormatParserException
* parser exception
*/
@Nonnull
public Clause parseTypedefFrameClause() {
String t = getParseTag();
if (t.equals("is_metadata")) {
LOG.info("is_metadata DEPRECATED; switching to is_metadata_tag");
t = OboFormatTag.TAG_IS_METADATA_TAG.getTag();
}
Clause cl = new Clause(t);
if (parseDeprecatedSynonym(t, cl)) {
return cl;
}
OboFormatTag tag = OBOFormatConstants.getTag(t);
if (tag == null) {
error("Could not find tag for: " + t);
}
if (tag == OboFormatTag.TAG_IS_ANONYMOUS) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_NAME) {
parseUnquotedString(cl);
} else if (tag == OboFormatTag.TAG_NAMESPACE) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_ALT_ID) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_DEF) {
parseDef(cl);
} else if (tag == OboFormatTag.TAG_COMMENT) {
parseUnquotedString(cl);
} else if (tag == OboFormatTag.TAG_SUBSET) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_SYNONYM) {
parseSynonym(cl);
} else if (tag == OboFormatTag.TAG_XREF) {
parseDirectXref(cl);
} else if (tag == OboFormatTag.TAG_PROPERTY_VALUE) {
parsePropertyValue(cl);
} else if (tag == OboFormatTag.TAG_DOMAIN) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_RANGE) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_BUILTIN) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_ANTI_SYMMETRIC) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_CYCLIC) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_REFLEXIVE) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_SYMMETRIC) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_ASYMMETRIC) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_TRANSITIVE) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_FUNCTIONAL) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_INVERSE_FUNCTIONAL) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_A) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_INTERSECTION_OF) {
parseTypedefIntersectionOf(cl);
} else if (tag == OboFormatTag.TAG_UNION_OF) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_EQUIVALENT_TO) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_DISJOINT_FROM) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_INVERSE_OF) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_TRANSITIVE_OVER) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_HOLDS_OVER_CHAIN) {
parseIdRefPair(cl);
} else if (tag == OboFormatTag.TAG_EQUIVALENT_TO_CHAIN) {
parseIdRefPair(cl);
} else if (tag == OboFormatTag.TAG_DISJOINT_OVER) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_RELATIONSHIP) {
parseRelationship(cl);
} else if (tag == OboFormatTag.TAG_CREATED_BY) {
parsePerson(cl);
} else if (tag == OboFormatTag.TAG_CREATION_DATE) {
parseISODate(cl);
} else if (tag == OboFormatTag.TAG_IS_OBSELETE) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_REPLACED_BY) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_CONSIDER) {
parseIdRef(cl);
} else if (tag == OboFormatTag.TAG_IS_METADATA_TAG) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_IS_CLASS_LEVEL_TAG) {
parseBoolean(cl);
} else if (tag == OboFormatTag.TAG_EXPAND_ASSERTION_TO) {
parseOwlDef(cl);
} else if (tag == OboFormatTag.TAG_EXPAND_EXPRESSION_TO) {
parseOwlDef(cl);
} else {
error("Unexpected tag " + tag + " in type def frame.");
}
return cl;
}
// ----------------------------------------
// [Instance] Frames - TODO
// ----------------------------------------
// ----------------------------------------
// TVP
// ----------------------------------------
@SuppressWarnings("null")
@Nonnull
private String getParseTag() {
if (stream.eof()) {
error("Expected an id tag, not end of file.");
}
if (stream.eol()) {
error("Expected an id tag, not end of line");
}
int i = stream.indexOf(':');
if (i == -1) {
error("Could not find tag separator ':' in line.");
}
String tag = stream.rest().substring(0, i);
stream.advance(i + 1);
parseWs();
parseZeroOrMoreWs();
// Memory optimization
// re-use the tag string
OboFormatTag formatTag = OBOFormatConstants.getTag(tag);
if (formatTag != null) {
tag = formatTag.getTag();
}
return mapDeprecatedTag(tag);
}
private void parseIdRef(@Nonnull Clause cl) {
parseIdRef(cl, false);
}
private void parseIdRef(@Nonnull Clause cl, boolean optional) {
String id = getParseUntil(" !{");
if (!optional && id.length() < 1) {
error("");
}
cl.addValue(id);
}
private void parseIdRefPair(@Nonnull Clause cl) {
parseIdRef(cl);
parseOneOrMoreWs();
parseIdRef(cl);
}
private void parsePerson(@Nonnull Clause cl) {
parseUnquotedString(cl);
}
private boolean parseISODate(@Nonnull Clause cl) {
String dateStr = getParseUntil(" !{");
// Date date;
// try {
// date = isoDateFormat.parse(dateStr);
cl.setValue(dateStr);
return true;
/*
* } catch (ParseException e) { // TODO Auto-generated catch block
* e.printStackTrace(); return false; }
*/
}
private void parseSubsetdef(@Nonnull Clause cl) {
parseIdRef(cl);
parseOneOrMoreWs();
if (stream.consume("\"")) {
String desc = getParseUntilAdv("\"");
cl.addValue(desc);
} else {
error("");
}
parseZeroOrMoreWs();
parseQualifierBlock(cl);
parseHiddenComment();
}
private void parseSynonymTypedef(@Nonnull Clause cl) {
parseIdRef(cl);
parseOneOrMoreWs();
if (stream.consume("\"")) {
String desc = getParseUntilAdv("\"");
cl.addValue(desc);
// TODO: handle edge case where line ends with trailing whitespace
// and no scope
if (stream.peekCharIs(' ')) {
parseOneOrMoreWs();
parseIdRef(cl, true);// TODO - verify that this is a valid
// scope
}
}
parseZeroOrMoreWs();
parseQualifierBlock(cl);
parseHiddenComment();
}
private void parseHeaderDate(@Nonnull Clause cl) {
parseZeroOrMoreWs();
String v = getParseUntil("!");
v = removeTrailingWS(v);
try {
Date date = OBOFormatConstants.headerDateFormat().parse(v);
cl.addValue(date);
} catch (ParseException e) {
throw new OBOFormatParserException("Could not parse date from string: " + v, e, stream.lineNo, stream.line);
}
}
private boolean parseImport(@Nonnull Clause cl) {
parseZeroOrMoreWs();
String v = getParseUntil("!{");
v = removeTrailingWS(v);
cl.setValue(v);
// parse and ignore annotations for import statements
parseZeroOrMoreWs();
if (stream.peekCharIs('{')) {
// do noy parse trailing qualifiers.
getParseUntilAdv("}");
}
parseHiddenComment();// ignore return value, as comments are optional
return true;
}
private void parseIdSpace(@Nonnull Clause cl) {
parseZeroOrMoreWs();
parseIdRefPair(cl);
parseZeroOrMoreWs();
if (stream.peekCharIs('"')) {
stream.consume("\"");
String desc = getParseUntilAdv("\"");
cl.addValue(desc);
} else {
String desc = getParseUntil(" !{");
cl.addValue(desc);
}
parseZeroOrMoreWs();
parseQualifierBlock(cl);
parseHiddenComment();
}
private void parseRelationship(@Nonnull Clause cl) {
parseIdRef(cl);
parseOneOrMoreWs();
parseIdRef(cl);
}
private void parsePropertyValue(@Nonnull Clause cl) {
// parse a pair or triple
// the first and second value, may be quoted strings
if (stream.peekCharIs('\"')) {
stream.consume("\"");
String desc = getParseUntilAdv("\"");
cl.addValue(desc);
} else {
parseIdRef(cl);
}
parseOneOrMoreWs();
if (stream.peekCharIs('\"')) {
stream.consume("\"");
String desc = getParseUntilAdv("\"");
cl.addValue(desc);
} else {
parseIdRef(cl);
}
// check if there is a third value to parse
parseZeroOrMoreWs();
String s = getParseUntil(" !{");
if (!s.isEmpty()) {
cl.addValue(s);
}
}
/**
* intersection_of-Tag Class-ID | intersection_of-Tag Relation-ID Class-ID.
*
* @param cl
* cl
* @throws OBOFormatParserException
* parser exception
*/
private void parseTermIntersectionOf(@Nonnull Clause cl) {
parseIdRef(cl);
// consumed the first ID
parseZeroOrMoreWs();
if (!stream.eol()) {
char c = stream.peekChar();
if (c != '!' && c != '{') {
// try to consume the second id
parseIdRef(cl, true);
}
}
}
private void parseTypedefIntersectionOf(@Nonnull Clause cl) {
// single values only
parseIdRef(cl);
}
// ----------------------------------------
// Synonyms
// ----------------------------------------
private boolean parseDeprecatedSynonym(@Nonnull String tag, @Nonnull Clause cl) {
String scope = null;
if (tag.equals("exact_synonym")) {
scope = OboFormatTag.TAG_EXACT.getTag();
} else if (tag.equals("narrow_synonym")) {
scope = OboFormatTag.TAG_NARROW.getTag();
} else if (tag.equals("broad_synonym")) {
scope = OboFormatTag.TAG_BROAD.getTag();
} else if (tag.equals("related_synonym")) {
scope = OboFormatTag.TAG_RELATED.getTag();
} else {
return false;
}
cl.setTag(OboFormatTag.TAG_SYNONYM.getTag());
if (stream.consume("\"")) {
String syn = getParseUntilAdv("\"");
cl.setValue(syn);
cl.addValue(scope);
parseZeroOrMoreWs();
parseXrefList(cl, false);
return true;
}
return false;
}
private void parseSynonym(@Nonnull Clause cl) {
if (stream.consume("\"")) {
String syn = getParseUntilAdv("\"");
cl.setValue(syn);
parseZeroOrMoreWs();
if (!stream.peekCharIs('[')) {
parseIdRef(cl, true);
parseZeroOrMoreWs();
if (!stream.peekCharIs('[')) {
parseIdRef(cl, true);
parseZeroOrMoreWs();
}
}
parseXrefList(cl, false);
} else {
error("The synonym is always a quoted string.");
}
}
// ----------------------------------------
// Definitions
// ----------------------------------------
private void parseDef(@Nonnull Clause cl) {
if (stream.consume("\"")) {
String def = getParseUntilAdv("\"");
cl.setValue(def);
parseZeroOrMoreWs();
parseXrefList(cl, true);
} else {
error("Definitions should always be a quoted string.");
}
}
private void parseOwlDef(@Nonnull Clause cl) {
if (stream.consume("\"")) {
String def = getParseUntilAdv("\"");
cl.setValue(def);
parseZeroOrMoreWs();
parseXrefList(cl, true);
} else {
error("The " + cl.getTag() + " clause is always a quoted string.");
}
}
// ----------------------------------------
// XrefLists - e.g. [A:1, B:2, ... ]
// ----------------------------------------
private void parseXrefList(@Nonnull Clause cl, boolean optional) {
if (stream.consume("[")) {
parseZeroOrMoreXrefs(cl);
parseZeroOrMoreWs();
if (!stream.consume("]")) {
error("Missing closing ']' for xref list at pos: " + stream.pos);
}
} else if (!optional) {
error("Clause: " + cl.getTag() + "; expected an xref list, or at least an empty list '[]' at pos: "
+ stream.pos);
}
}
private boolean parseZeroOrMoreXrefs(@Nonnull Clause cl) {
if (parseXref(cl)) {
while (stream.consume(",") && parseXref(cl)) {}
}
return true;
}
// an xref that supports a value of values in a clause
private boolean parseXref(@Nonnull Clause cl) {
parseZeroOrMoreWs();
String id = getParseUntil("\",]!{", true);
if (!id.isEmpty()) {
id = removeTrailingWS(id);
if (id.contains(" ")) {
warn("accepting bad xref with spaces:" + id);
}
Xref xref = new Xref(id);
cl.addXref(xref);
parseZeroOrMoreWs();
if (stream.peekCharIs('"')) {
stream.consume("\"");
xref.setAnnotation(getParseUntilAdv("\""));
}
return true;
}
return false;
}
// an xref that is a direct value of a clause
private boolean parseDirectXref(@Nonnull Clause cl) {
parseZeroOrMoreWs();
String id = getParseUntil("\",]!{", true);
id = id.trim();
if (id.contains(" ")) {
warn("accepting bad xref with spaces:<" + id + '>');
}
id = id.replaceAll(" +\\Z", "");
Xref xref = new Xref(id);
cl.addValue(xref);
parseZeroOrMoreWs();
if (stream.peekCharIs('"')) {
stream.consume("\"");
xref.setAnnotation(getParseUntilAdv("\""));
}
return true;
}
// ----------------------------------------
// Qualifier Value blocks - e.g. {a="1",b="foo", ...}
// ----------------------------------------
private void parseQualifierBlock(@Nonnull Clause cl) {
if (stream.consume("{")) {
parseZeroOrMoreQuals(cl);
parseZeroOrMoreWs();
boolean success = stream.consume("}");
if (!success) {
error("Missing closing '}' for trailing qualifier block.");
}
}
}
private void parseZeroOrMoreQuals(@Nonnull Clause cl) {
if (parseQual(cl)) {
while (stream.consume(",") && parseQual(cl)) {}
}
}
private boolean parseQual(@Nonnull Clause cl) {
parseZeroOrMoreWs();
String rest = stream.rest();
assert rest != null;
if (!rest.contains("=")) {
error(
"Missing '=' in trailing qualifier block. This might happen for not properly escaped '{', '}' chars in comments.");
}
String q = getParseUntilAdv("=");
parseZeroOrMoreWs();
String v;
if (stream.consume("\"")) {
v = getParseUntilAdv("\"");
} else {
v = getParseUntil(" ,}");
warn("qualifier values should be enclosed in quotes. You have: " + q + '=' + stream.rest());
}
if (v.isEmpty()) {
warn("Empty value for qualifier in trailing qualifier block.");
v = "";
}
QualifierValue qv = new QualifierValue(q, v);
cl.addQualifierValue(qv);
parseZeroOrMoreWs();
return true;
}
// ----------------------------------------
// Other
// ----------------------------------------
private void parseBoolean(@Nonnull Clause cl) {
if (stream.consume("true")) {
cl.setValue(true);
} else if (stream.consume("false")) {
cl.setValue(false);
} else {
error("Could not parse boolean value.");
}
}
protected void parseIdLine(@Nonnull Frame f) {
String t = getParseTag();
OboFormatTag tag = OBOFormatConstants.getTag(t);
if (tag != OboFormatTag.TAG_ID) {
error("Expected id tag as first line in frame, but was: " + tag);
}
Clause cl = new Clause(t);
f.addClause(cl);
String id = getParseUntil(" !{");
if (id.isEmpty()) {
error("Could not find an valid id, id is empty.");
}
cl.addValue(id);
f.setId(id);
parseEOL(cl);
}
// ----------------------------------------
// End-of-line matter
// ----------------------------------------
/**
* @param cl
* cl
* @throws OBOFormatParserException
* parser exception
*/
public void parseEOL(@Nonnull Clause cl) {
parseZeroOrMoreWs();
parseQualifierBlock(cl);
parseHiddenComment();
forceParseNlOrEof();
}
private void parseHiddenComment() {
parseZeroOrMoreWs();
if (stream.peekCharIs('!')) {
stream.forceEol();
}
}
protected void parseUnquotedString(@Nonnull Clause cl) {
parseZeroOrMoreWs();
String v = getParseUntil("!{");
// strip whitespace from the end - TODO
v = removeTrailingWS(v);
cl.setValue(v);
if (stream.peekCharIs('{')) {
parseQualifierBlock(cl);
}
parseHiddenComment();
}
// Newlines, whitespace
protected void forceParseNlOrEof() {
parseZeroOrMoreWs();
if (stream.eol()) {
stream.advanceLine();
return;
}
if (stream.eof()) {
return;
}
error("expected newline or end of line but found: " + stream.rest());
}
protected void parseZeroOrMoreWsOptCmtNl() {
while (true) {
parseZeroOrMoreWs();
parseHiddenComment();
if (stream.eol()) {
stream.advanceLine();
} else {
return;
}
}
}
// non-newline
protected void parseWs() {
if (stream.eol()) {
error("Expected at least one white space, but found end of line at pos: " + stream.pos);
}
if (stream.eof()) {
error("Expected at least one white space, but found end of file.");
}
if (stream.peekChar() == ' ') {
stream.advance(1);
} else {
warn("Expected white space at pos: " + stream.pos);
}
}
protected void parseOneOrMoreWs() {
if (stream.eol() || stream.eof()) {
error("Expected at least one white space at pos: " + stream.pos);
}
int n = 0;
while (stream.peekCharIs(' ')) {
stream.advance(1);
n++;
}
if (n == 0) {
error("Expected at least one white space at pos: " + stream.pos);
}
}
protected void parseZeroOrMoreWs() {
if (!stream.eol() && !stream.eof()) {
while (stream.peekCharIs(' ')) {
stream.advance(1);
}
}
}
@Nonnull
private String getParseUntilAdv(@Nonnull String compl) {
String ret = getParseUntil(compl);
stream.advance(1);
return ret;
}
@Nonnull
private String getParseUntil(@Nonnull String compl) {
return getParseUntil(compl, false);
}
@Nonnull
private String getParseUntil(@Nonnull String compl, boolean commaWhitespace) {
String r = stream.rest();
assert r != null;
int i = 0;
boolean hasEscapedChars = false;
while (i < r.length()) {
if (r.charAt(i) == '\\') {
hasEscapedChars = true;
i += 2;// Escape
continue;
}
if (compl.contains(r.subSequence(i, i + 1))) {
if (commaWhitespace && r.charAt(i) == ',') {
// a comma is only a valid separator with a following
// whitespace
// see bug and specification update
// http://code.google.com/p/oboformat/issues/detail?id=54
if (i + 1 < r.length() && r.charAt(i + 1) == ' ') {
break;
}
} else {
break;
}
}
i++;
}
if (i == 0) {
return "";
}
String ret = r.substring(0, i);
if (hasEscapedChars) {
StringBuilder sb = new StringBuilder();
for (int j = 0; j < ret.length(); j++) {
char c = ret.charAt(j);
if (c == '\\') {
int next = j + 1;
if (next < ret.length()) {
char nextChar = ret.charAt(next);
switch (nextChar) {
case 'n':// newline
sb.append('\n');
break;
case 'W':// single space
sb.append(' ');
break;
case 't':// tab
sb.append('\n');
break;
default:
// assume that any char after a backlash is an
// escaped char.
// spec for this optional behavior
// http://www.geneontology.org/GO.format.obo-1_2.shtml#S.1.5
sb.append(nextChar);
break;
}
j += 1;// skip the next char
}
} else {
sb.append(c);
}
}
ret = sb.toString();
}
stream.advance(i);
String cachedValue = stringCache.getUnchecked(ret);
if (LOG.isTraceEnabled()) {
if (ret != cachedValue) {
LOG.trace("Cache hit for {}", cachedValue);
}
}
return cachedValue;
}
@Nonnull
private static String mapDeprecatedTag(@Nonnull String tag) {
if (tag.equals("inverse_of_on_instance_level")) {
return OboFormatTag.TAG_INVERSE_OF.getTag();
}
if (tag.equals("xref_analog")) {
return OboFormatTag.TAG_XREF.getTag();
}
if (tag.equals("xref_unknown")) {
return OboFormatTag.TAG_XREF.getTag();
}
if (tag.equals("instance_level_is_transitive")) {
return OboFormatTag.TAG_IS_TRANSITIVE.getTag();
}
return tag;
}
private static String removeTrailingWS(@Nonnull String s) {
// TODO make this more efficient
return s.replaceAll("\\s*$", "");
}
private void error(String message) {
throw new OBOFormatParserException(message, stream.lineNo, stream.line);
}
private void warn(String message) {
LOG.warn("LINE: {} {} LINE:\n{}", stream.lineNo, message, stream.line);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy