
org.biopax.ols.impl.BaseOBO2AbstractLoader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of obo-fetcher Show documentation
Show all versions of obo-fetcher Show documentation
OBO ontology access and handling (derived from EBI PSIDEV Ontology Manager and OLS)
package org.biopax.ols.impl;
/*
*
*/
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.biopax.ols.Annotation;
import org.biopax.ols.Constants;
import org.biopax.ols.DbXref;
import org.biopax.ols.Loader;
import org.biopax.ols.Ontology;
import org.biopax.ols.Parser;
import org.biopax.ols.Term;
import org.biopax.ols.TermPath;
import org.biopax.ols.TermRelationship;
import org.biopax.ols.TermSynonym;
import org.obo.datamodel.*;
import java.io.IOException;
import java.util.*;
/**
* Base class for all loaders. This class will perform OBO-to-OLS object conversion, creating all relevant
* objects. Three methods are listed as abstract: configure, parse and printusage, which depend on the OBO
* format being used (GOFF or OBO).
*
* @author Richard Cote
*/
public abstract class BaseOBO2AbstractLoader implements Loader {
//shared logger for this loader and its subclasses
//NOTE(review): the collections below are declared as raw types in this file
protected static Log logger = LogFactory.getLog(BaseOBO2AbstractLoader.class);
//global term storage: beans are stored under their identifier; cleared by
//initializeCommonObjects() and handed to the ontology bean at the end of process()
protected HashMap ontologyTerms = new HashMap(5000);
//ontology metadata copied onto the ontology bean by initializeCommonObjects(), which truncates
//the values in place (definition 2000, full name 128, short name 32, URLs 255 characters)
protected String ONTOLOGY_DEFINITION;
protected String FULL_NAME;
//also used as prefix/suffix when building term identifiers and primary keys
protected String SHORT_NAME;
protected String QUERY_URL = null;
protected String SOURCE_URL = null;
//the ontology being built; re-created by every call to initializeCommonObjects()
protected OntologyBean ontBean = null;
//common objects: relation type terms (IS_A, PART_OF, DEVELOPS_FROM) and synonym type terms
//(the remainder), all created by initializeCommonObjects()
protected TermBean IS_A = null;
protected TermBean PART_OF = null;
protected TermBean DEVELOPS_FROM = null;
protected TermBean ALT_ID = null;
protected TermBean EXACT = null;
protected TermBean NARROW = null;
protected TermBean BROAD = null;
protected TermBean RELATED = null;
protected TermBean SYNONYM = null;
//link type labels accepted as part_of / is_a / develops_from; filled by initializeCommonObjects()
protected final HashSet PART_OF_SET = new HashSet();
protected final HashSet IS_A_SET = new HashSet();
protected final HashSet DEV_FROM_SET = new HashSet();
//relation terms created on demand by getUnknownRelationTermBean(), keyed by the trimmed,
//upper-cased relation name
private HashMap unknown_relations = new HashMap();
//trimmed IDs of the root terms, refreshed by initializeCommonObjects() from getRootTerms()
private TreeSet rootTerms = new TreeSet();
//source of the parsed ontology; process() and getRootTerms() throw if it is still null
protected Parser parser;
//for PSI-MOD: xref database names that are turned into numeric / string annotations
//instead of xrefs when SHORT_NAME equals "MOD"
private Set MOD_NUMERIC_ANNOTATIONS = null;
private Set MOD_STRING_ANNOTATIONS = null;
/**
* Batch mode will optimize inserts. Set to false if having wonky errors or if the RDBMS does
* not support it.
* NOTE(review): not referenced in the visible part of this class - presumably read by
* subclasses or by the persistence code; confirm.
*/
protected static final boolean BATCH_MODE = true;
/**
* Sets the parser that process(), getRootTerms() and initializeCommonObjects() read from.
*
* @param parser the parser to use; process() and getRootTerms() throw an IllegalStateException
* while this is null
*/
protected void setParser(Parser parser) {
this.parser = parser;
}
/**
* Determines which root detection algorithm will be used. The flag is passed unchanged to
* parser.getRootTerms(boolean) by getRootTerms().
*/
protected boolean useGreedy = false;
/**
* Selects the root detection algorithm.
*
* @param useGreedy value forwarded to the parser when root terms are collected
*/
protected void setUseGreedy(boolean useGreedy) {
this.useGreedy = useGreedy;
}
/**
 * Returns the trimmed accession strings of the ontology's root terms, as reported by the parser
 * for the currently selected root detection algorithm ({@code useGreedy}).
 *
 * @return a new, modifiable collection of root term IDs; empty when the parser reports no roots
 * @throws IllegalStateException if the parser has not been initialized
 */
protected Collection getRootTerms() {
    if (parser == null) {
        throw new IllegalStateException("Parser has not been initialized. Did you run configure()?");
    }
    HashSet rootIds = new HashSet();
    Set roots = parser.getRootTerms(useGreedy);
    if (roots != null) {
        //the set is handled as a raw type here, so elements are cast explicitly
        for (Object root : roots) {
            rootIds.add(safeTrim(((OBOObject) root).getID()));
        }
    }
    return rootIds;
}
/**
 * Converts everything the parser produced into OLS beans: first every term is turned into a
 * TermBean, then, in a second pass over the same objects, relationships and paths are built
 * between the beans created in the first pass. The resulting beans are attached to the
 * ontology bean.
 *
 * @throws IllegalStateException if the parser has not been initialized
 */
protected void process() {
    //fail fast when no parser has been set
    if (parser == null) {
        throw new IllegalStateException("parser has not been initialized. Did you run configure()?");
    }
    //work on a private, modifiable copy - this will include obsolete and root terms;
    //the parser may hand back null
    Set allTerms = new HashSet();
    Collection parsed = parser.getTerms();
    if (parsed != null) {
        allTerms.addAll(parsed);
    }
    //shared relation and synonym type terms must exist before any term is converted
    initializeCommonObjects();

    //first pass: terms
    logger.info("Total Terms to process: " + allTerms.size());
    int seen = 0;
    for (Iterator it = allTerms.iterator(); it.hasNext();) {
        Object candidate = it.next();
        seen++;
        if (!(candidate instanceof OBOObject)) {
            logger.info("Ignored object: " + candidate.toString());
            continue;
        }
        processTerm((OBOObject) candidate);
        if (seen % 1000 == 0) {
            logger.debug("Terms Processed: " + seen);
        }
    }
    logger.info("Term processing done");

    //second pass: relationships can only be linked once every term bean exists
    logger.info("Creating relationships");
    seen = 0;
    for (Iterator it = allTerms.iterator(); it.hasNext();) {
        Object candidate = it.next();
        seen++;
        if (!(candidate instanceof OBOObject)) {
            logger.info("Ignored object: " + candidate.toString());
            continue;
        }
        processTermRelationships((OBOObject) candidate);
        if (seen % 1000 == 0) {
            logger.debug("Relationships Processed: " + seen);
        }
    }
    logger.info("Relationship processing done");

    //hand the finished beans to the ontology
    ontBean.setTerms(ontologyTerms.values());
}
/**
 * Internal helper method to initialize and reset the shared objects: a fresh ontology bean,
 * the relation label sets, the PSI-MOD annotation name sets, the built-in relation and synonym
 * type terms, the user-defined synonym types and the root term IDs.
 *
 * The parser may be null when this method is called (the original author mentions the NEWT
 * loader), so every use of it is guarded. The configured name, definition and URL fields are
 * truncated in place to the lengths the database columns allow.
 */
protected void initializeCommonObjects() {
    //ontology version: parser version, else the parser's history date, else the current date
    String version = null;
    if (parser != null) {
        version = parser.getSession().getCurrentHistory().getVersion();
        if (version == null) {
            Date historyDate = parser.getSession().getCurrentHistory().getDate();
            if (historyDate != null) {
                version = historyDate.toString();
            }
        }
    }
    if (version == null) {
        version = (new Date()).toString();
    }

    //create ontology
    ontBean = new OntologyBean();
    ONTOLOGY_DEFINITION = clipOntologyField(ONTOLOGY_DEFINITION, 2000, "definition");
    ontBean.setDefinition(ONTOLOGY_DEFINITION);
    FULL_NAME = clipOntologyField(FULL_NAME, 128, "full name");
    ontBean.setFullOntologyName(FULL_NAME);
    ontBean.setLoadDate(new java.sql.Date(System.currentTimeMillis()));
    SHORT_NAME = clipOntologyField(SHORT_NAME, 32, "short name");
    ontBean.setShortOntologyName(SHORT_NAME);
    ontBean.setVersion(clipOntologyField(version, 128, "version"));
    ontBean.setFullyLoaded(false);
    QUERY_URL = clipOntologyField(QUERY_URL, 255, "query url");
    ontBean.setQueryURL(QUERY_URL);
    SOURCE_URL = clipOntologyField(SOURCE_URL, 255, "source url");
    ontBean.setSourceURL(SOURCE_URL);

    //make certain there's no dirty data (esp if we're using it to load multiple ontologies)
    ontologyTerms.clear();

    //create mapping sets: canonical label, its upper-case form and other spellings seen
    resetRelationLabels(IS_A_SET, Constants.IS_A_RELATION_TYPE, "isa", "ISA", "OBO_REL:is_a");
    resetRelationLabels(PART_OF_SET, Constants.PART_OF_RELATION_TYPE,
            "partof", "PARTOF", "OBO_REL:part_of", "is_part_of");
    resetRelationLabels(DEV_FROM_SET, Constants.DEVELOPS_FROM_RELATION_TYPE, "DERIVED/DEVELOPS_FROM");

    //set PSI-MOD specific xrefs that will be converted to annotations
    MOD_NUMERIC_ANNOTATIONS = new HashSet(Arrays.asList("DiffAvg", "DiffMono", "MassAvg", "MassMono"));
    MOD_STRING_ANNOTATIONS = new HashSet(
            Arrays.asList("DiffFormula", "Formula", "Source", "Origin", "TermSpec"));

    //create relations
    IS_A = registerCommonTerm(Constants.IS_A_RELATION_TYPE, Loader.RELATION_TYPE);
    PART_OF = registerCommonTerm(Constants.PART_OF_RELATION_TYPE, Loader.RELATION_TYPE);
    DEVELOPS_FROM = registerCommonTerm(Constants.DEVELOPS_FROM_RELATION_TYPE, Loader.RELATION_TYPE);

    //create synonyms
    ALT_ID = registerCommonTerm(Constants.ALT_ID_SYNONYM_TYPE, Loader.SYNONYM_TYPE);
    EXACT = registerCommonTerm(Constants.EXACT_SYNONYM_TYPE, Loader.SYNONYM_TYPE);
    NARROW = registerCommonTerm(Constants.NARROW_SYNONYM_TYPE, Loader.SYNONYM_TYPE);
    BROAD = registerCommonTerm(Constants.BROAD_SYNONYM_TYPE, Loader.SYNONYM_TYPE);
    RELATED = registerCommonTerm(Constants.RELATED_SYNONYM_TYPE, Loader.SYNONYM_TYPE);
    SYNONYM = registerCommonTerm(Constants.DEFAULT_SYNONYM_TYPE, Loader.SYNONYM_TYPE);

    //initialize synonymTypeDefs: user-defined synonym types are stored under their own ID
    if (parser != null) {
        Collection synonymTypes = parser.getSession().getSynonymTypes();
        if (synonymTypes != null) {
            //the collection is handled as a raw type here, so elements are cast explicitly
            for (Object element : synonymTypes) {
                SynonymType st = (SynonymType) element;
                ontologyTerms.put(st.getID(), initializeTermBean(
                        st.getName(), SHORT_NAME + ":" + st.getID(), getSynonymTypeDef(st.getScope())));
            }
        }
    }

    //get rid of stale data and get root terms
    rootTerms.clear();
    if (parser != null) {
        rootTerms.addAll(getRootTerms());
    }
}

/**
 * Cuts an ontology metadata value down to the given length, logging a warning when it does so.
 *
 * @param value the value to check, may be null
 * @param maxLength the maximum number of characters allowed
 * @param label the field name used in the warning message
 * @return the value, truncated to maxLength characters if it was longer; null stays null
 */
private static String clipOntologyField(String value, int maxLength, String label) {
    if (value != null && value.length() > maxLength) {
        logger.warn("ontology " + label + " longer than allowed database column length - truncating");
        return value.substring(0, maxLength);
    }
    return value;
}

/**
 * Refills a relation label set with the canonical label, its upper-case form and any aliases.
 * Upper-casing uses Locale.ROOT so the result does not depend on the JVM's default locale.
 */
private static void resetRelationLabels(Set labels, String canonical, String... aliases) {
    labels.clear();
    labels.add(canonical);
    labels.add(canonical.toUpperCase(Locale.ROOT));
    labels.addAll(Arrays.asList(aliases));
}

/**
 * Creates one of the built-in relation or synonym type terms and adds it to the global term
 * storage under its identifier.
 */
private TermBean registerCommonTerm(String name, String def) {
    TermBean bean = initializeTermBean(name, def);
    ontologyTerms.put(bean.getIdentifier(), bean);
    return bean;
}
/**
 * Helper method to create a TermBean given a name and a definition. This is mostly for terms
 * associated with an ontology but not defined by it (eg relation types, synonym types, etc).
 * The identifier is built as SHORT_NAME + ":" + the upper-cased name, and the primary key as
 * identifier + SHORT_NAME + "0".
 *
 * @param name - this value will be used to set the term.name and term.identifier
 * @param def - the term definition.
 * @return a valid TermBean object linked to the current ontology bean
 * @throws IllegalArgumentException if the term name is null or blank.
 */
private TermBean initializeTermBean(String name, String def) {
    if (name == null || "".equals(name.trim())) {
        throw new IllegalArgumentException("Can't have a null or empty term name!");
    }
    String trimmedName = name.trim();
    TermBean bean = new TermBean();
    bean.setName(trimmedName);
    //Locale.ROOT keeps the identifier independent of the JVM's default locale
    bean.setIdentifier(SHORT_NAME + ":" + trimmedName.toUpperCase(Locale.ROOT));
    bean.setDefinition(safeTrim(def));
    bean.setParentOntology(ontBean);
    //must set PK here because OJB will now not set it automatically
    //PK will be term_ac+ont+fully_loaded_false
    bean.setTermPk(bean.getIdentifier() + SHORT_NAME + "0");
    return bean;
}
/**
 * Helper method to create a TermBean given a name, an accession and a definition. The bean is
 * built like the two-argument variant, then its identifier and primary key are replaced with
 * values derived from the given accession.
 *
 * @param name - this value will be used to set the term.name
 * @param accession - this value will be used to set the term.identifier
 * @param def - the term definition.
 * @return a valid TermBean object
 * @throws IllegalArgumentException if the term name or accession is null or blank.
 */
protected TermBean initializeTermBean(String name, String accession, String def) {
    if (accession == null || "".equals(accession.trim())) {
        throw new IllegalArgumentException("Can't have a null or empty term accession!");
    }
    TermBean bean = initializeTermBean(name, def);
    bean.setIdentifier(accession.trim());
    //the PK depends on the identifier, so it has to be rebuilt as well
    bean.setTermPk(bean.getIdentifier() + SHORT_NAME + "0");
    return bean;
}
/**
 * This method will convert an OBOEdit model term into a valid TermBean, while creating synonyms,
 * xrefs and annotations. The valid TermBean generated will be added to the global term storage
 * under its identifier, to be used at a later stage. Objects whose ID starts with "obo:" are
 * skipped.
 *
 * @param obj being an OBOObject object obtained from the parser
 * @throws IllegalStateException if the generated primary key is longer than 255 characters
 */
protected void processTerm(OBOObject obj) {
    if (obj.getID().startsWith("obo:")) {
        //the original author lists obo:datatype, obo:property and obo:class here
        logger.debug("bogus term: " + obj.getID());
        return;
    }
    String termId = safeTrim(obj.getID());
    TermBean trm = new TermBean();
    //must set PK here because OJB will now not set it automatically
    //PK will be term_ac+ont+fully_loaded_false
    trm.setTermPk(termId + SHORT_NAME + "0");
    if (trm.getTermPk().length() > 255) {
        throw new IllegalStateException("term PK longer than allowed database column length: " + trm.getTermPk());
    }
    //identifier goes first so that the truncation warnings below can name the term
    trm.setIdentifier(clipTermField(termId, 255, "identifier", termId));
    trm.setDefinition(clipTermField(safeTrim(obj.getDefinition()), 4000, "definition", trm.getIdentifier()));
    //set as root term if required
    if (rootTerms.contains(termId)) {
        trm.setRootTerm(true);
        logger.info(obj.getID() + " is a root term");
    }
    trm.setName(clipTermField(safeTrim(obj.getName()), 2000, "name", trm.getIdentifier()));
    Namespace nspace = obj.getNamespace();
    if (nspace != null) {
        trm.setNamespace(clipTermField(safeTrim(nspace.getID()), 255, "namespace", trm.getIdentifier()));
    }
    //set if obsolete
    trm.setObsolete(obj.isObsolete());
    //set parent ontology
    trm.setParentOntology(ontBean);
    //process synonyms, xrefs and annotations
    trm.setSynonyms(processSynonyms(obj, trm));
    trm.setXrefs(processXrefs(obj, trm));
    trm.setAnnotations(processAnnotations(obj, trm));
    //a term without child links is a leaf
    Collection children = obj.getChildren();
    trm.setLeaf(children == null || children.isEmpty());
    //add to global storage
    ontologyTerms.put(trm.getIdentifier(), trm);
}

/**
 * Cuts a term field down to the given length, logging a warning that names the owning term.
 *
 * @param value the value to check, may be null
 * @param maxLength the maximum number of characters allowed
 * @param label the field name used in the warning message
 * @param ownerId identifier of the term the value belongs to, used in the warning message
 * @return the value, truncated to maxLength characters if it was longer; null stays null
 */
private String clipTermField(String value, int maxLength, String label, String ownerId) {
    if (value != null && value.length() > maxLength) {
        logger.warn("term " + label + " longer than allowed database column length - truncating " + ownerId);
        return value.substring(0, maxLength);
    }
    return value;
}
/**
 * Internal method to create AnnotationBean objects from values extracted from an OBOEdit term
 * object and properly set up associations to the parent OLS term object. Annotations are created
 * for the term comment, consider-replacement and replaced-by references, subsets and property
 * values. When SHORT_NAME is "MOD", dbxrefs whose database name is one of the PSI-MOD annotation
 * names are turned into annotations as well. String values and types longer than 2000 characters
 * are truncated with a warning (the PSI-MOD annotations are stored as they are).
 *
 * @param obj - the OBOEdit object to extract information from
 * @param trm - the parent term to link the annotations to
 * @return a collection of properly created AnnotationBeans
 */
private Collection processAnnotations(OBOObject obj, TermBean trm) {
    ArrayList annotations = new ArrayList();

    String comment = safeTrim(obj.getComment());
    if (comment != null) {
        annotations.add(newStringAnnotation(Annotation.OBO_COMMENT, comment, "comment", trm));
    }
    //the OBO collections are handled as raw types here, so elements are cast explicitly
    for (Object element : obj.getConsiderReplacements()) {
        annotations.add(newStringAnnotation(Annotation.OBO_CONSIDER_REPLACEMENT,
                describeReplacement((ObsoletableObject) element), "value", trm));
    }
    for (Object element : obj.getReplacedBy()) {
        annotations.add(newStringAnnotation(Annotation.OBO_REPLACED_BY,
                describeReplacement((ObsoletableObject) element), "value", trm));
    }
    for (Object element : obj.getSubsets()) {
        TermSubset subset = (TermSubset) element;
        annotations.add(newStringAnnotation(Annotation.SUBSET + "_" + subset.getName(),
                subset.getDesc(), "value", trm));
    }
    for (Object element : obj.getPropertyValues()) {
        AnnotationBean annot = parsePropertyValue((PropertyValue) element, trm);
        if (annot != null) {
            annotations.add(annot);
        }
    }
    if ("MOD".equals(SHORT_NAME)) {
        for (Object element : obj.getDbxrefs()) {
            Dbxref xref = (Dbxref) element;
            String database = safeTrim(xref.getDatabase());
            boolean stringValued = MOD_STRING_ANNOTATIONS.contains(database);
            if (!stringValued && !MOD_NUMERIC_ANNOTATIONS.contains(database)) {
                //ordinary xref, handled by processXrefs
                continue;
            }
            AnnotationBean annot = new AnnotationBean();
            annot.setAnnotationType(database);
            if (stringValued) {
                annot.setAnnotationStringValue(safeTrim(xref.getDesc()));
            } else {
                annot.setAnnotationDoubleValue(safeTrim(xref.getDesc()));
            }
            annot.setParentTerm(trm);
            annotations.add(annot);
        }
    }
    return annotations;
}

/**
 * Creates a string annotation linked to the given term, truncating the value if required.
 *
 * @param type the annotation type
 * @param value the annotation value, may be null
 * @param label the word used for the value in the truncation warning
 * @param trm the parent term
 */
private AnnotationBean newStringAnnotation(String type, String value, String label, TermBean trm) {
    AnnotationBean annot = new AnnotationBean();
    annot.setAnnotationType(type);
    annot.setAnnotationStringValue(clipAnnotationText(value, label, trm));
    annot.setParentTerm(trm);
    return annot;
}

/**
 * Cuts annotation text down to 2000 characters, logging a warning that names the owning term.
 */
private String clipAnnotationText(String text, String label, TermBean trm) {
    if (text != null && text.length() > 2000) {
        logger.warn("annotation " + label + " longer than allowed database column length - truncating "
                + trm.getIdentifier());
        return text.substring(0, 2000);
    }
    return text;
}

/**
 * Renders a replacement term as "ID" or, when it has a name, "ID: name".
 */
private String describeReplacement(ObsoletableObject target) {
    String text = target.getID();
    if (target.getName() != null) {
        text += ": " + target.getName();
    }
    return text;
}

/**
 * Turns one property value into an annotation.
 *
 * A line such as
 *   property_value: EFO:definition_editor "James Malone" xsd:string
 * is parsed to property = property_value and value = the rest of the line, so the value is split
 * manually into something more informative: type = EFO:definition_editor, value = James Malone.
 * Values without a space after the first character are stored as plain property/value pairs.
 *
 * @return the annotation, or null when the property value is null or cannot be parsed
 */
private AnnotationBean parsePropertyValue(PropertyValue pv, TermBean trm) {
    try {
        String raw = pv.getValue();
        if (raw == null) {
            //invalidly constructed property_value!
            logger.warn("Error parsing property_value - Ignoring null value: " + pv.toString());
            return null;
        }
        int split = raw.indexOf(' ');
        if (split <= 0) {
            //this will capture occasions where people just put random key-value tags
            //as annotations
            String plainType = clipAnnotationText(pv.getProperty(), "type", trm);
            return newStringAnnotation(plainType, raw, "value", trm);
        }
        //parse property type: first token, without a trailing colon
        String type = raw.substring(0, split).trim();
        if (type.endsWith(":")) {
            type = type.substring(0, type.length() - 1);
        }
        logger.debug("Setting property type: " + type);
        type = clipAnnotationText(type.trim(), "type", trm);
        //parse property value: drop the xsd datatype suffix and surrounding quotes
        String value = raw.substring(split + 1);
        int xsd = value.indexOf("xsd:");
        if (xsd > 0) {
            value = value.substring(0, xsd);
        }
        value = value.trim();
        if (value.startsWith("\"") && value.endsWith("\"")) {
            value = value.substring(1, value.length() - 1);
        }
        value = value.trim();
        logger.debug("Setting property value: " + value);
        //the length check runs on the trimmed text that is actually stored
        return newStringAnnotation(type, value, "value", trm);
    } catch (RuntimeException re) {
        logger.warn("Error parsing property_value - Ignoring : " + pv.toString());
        logger.debug("pv.getProperty() = " + pv.getProperty());
        logger.debug("pv.getValue() = " + pv.getValue());
        return null;
    }
}
/**
 * Internal method to create DbXrefBean objects from the dbxrefs and definition dbxrefs of an
 * OBOEdit term object. For the "MOD" ontology, dbxrefs whose database name is one of the PSI-MOD
 * annotation names are left out, because processAnnotations turns them into annotations.
 *
 * @param obj - the OBOEdit object to extract information from
 * @param trm - the parent term (not used by the visible code)
 * @return a collection of properly created DbXrefBean
 */
private Collection processXrefs(OBOObject obj, TermBean trm) {
    HashSet xrefBeans = new HashSet();
    boolean psiMod = "MOD".equals(SHORT_NAME);
    for (Object element : obj.getDbxrefs()) {
        Dbxref xref = (Dbxref) element;
        if (psiMod) {
            String database = safeTrim(xref.getDatabase());
            if (MOD_STRING_ANNOTATIONS.contains(database) || MOD_NUMERIC_ANNOTATIONS.contains(database)) {
                //do nothing - annotation will be created later
                continue;
            }
        }
        //todo - hardcode analog for now and remove it when bug is fixed
        //fix oboedit codebase error
        xrefBeans.add(createDbXref(xref, Dbxref.ANALOG));
    }
    //todo - remove this when bug is fixed
    for (Object element : obj.getDefDbxrefs()) {
        xrefBeans.add(createDbXref((Dbxref) element, Dbxref.DEFINITION));
    }
    return xrefBeans;
}
/**
 * Helper method to create and populate a DbXrefBean object from an OBOEdit Dbxref object. The
 * database name is truncated to 255 characters, the accession to 256 and the description to
 * 2000, each with a warning. A description equal to "none" is not stored.
 *
 * @param xref - the OBOEdit object to extract information from
 * @param xrefType - the xref type
 * @return a valid OLS model DbXrefBean object
 */
private DbXref createDbXref(Dbxref xref, int xrefType) {
    DbXrefBean bean = new DbXrefBean();

    String dbName = safeTrim(xref.getDatabase());
    if (dbName != null && dbName.length() > 255) {
        logger.warn("dbxref dbname longer than allowed database column length - truncating " + dbName);
        dbName = dbName.substring(0, 255);
    }
    bean.setDbName(dbName);

    //stupid oboedit artifact
    //NOTE(review): the null check reads getDatabaseID() but the "none" check reads getID();
    //the description check below suggests getDatabaseID() was meant - confirm before changing
    if (xref.getDatabaseID() != null && !xref.getID().trim().equals("none")) {
        String accession = safeTrim(xref.getDatabaseID());
        if (accession != null && accession.length() > 256) {
            logger.warn("dbxref accession longer than allowed database column length: " + accession);
            accession = accession.substring(0, 256);
        }
        bean.setAccession(accession);
    }

    //stupid oboedit artifact
    if (xref.getDesc() != null && !xref.getDesc().trim().equals("none")) {
        String description = safeTrim(xref.getDesc());
        if (description != null && description.length() > 2000) {
            logger.warn("dbxref description longer than allowed database column length: " + description);
            description = description.substring(0, 2000);
        }
        bean.setDescription(description);
    }

    bean.setXrefType(xrefType);
    return bean;
}
/**
 * Once all the term objects have been created, this method will create the relationships and
 * paths that link all the children terms to a given parent term, and store them on the OLS
 * TermBean held in the global term storage. Objects whose ID starts with "obo:" are skipped,
 * as are objects for which no TermBean was stored.
 *
 * @param obj - the OBOEdit term object that will be used to extract information from
 */
private void processTermRelationships(OBOObject obj) {
    if (obj.getID().startsWith("obo:")) {
        //the original author lists obo:datatype, obo:property and obo:class here
        logger.debug("bogus term: " + obj.getID());
        return;
    }
    TermBean trm = (TermBean) ontologyTerms.get(safeTrim(obj.getID()));
    if (trm == null) {
        //nothing was stored for this object (e.g. an overriding processTerm skipped it)
        logger.warn("No term found to attach relationships to: " + obj.getID());
        return;
    }
    trm.setRelationships(processRelationships(obj, trm));
    trm.setPaths(processPaths(obj, trm));
    //update term object
    ontologyTerms.put(trm.getIdentifier(), trm);
}
/**
 * Internal helper method to create TermRelationshipBeans for a given term. One bean is created
 * per child link whose child term is present in the global term storage:
 *
 * term1
 * |_ child1 child1 IS_A term1
 * |_ child2 child2 IS_A term1
 * subject pred object
 *
 * The predicate is IS_A, PART_OF or DEVELOPS_FROM when the link type ID is in the matching
 * label set; any other type gets a relation term created on demand. Instances are not turned
 * into relationships.
 *
 * @param obj - the OBOEdit term object to extract information from
 * @param trm - the OLS parent term to link to
 * @return a Collection of valid TermRelationshipBeans, empty if the term has no child links
 */
private Collection processRelationships(OBOObject obj, TermBean trm) {
    HashSet relationships = new HashSet();
    Collection children = obj.getChildren();
    if (children == null) {
        //processTerm treats a null child collection as "no children" as well
        return relationships;
    }
    //the collection is handled as a raw type here, so elements are cast explicitly
    for (Object element : children) {
        Link lnk = (Link) element;
        //use the child's ID to look up the bean created in the first pass
        Term childTrm = (Term) ontologyTerms.get(safeTrim(lnk.getChild().getID()));
        if (childTrm == null) {
            logger.debug("No object term found for link: " + lnk.toString());
            continue;
        }
        //set predicate type
        String typeId = lnk.getType().getID();
        TermBean predicate;
        if (IS_A_SET.contains(typeId)) {
            predicate = IS_A;
        } else if (PART_OF_SET.contains(typeId)) {
            predicate = PART_OF;
        } else if (DEV_FROM_SET.contains(typeId)) {
            predicate = DEVELOPS_FROM;
        } else {
            predicate = getUnknownRelationTermBean(typeId);
            if (predicate == null) {
                logger.warn("Unable to create unknown relation type: >" + typeId + "<");
                continue;
            }
        }
        TermRelationshipBean trb = new TermRelationshipBean();
        trb.setSubjectTerm(childTrm);
        trb.setObjectTerm(trm);
        trb.setPredicateTerm(predicate);
        //set ontology
        trb.setParentOntology(ontBean);
        relationships.add(trb);
    }
    return relationships;
}
/**
 * Helper method to create unknown relationship terms as they are required. A term is created
 * once per relation name (compared after trimming and upper-casing), remembered for later
 * calls and added to the global term storage for persistence with the ontology.
 *
 * @param relationType - the string that defines the relationship from the ontology
 * @return the TermBean for that relation, or null if relationType is null
 */
protected TermBean getUnknownRelationTermBean(String relationType) {
    if (relationType == null) {
        return null;
    }
    String relationName = relationType.trim();
    String cacheKey = relationName.toUpperCase();
    TermBean known = (TermBean) unknown_relations.get(cacheKey);
    if (known != null) {
        return known;
    }
    TermBean created = initializeTermBean(relationName, Loader.RELATION_TYPE);
    logger.info("Created unkonwn relation type: " + relationType);
    unknown_relations.put(cacheKey, created);
    //add to storage map so it'll get persisted with the rest
    ontologyTerms.put(created.getIdentifier(), created);
    return created;
}
/**
 * Internal helper method to create TermPathBeans for a given term. Child paths are computed by
 * the parser for the 3 major relationship types: IS_A, PART_OF and DEVELOPS_FROM. The PART_OF
 * and DEVELOPS_FROM computations are also allowed to traverse IS_A links, for maximal
 * completeness while staying semantically correct; the IS_A computation only follows IS_A links.
 *
 * term1
 * |_ child1 child1 IS_A term1
 * |_ child2 child2 IS_A term1
 * subject pred object
 *
 * @param obj - the OBOEdit term object to extract information from
 * @param trm - the OLS parent term to link to
 * @return a Collection of valid TermPathBeans
 */
private Collection processPaths(OBOObject obj, TermBean trm) {
    HashSet allPaths = new HashSet();

    //is_a paths follow is_a links only
    HashMap isAPaths = parser.computeChildPaths(1, IS_A_SET, obj);
    allPaths.addAll(createTermPathBeans(isAPaths, Constants.IS_A_RELATION_TYPE_ID, IS_A, trm));

    //part_of paths may traverse is_a links, so both label sets are passed
    HashSet partOfLabels = new HashSet();
    partOfLabels.addAll(PART_OF_SET);
    partOfLabels.addAll(IS_A_SET);
    HashMap partOfPaths = parser.computeChildPaths(1, partOfLabels, obj);
    allPaths.addAll(createTermPathBeans(partOfPaths, Constants.PART_OF_RELATION_TYPE_ID, PART_OF, trm));

    //develops_from paths may traverse is_a links, so both label sets are passed
    HashSet devFromLabels = new HashSet();
    devFromLabels.addAll(DEV_FROM_SET);
    devFromLabels.addAll(IS_A_SET);
    HashMap devFromPaths = parser.computeChildPaths(1, devFromLabels, obj);
    allPaths.addAll(createTermPathBeans(
            devFromPaths, Constants.DEVELOPS_FROM_RELATION_TYPE_ID, DEVELOPS_FROM, trm));

    return allPaths;
}
/**
 * Internal method that turns precomputed paths into TermPathBeans. Every entry of the map whose
 * term ID is present in the global term storage yields one bean:
 *
 * term1
 * |_ child1 child1 IS_A term1
 * |_ child2 child2 IS_A term1
 * subject pred object
 *
 * @param paths map of term ID to distance, as returned by the parser
 * @param relationTypeId numeric ID of the relation type the paths were computed for
 * @param relationBean the relation term (is_a, part_of, develops_from) used as predicate
 * @param trm the OLS term used as object of every path
 * @return a Collection of valid TermPathBeans
 */
private Collection createTermPathBeans(HashMap paths, int relationTypeId,
                                       TermBean relationBean, TermBean trm) {
    HashSet pathBeans = new HashSet();
    //the map is handled as a raw type here, so keys and values are cast explicitly
    for (Object element : paths.entrySet()) {
        Map.Entry entry = (Map.Entry) element;
        //key = termID, value = distance
        String termId = (String) entry.getKey();
        int distance = ((Number) entry.getValue()).intValue();
        Term subject = (Term) ontologyTerms.get(termId);
        if (subject == null) {
            logger.debug("No object term found for term path: " + trm.getIdentifier() + "->" + termId);
            continue;
        }
        TermPathBean tpb = new TermPathBean();
        tpb.setDistance(distance);
        tpb.setSubjectTerm(subject);
        tpb.setObjectTerm(trm);
        //set predicateTerm - is_a, part_of, develops_from
        tpb.setPredicateTerm(relationBean);
        tpb.setRelationshipTypeId((long) relationTypeId);
        tpb.setParentOntology(ontBean);
        pathBeans.add(tpb);
    }
    return pathBeans;
}
/**
 * Maps an OBO synonym scope constant to the definition text used for user-defined synonym
 * type terms.
 *
 * @param scope one of the Synonym scope constants
 * @return the matching definition text; "Unknown synonym type" for any other value
 */
private String getSynonymTypeDef(int scope) {
    String definition = "Unknown synonym type";
    switch (scope) {
        case Synonym.EXACT_SYNONYM:
            definition = "Exact synonym type";
            break;
        case Synonym.NARROW_SYNONYM:
            definition = "Narrow synonym type";
            break;
        case Synonym.BROAD_SYNONYM:
            definition = "Broad synonym type";
            break;
        case Synonym.RELATED_SYNONYM:
            definition = "Related synonym type";
            break;
        default:
            //Synonym.UNKNOWN_SCOPE and anything unexpected keep the fallback text
            break;
    }
    return definition;
}
/**
 * Builds the synonyms for a given term.
 * <p>
 * Three groups of entries are created, all linked to <code>trm</code>:
 * <ol>
 * <li>one entry per synonym of <code>obj</code> whose trimmed text is not empty; its type is the
 * user-defined synonym type when one is set, otherwise the common term matching its scope;</li>
 * <li>one ALT_ID entry per secondary id of <code>obj</code> whose trimmed value is not empty;</li>
 * <li>if the term identifier starts with "http:" (any case), one ALT_ID entry holding the text
 * after the last "/", eg EFO_1234 for http://www.ebi.ac.uk/EFO_1234.</li>
 * </ol>
 * Values longer than 2000 characters are truncated with a warning. Blank values are logged
 * at debug level and skipped.
 *
 * @param obj OBOObject representing the term
 * @param trm TermBean to link to the TermSynonym objects being created
 * @return a collection of properly constructed and linked TermSynonymBean objects
 * @throws IllegalStateException if a synonym uses a user-defined synonym type that is not present
 *                               in ontologyTerms, or if a generated synonym PK exceeds 255 characters
 */
private Collection processSynonyms(OBOObject obj, TermBean trm) {
    Set<TermSynonymBean> retval = new HashSet<TermSynonymBean>();
    //multiplied into every synonym PK to avoid tsb hashcode collisions for a single trm
    int synCount = 1;

    //loop over synonyms
    Set<Synonym> syns = obj.getSynonyms();
    for (Synonym aSyn : syns) {
        String synVal = safeTrim(aSyn.getText());
        if (synVal == null) {
            logger.debug("Null Synonym value encountered for " + trm.getIdentifier());
            continue;
        }
        TermSynonymBean tsb = new TermSynonymBean();
        //link parent term
        tsb.setParentTerm(trm);
        //set value
        tsb.setSynonym(truncateSynonymValue(synVal, trm));
        //link synonymType Term
        applySynonymType(tsb, aSyn);
        Collection<Dbxref> oboSynXrefs = aSyn.getXrefs();
        if (oboSynXrefs != null) {
            //left untyped on purpose: the declared return type of createDbXref is not
            //visible from this method, and a raw collection is accepted by any setter signature
            @SuppressWarnings({"unchecked", "rawtypes"})
            Collection xrefs = new HashSet();
            for (Dbxref xref : oboSynXrefs) {
                xrefs.add(createDbXref(xref, Dbxref.RELATED_SYNONYM));
            }
            tsb.setSynonymXrefs(xrefs);
        }
        assignSynonymPk(tsb, synCount++, trm);
        //add TermSynonym to retval collection
        retval.add(tsb);
    }

    //check for alt_ids
    Set<String> altIDs = obj.getSecondaryIDs();
    for (String altID : altIDs) {
        String altVal = safeTrim(altID);
        if (altVal == null) {
            //same policy as for regular synonyms: never store an entry without a value
            logger.debug("Null alt_id value encountered for " + trm.getIdentifier());
            continue;
        }
        retval.add(buildAltIdSynonym(altVal, trm, synCount++));
    }

    //if main termID is URL, eg http://www.ebi.ac.uk/EFO_1234
    //add synonym with just final portion of URL, eg EFO_1234
    String identifier = trm.getIdentifier();
    if (identifier.toLowerCase().startsWith("http:")) {
        //lastIndexOf returns -1 when there is no "/", in which case the whole identifier is used
        String urlTail = safeTrim(identifier.substring(identifier.lastIndexOf("/") + 1));
        if (urlTail != null) {
            retval.add(buildAltIdSynonym(urlTail, trm, synCount++));
        } else {
            //identifier ends with "/": there is nothing to derive an alt_id from
            logger.debug("Could not create alt_id from URL from term: " + identifier);
        }
    }
    return retval;
}

/**
 * Creates an ALT_ID synonym entry for the given, already trimmed, value.
 *
 * @param value    non-null synonym value
 * @param trm      parent term of the new entry
 * @param synCount counter value to mix into the synonym PK
 * @return the populated synonym bean
 */
private TermSynonymBean buildAltIdSynonym(String value, TermBean trm, int synCount) {
    TermSynonymBean tsb = new TermSynonymBean();
    //link parent term
    tsb.setParentTerm(trm);
    //set def
    tsb.setSynonym(truncateSynonymValue(value, trm));
    //set synType
    tsb.setSynonymType(ALT_ID);
    //the PK is derived from tsb.hashCode(), so it is assigned only after the fields above are set
    assignSynonymPk(tsb, synCount, trm);
    return tsb;
}

/**
 * Sets the synonym type of tsb: the user-defined type of the synonym if it has one,
 * otherwise the common term that corresponds to the synonym scope.
 *
 * @param tsb  bean to update
 * @param aSyn OBO synonym the bean is being built from
 * @throws IllegalStateException if the user-defined type id is not a key of ontologyTerms
 */
private void applySynonymType(TermSynonymBean tsb, Synonym aSyn) {
    //check to see if there's a defined synonymType for it
    if (aSyn.getSynonymType() != null) {
        logger.debug("using user-defined synonym type: " + aSyn.getSynonymType().getName());
        //ontologyTerms is declared without type arguments, hence the explicit narrowing
        Term synTrm = (Term) ontologyTerms.get(aSyn.getSynonymType().getID());
        if (synTrm == null) {
            throw new IllegalStateException(
                    "Attempting to use user-defined synonym type that has not been initialized in common objects: "
                            + aSyn.getSynonymType().getID()
            );
        }
        tsb.setSynonymType(synTrm);
        return;
    }
    //old-style synonym types
    switch (aSyn.getScope()) {
        case Synonym.EXACT_SYNONYM:
            tsb.setSynonymType(EXACT);
            break;
        case Synonym.NARROW_SYNONYM:
            tsb.setSynonymType(NARROW);
            break;
        case Synonym.BROAD_SYNONYM:
            tsb.setSynonymType(BROAD);
            break;
        case Synonym.RELATED_SYNONYM:
            tsb.setSynonymType(RELATED);
            break;
        case Synonym.UNKNOWN_SCOPE:
        default:
            tsb.setSynonymType(SYNONYM);
            break;
    }
}

/**
 * Cuts a synonym value down to 2000 characters, logging a warning when it had to be shortened.
 * The length test and the cut are applied to the same string, so the cut cannot run past its end.
 *
 * @param value non-null synonym value
 * @param trm   term the value belongs to (only used for the log message)
 * @return value itself if it has at most 2000 characters, otherwise its first 2000 characters
 */
private String truncateSynonymValue(String value, TermBean trm) {
    final int maxLength = 2000;
    if (value.length() <= maxLength) {
        return value;
    }
    logger.warn("synonym value longer than allowed database column length - truncating " + trm.getIdentifier());
    return value.substring(0, maxLength);
}

/**
 * Sets the synonym primary key because it is no longer being set by OJB.
 *
 * @param tsb      fully populated synonym bean
 * @param synCount counter value multiplied with the bean hash code
 * @param trm      parent term whose PK is appended to the key
 * @throws IllegalStateException if the resulting key is longer than 255 characters
 */
private void assignSynonymPk(TermSynonymBean tsb, int synCount, TermBean trm) {
    tsb.setSynonymPk(SHORT_NAME + (synCount * tsb.hashCode()) + "!" + trm.getTermPk());
    if (tsb.getSynonymPk().length() > 255) {
        throw new IllegalStateException("synonym PK longer than allowed database column length: " + tsb.getSynonymPk());
    }
}
/**
 * for debugging: logs the id of every root term at debug level and dumps the term
 * registered under that id in ontologyTerms, starting with an empty indent.
 */
protected void dumpOntology() {
    //getRootTerms() is declared outside this method; iterating as Object works
    //whatever element type it is declared with and produces the same log text
    for (Object rootId : getRootTerms()) {
        logger.debug("Root term: " + rootId);
        //ontologyTerms is declared without type arguments, hence the explicit narrowing
        dumpTerm((Term) ontologyTerms.get(rootId), "");
    }
}
/**
 * for debugging: logs, at info level, the number of terms in ontologyTerms and the
 * summed sizes of their synonym, relationship, path and annotation collections.
 * Collections that are null count as zero.
 */
protected void dumpOntologyStats() {
    int synonymCount = 0;
    int pathCount = 0;
    int relationshipCount = 0;
    int annotationCount = 0;
    //ontologyTerms is declared without type arguments, so values are read as Object and narrowed
    for (Object value : ontologyTerms.values()) {
        Term tb = (Term) value;
        if (tb.getSynonyms() != null) {
            synonymCount += tb.getSynonyms().size();
        }
        if (tb.getPaths() != null) {
            pathCount += tb.getPaths().size();
        }
        if (tb.getRelationships() != null) {
            relationshipCount += tb.getRelationships().size();
        }
        if (tb.getAnnotations() != null) {
            annotationCount += tb.getAnnotations().size();
        }
    }
    logger.info("Number of terms: " + ontologyTerms.size());
    logger.info("Number of synonyms: " + synonymCount);
    logger.info("Number of relationships: " + relationshipCount);
    logger.info("Number of paths: " + pathCount);
    logger.info("Number of annotations: " + annotationCount);
}
/**
 * for debugging: writes the id and name of a term to the debug log, followed by the
 * sizes of its synonym and annotation collections when they are not null, and one line
 * per relationship. The subject term of every relationship is dumped recursively with a
 * longer indent; recursion stops once the indent is longer than 15 characters.
 *
 * @param term   - term to dump; nothing is logged when null
 * @param indent - prefix put in front of every logged line
 */
protected void dumpTerm(Term term, String indent) {
    //the indent length is tested first, exactly as before, then the null term
    if (indent.length() > 15 || term == null) {
        return;
    }

    logger.debug(indent + "id: " + term.getIdentifier());
    logger.debug(indent + "name: " + term.getName());
    if (term.getSynonyms() != null) {
        logger.debug(indent + "nb syn: " + term.getSynonyms().size());
    }
    if (term.getAnnotations() != null) {
        logger.debug(indent + "nb annot: " + term.getAnnotations().size());
    }
    if (term.getRelationships() == null) {
        return;
    }

    for (TermRelationship rel : term.getRelationships()) {
        //"<subject name> <predicate name> <object name>", prefixed with the current indent
        StringBuilder line = new StringBuilder();
        line.append(indent)
                .append(rel.getSubjectTerm().getName())
                .append(" ")
                .append(rel.getPredicateTerm().getName())
                .append(" ")
                .append(rel.getObjectTerm().getName());
        logger.debug(line.toString());
        dumpTerm(rel.getSubjectTerm(), indent + " ");
    }
}
/**
 * Trims leading and trailing whitespace from a string and maps "nothing left" to null.
 *
 * @param inStr - string to trim; may be null
 * @return the trimmed string, or null if inStr is null or empty after trimming
 */
protected String safeTrim(String inStr) {
    if (inStr == null) {
        return null;
    }
    String trimmed = inStr.trim();
    return trimmed.length() == 0 ? null : trimmed;
}
/**
 * Returns the ontology bean held by this loader.
 *
 * @return the ontology bean (ontBean); never null
 * @throws IllegalStateException if ontBean is still null, ie the bean has not been properly initialized
 * @throws IOException declared by the signature; no statement in this method throws it
 */
public Ontology getOntology() throws IOException {
    if (ontBean == null) {
        throw new IllegalStateException("OntologyAccess bean not properly initialized. Did you call the proper sequence of methods: configure(), parse(), process()?");
    }
    return ontBean;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy