
package org.biopax.validator;

import java.util.*;

import org.biopax.paxtools.controller.SimpleEditorMap;
import org.biopax.paxtools.model.BioPAXLevel;
import org.biopax.paxtools.model.level3.Level3Element;
import org.biopax.paxtools.model.level3.ControlledVocabulary;
import org.biopax.paxtools.model.level3.UnificationXref;
import org.biopax.paxtools.normalizer.Normalizer;
import org.biopax.paxtools.util.ClassFilterSet;
import org.biopax.psidev.ontology_manager.*;
import org.biopax.validator.api.CvRestriction;
import org.biopax.validator.api.beans.Validation;
import javax.annotation.PostConstruct;

/**
* A BioPAX L3 controlled vocabulary context-aware term validation rule.
*
* @author rodch
*/
public abstract class CvTermsRule<T extends Level3Element> extends AbstractCvRule<T>
{
/**
* Constructor.
*
* TODO (feature): allow using a property path in the 'property' parameter (e.g., "modificationFeature/modificationType")
*
* @param domain biopax property domain (object) class
* @param property biopax property name
* @param restrictions restrictions on the controlled vocabulary (ontology) terms, given the biopax property context
*
* (See the illustrative subclass sketch after this class for how a concrete rule typically calls this constructor.)
*/
public CvTermsRule(Class<T> domain, String property, CvRestriction... restrictions)
{
super(domain, property, restrictions);
}

@PostConstruct
@Override
public void init() {
super.init();
this.editor = (property != null && !ControlledVocabulary.class.isAssignableFrom(domain))
? SimpleEditorMap.L3.getEditorForProperty(property, this.domain)
: null;
}
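
/**
* Checks the controlled vocabulary terms and unification xrefs of either
* 'thing' itself (when 'editor' is null, i.e., the rule's domain is a
* ControlledVocabulary) or of the CVs reached via the configured biopax
* property of 'thing', against this rule's term restrictions.
* In the 'fix' mode, it also auto-generates missing unification xrefs for
* valid terms and infers missing preferred terms from valid xrefs.
*/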
public void check(Validation validation, T thing) {
// a set of CVs for this rule to validate
Collection<ControlledVocabulary> vocabularies = new HashSet<>();
// if the editor is null, we expect a ControlledVocabulary object!
if(editor == null) {
vocabularies.add((ControlledVocabulary)thing);
} else {
vocabularies.addAll((Collection<ControlledVocabulary>) editor.getValueFromBean(thing));
}
// shortcut
if(vocabularies.isEmpty()) return;
// text to report in any CV error case
String cvRuleInfo = ((editor != null) ? " property: "
+ property : "") + " " + restrictions.toString();
// check the CV terms against the restrictions
for (ControlledVocabulary cv : vocabularies)
{
if (cv == null) {
logger.warn(thing
+ " refers to a 'null' controlled vocabulary (bug!)"
+ ", domain: " + domain + ", property: " + property);
} else if(cv.getTerm().isEmpty()) {
/* won't report/fix what other rules (e.g., 'controlledVocabularyTermCRRule') or Normalizer do */
}
else {
//TODO: check if multiple terms are synonyms (equivalent)
final Set<String> badTerms = new HashSet<>(); // initially - none
final Map<String, Set<OntologyTermI>> noXrefTerms = new HashMap<>();
//copy of the original terms set to iterate over (avoids ConcurrentModificationException - other rules can modify the set simultaneously)
final Set<String> terms = Collections.unmodifiableSet(new HashSet<>(cv.getTerm()));
// first, check that the term names are valid
for(String name : terms)
{
if(!getValidTerms().contains(name.toLowerCase())) {
// save to report/delete/replace the invalid term later
badTerms.add(name);
}
}
// report, but keep the original (possibly illegal) terms
if (!badTerms.isEmpty()) {
String badTermInfo = badTerms.toString();
error(validation, thing, "illegal.cv.term", false, badTermInfo, cvRuleInfo);
}
/* check whether a unification xref's id points to an invalid term
* and, if so, report an 'illegal.cv.xref' error
*/
final Set<UnificationXref> badXrefs = new HashSet<>();
for (UnificationXref x : new ClassFilterSet<>(
cv.getXref(), UnificationXref.class))
{
OntologyTermI ot = ontologyUtils.getOntologyManager().findTermByAccession(x.getId());
if(ot == null || !getValidTerms().contains(ot.getPreferredName().toLowerCase())) {
badXrefs.add(x);
}
}
// report the wrong unification xrefs
if(!badXrefs.isEmpty()) {
String bads = badXrefs.toString();
// report as a not-fixed error case (such xrefs are not fixed/removed; the originals are kept)
error(validation, thing, "illegal.cv.xref", false, bads, cvRuleInfo);
}
// check that each valid term has a unification xref
for(String name : terms)
{
// only for valid terms
if(getValidTerms().contains(name.toLowerCase())) {
// check whether there is a corresponding unification xref
Set<OntologyTermI> ots = ontologyUtils
.getOntologyManager().searchTermByName(name.toLowerCase(), getOntologyIDs());
assert(!ots.isEmpty()); // shouldn't be empty, because getValidTerms() contains this name
boolean noXrefsForTermNameFound = true; // next, try to prove otherwise
terms: for(OntologyTermI term : ots) {
String id = term.getTermAccession();
// search for the xref with the same xref.id
for (UnificationXref x : new ClassFilterSet<>(
cv.getXref(), UnificationXref.class)) {
if(id.equalsIgnoreCase(x.getId())) {
noXrefsForTermNameFound = false;
break terms; // exit this and outer loops!
}
}
}
if(noXrefsForTermNameFound)
noXrefTerms.put(name, ots); //store terms to fix later (to generate xrefs)
}
}
// note: at this point, the 'noXrefTerms' map (valid terms only) has been built...
if (!noXrefTerms.isEmpty()) {
String noXrefTermsInfo = noXrefTerms.toString();
boolean fixed = false;
if(validation.isFix()) {
/*
* However, fixing by adding the xrefs is not so trivial, because:
* 1) there is no reference to the parent Model available here
* (thus the validator must detect and add new objects automatically! [done!])
* 2) creating several xrefs with the same RDFId requires special care
* or follow-up merging, as simply adding them to a model would throw
* an "already have this element" exception; other rules
* can also generate duplicates...
* 3) there is a risk that a rule generating/adding a new element causes
* other rules to interfere via AOP and prevent the changes in a quite
* unpredictable manner (...bites its own tail)
* 4) multiple terms (accession numbers) can result from searching by a (synonym) name
*
* Well, let's try to fix it anyway (this also required modifying ValidatorImpl)!
*/
Set<OntologyTermI> validTermIs = ontologyUtils.getValidTerms(this);
for (String name : noXrefTerms.keySet()) {
//get previously saved valid ontology term beans by name
Set<OntologyTermI> ots = noXrefTerms.get(name);
//get only the top (parent) valid terms
Set<OntologyTermI> topvalids = new HashSet<>();
for (OntologyTermI term : ots) {
// skip terms that are not applicable even though they have the same synonym name
if(validTermIs.contains(term)) {
OntologyAccess ont = ontologyUtils.getOntologyManager().getOntology(term.getOntologyId());
//if the term's parents do not contain any of these terms
if(Collections.disjoint(ots, ont.getAllParents(term))) {
topvalids.add(term);
}
}
}
Set<String> added = new HashSet<>();
for (OntologyTermI term : topvalids) {
String ontId = term.getOntologyId();
String db = ontologyUtils.getOntologyManager().getOntology(ontId).getName();
String id = term.getTermAccession();
// auto-create and add the xref to the cv;
// generate a URI in the same namespace
String uri = Normalizer.uri(cv.getUri() + "_", db, id, UnificationXref.class);
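// Normalizer.uri derives a deterministic URI from the namespace (cv.getUri() + "_"),
// db, id and class, so the 'added' set below prevents creating duplicate xrefs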
if(!added.contains(uri)) {
added.add(uri);
UnificationXref ux = BioPAXLevel.L3.getDefaultFactory()
.create(UnificationXref.class, uri);
ux.setDb(db);
ux.setId(id);
cv.addXref(ux);
fixed = true; // 99% true ;-)
noXrefTermsInfo += "; " + id + " added!";
}
}
}
}
// report
error(validation, thing, "no.xref.cv.terms",
fixed, noXrefTermsInfo, cvRuleInfo);
}
//if in the fixing mode,
if (validation != null && validation.isFix()
//and there were some errors found,
&& !(badTerms.isEmpty() && noXrefTerms.isEmpty()))
{
//then infer and add the valid preferred terms from the unification xrefs
Set<String> addTerms = createTermsFromUnificationXrefs(cv);
if (!addTerms.isEmpty()) {
cv.getTerm().addAll(addTerms);
}
}
}
}
}
// infer valid terms from the unification xrefs (invalid xrefs yield nothing)
private Set<String> createTermsFromUnificationXrefs(ControlledVocabulary cv)
{
Set<String> inferred = new HashSet<>();
for (UnificationXref x : new ClassFilterSet<>(
cv.getXref(), UnificationXref.class))
{
OntologyTermI ot = ontologyUtils.getOntologyManager().findTermByAccession(x.getId());
//if found and valid
if (ot != null && getValidTerms().contains(ot.getPreferredName().toLowerCase())) {
inferred.add(ot.getPreferredName());
}
else if(ot == null)
logger.warn("No term found by the xref.id: " + x.getId());
else
logger.debug("Invalid (for this CV context) term: " + x.getId());
}
return inferred;
}
}
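
/*
* A minimal usage sketch (not part of the original file): a concrete rule typically
* extends CvTermsRule with a no-argument constructor that fixes the domain class,
* the property (null when the domain itself is a ControlledVocabulary) and the CV
* restrictions; such rules are then registered as beans in the validator context.
* The ontology id ("MI"), the accession ("MI:0190") and the exact CvRestriction
* constructor arguments below are illustrative assumptions - verify them against
* the CvRestriction API of the validator version in use. The class is kept abstract
* because it is only a sketch.
*/
abstract class ExampleInteractionTypeCvRule
extends CvTermsRule<org.biopax.paxtools.model.level3.InteractionVocabulary>
{
ExampleInteractionTypeCvRule() {
// assumed restriction: allow only child terms of MI:0190 ("interaction type")
super(org.biopax.paxtools.model.level3.InteractionVocabulary.class, null,
new CvRestriction("MI:0190", "MI", false,
CvRestriction.UseChildTerms.ALL, false));
}
}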