
org.monarchinitiative.phenol.annotations.hpo.HpoAnnotationEntry Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phenol-annotations Show documentation
Show all versions of phenol-annotations Show documentation
phenol-annotation contains the annotation functionality for ontologies
package org.monarchinitiative.phenol.annotations.hpo;
import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase;
import org.monarchinitiative.phenol.base.PhenolException;
import org.monarchinitiative.phenol.base.PhenolRuntimeException;
import org.monarchinitiative.phenol.annotations.constants.hpo.HpoClinicalModifierTermIds;
import org.monarchinitiative.phenol.annotations.constants.hpo.HpoModeOfInheritanceTermIds;
import org.monarchinitiative.phenol.annotations.constants.hpo.HpoSubOntologyRootTermIds;
import org.monarchinitiative.phenol.ontology.data.Ontology;
import org.monarchinitiative.phenol.ontology.data.Term;
import org.monarchinitiative.phenol.ontology.data.TermId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import static org.monarchinitiative.phenol.ontology.algo.OntologyAlgorithm.existsPath;
import static org.monarchinitiative.phenol.annotations.constants.hpo.HpoFrequencyTermIds.*;
import static org.monarchinitiative.phenol.annotations.constants.hpo.HpoOnsetTermIds.*;
/**
* Created by peter on 1/20/2018.
* This class represents the contents of a single annotation line.
*
* @author Peter Robinson
* @deprecated to be removed in v3.0.0.
*/
@Deprecated(forRemoval = true)
public class HpoAnnotationEntry {
private static final Logger logger = LoggerFactory.getLogger(HpoAnnotationEntry.class);
/**
 * Matches a ratio such as {@code 10/20}. The named groups {@code numerator} and
 * {@code denominator} are read by the frequency check, so they must be declared here.
 */
private static final Pattern RATIO_PATTERN = Pattern.compile("(?<numerator>\\d+)/(?<denominator>\\d+)");
/**
 * Matches an integer or decimal percentage such as {@code 1%} or {@code 1.23456789%}.
 * The named group {@code value} holds the number without the percent sign.
 */
private static final Pattern PERCENTAGE_PATTERN = Pattern.compile("(?<value>\\d+\\.?(\\d+)?)%");
/** Stand-in used wherever an optional field carries no data. */
private static final String EMPTY_STRING = "";
/**
 * The CURIE of the disease, e.g., OMIM:600201 (column index 0 of an annotation line).
 */
private final String diseaseID;
/**
 * Name of the disease (column index 1).
 */
private final String diseaseName;
/**
 * Term id of the annotated phenotype (column index 2).
 */
private final TermId phenotypeId;
/**
 * Label of the annotated phenotype (column index 3).
 */
private final String phenotypeName;
/**
 * Term id of the age of onset, may be null or empty (column index 4).
 */
private final String ageOfOnsetId;
/**
 * Label of the age of onset, may be null or empty (column index 5).
 */
private final String ageOfOnsetName;
/**
 * Evidence code (column index 12).
 */
private final String evidenceCode;
/**
 * Frequency (column index 6); can be one of N/M, X% or a valid frequency term Id.
 */
private final String frequencyModifier;
/**
 * Sex specificity, may be null or empty (column index 7).
 */
private final String sex;
/**
 * Negation marker, may be null or empty (column index 8).
 */
private final String negation;
/**
 * Semicolon-separated modifier term ids, may be null or empty (column index 9).
 */
private final String modifier;
/**
 * Free-text description, may be null or empty (column index 10).
 */
private final String description;
/**
 * Citation supporting the annotation (column index 11).
 */
private final String publication;
/**
 * Biocuration history (column index 13).
 */
private final String biocuration;
/**
 * Header names of the tab-separated columns of an annotation line, in column order.
 */
private static final String[] expectedFields = {
    "#diseaseID",
    "diseaseName",
    "phenotypeID",
    "phenotypeName",
    "onsetID",
    "onsetName",
    "frequency",
    "sex",
    "negation",
    "modifier",
    "description",
    "publication",
    "evidence",
    "biocuration"};
/**
 * Number of tab-separated expectedFields in a valid small file.
 */
private static final int NUMBER_OF_FIELDS = expectedFields.length;
/**
 * Prefixes of all known disease databases; used to validate the prefix of a disease id.
 * NOTE(review): assumes {@code DiseaseDatabase.prefix()} returns a String - confirm.
 */
private static final Set<String> validDatabases = Arrays.stream(DiseaseDatabase.values())
    .map(DiseaseDatabase::prefix)
    .collect(Collectors.toUnmodifiableSet());
/**
 * A set with all of the TermIds for frequency.
 */
private static final Set<TermId> frequencySubhierarchyTermIds =
    Set.of(FREQUENCY, OBLIGATE, VERY_FREQUENT, FREQUENT, OCCASIONAL, VERY_RARE, EXCLUDED);
/**
 * A set with all of the TermIds for age of onset.
 */
private static final Set<TermId> onsetSubhierarchyTermIds = Set.of(ONSET, EMBRYONAL_ONSET,
    ADULT_ONSET, MIDDLE_AGE_ONSET, LATE_ONSET, YOUNG_ADULT_ONSET,
    CONGENITAL_ONSET, NEONATAL_ONSET, PEDIATRIC_ONSET, CHILDHOOD_ONSET,
    INFANTILE_ONSET, JUVENILE_ONSET, ANTENATAL_ONSET, FETAL_ONSET);
/**
 * Set of allowable evidence codes.
 */
private static final Set<String> EVIDENCE_CODES = Set.of("IEA", "TAS", "PCS");
/**
 * Prefixes (the part before the first colon) accepted in the publication field.
 */
private static final Set<String> VALID_CITATION_PREFIXES = Set.of("PMID", "OMIM", "http", "https", "DECIPHER",
    "ORPHA", "ISBN", "ISBN-10", "ISBN-13");
/**
 * regex for patterns such as HPO:skoehler[2018-09-22]
 */
private static final String biocurationRegex = "(\\w+:\\w+)\\[(\\d{4}-\\d{2}-\\d{2})]";
/**
 * The pattern that corresponds to {@link #biocurationRegex}.
 */
private static final Pattern biocurationPattern = Pattern.compile(biocurationRegex);
/**
 * @return the disease CURIE exactly as stored, e.g., OMIM:600201.
 */
public String getDiseaseID() {
    return this.diseaseID;
}
/**
 * Parses the disease ID as a term id and extracts its prefix, i.e. the part before the colon.
 *
 * @return the database part of the diseaseID.
 */
public String getDB() {
    TermId diseaseTermId = TermId.of(diseaseID);
    return diseaseTermId.getPrefix();
}
/**
 * Parses the disease ID as a term id and extracts its id, i.e. the part after the colon.
 *
 * @return the object_ID part of the diseaseID.
 */
public String getDB_Object_ID() {
    TermId diseaseTermId = TermId.of(diseaseID);
    return diseaseTermId.getId();
}
/**
 * @return the disease name, e.g., Noonan syndrome.
 */
public String getDiseaseName() {
    return this.diseaseName;
}
/**
 * @return HPO id of this annotation.
 */
public TermId getPhenotypeId() {
    return this.phenotypeId;
}
/**
 * @return HPO term label of this annotation.
 */
public String getPhenotypeLabel() {
    return this.phenotypeName;
}
/**
 * @return HPO id of the age of onset exactly as stored (may be null).
 */
public String getAgeOfOnsetId() {
    return this.ageOfOnsetId;
}
/**
 * @return HPO term label of the age of onset, or the empty string if none is stored.
 */
public String getAgeOfOnsetLabel() {
    if (ageOfOnsetName == null) {
        return EMPTY_STRING;
    }
    return ageOfOnsetName;
}
/**
 * @return evidence for this annotation (one of IEA, PCS, TAS).
 */
public String getEvidenceCode() {
    return this.evidenceCode;
}
/**
 * @return the frequency modifier, or the empty string if none is stored.
 */
public String getFrequencyModifier() {
    if (frequencyModifier == null) {
        return EMPTY_STRING;
    }
    return frequencyModifier;
}
/**
 * @return String representing the sex (MALE or FEMALE), or the empty string if none is stored.
 */
public String getSex() {
    if (sex == null) {
        return EMPTY_STRING;
    }
    return sex;
}
/**
 * @return the stored negation (the String "NOT"), or the empty string if none is stored.
 */
public String getNegation() {
    if (negation == null) {
        return EMPTY_STRING;
    }
    return negation;
}
/**
 * @return one or more HPO term ids (as a semicolon-separated String), or the empty string.
 */
public String getModifier() {
    if (modifier == null) {
        return EMPTY_STRING;
    }
    return modifier;
}
/**
 * @return (optional) free text description, or the empty string if none is stored.
 */
public String getDescription() {
    // Return the description itself; the modifier has its own getter.
    return description != null ? description : EMPTY_STRING;
}
/**
 * @return the citation supporting the annotation, e.g., a PubMed ID, exactly as stored.
 */
public String getPublication() {
    return this.publication;
}
/**
 * @return the biocuration history string exactly as stored.
 */
public String getBiocuration() {
    return this.biocuration;
}
/**
 * Stores every argument in the corresponding field without validation or copying.
 * This constructor is package-private so that we can use it for merging in
 * {@link HpoAnnotationModel}
 *
 * @param disID disease CURIE, stored as the disease ID
 * @param diseaseName name of the disease
 * @param phenotypeId term id of the annotated phenotype
 * @param phenotypeName label of the annotated phenotype
 * @param ageOfOnsetId term id of the age of onset
 * @param ageOfOnsetName label of the age of onset
 * @param frequencyString frequency, stored as the frequency modifier
 * @param sex sex specificity
 * @param negation negation marker
 * @param modifier modifier term ids
 * @param description free-text description
 * @param publication citation supporting the annotation
 * @param evidenceCode evidence code
 * @param biocuration biocuration history
 */
HpoAnnotationEntry(String disID,
String diseaseName,
TermId phenotypeId,
String phenotypeName,
String ageOfOnsetId,
String ageOfOnsetName,
String frequencyString,
String sex,
String negation,
String modifier,
String description,
String publication,
String evidenceCode,
String biocuration) {
this.diseaseID = disID;
this.diseaseName = diseaseName;
this.phenotypeId = phenotypeId;
this.phenotypeName = phenotypeName;
this.ageOfOnsetId = ageOfOnsetId;
this.ageOfOnsetName = ageOfOnsetName;
this.frequencyModifier = frequencyString;
this.sex = sex;
this.negation = negation;
this.modifier = modifier;
this.description = description;
this.publication = publication;
this.evidenceCode = evidenceCode;
this.biocuration = biocuration;
}
/**
 * @return the same tab-separated row that {@link #getRow()} produces.
 */
@Override
public String toString() {
    return this.getRow();
}
/**
 * @return the row of this entry with every run of whitespace (tabs included) collapsed
 * into a single space.
 */
public String getLineNoTabs() {
    String row = getRow();
    return row.replaceAll("\\s+", " ");
}
/**
 * Builds the tab-separated row for this entry. The twelve fields from age-of-onset id
 * onwards are written as the empty string when they are null, so that the text "null"
 * does not end up in the output for them.
 *
 * @return one tab-separated row with the fourteen fields of this entry
 */
public String getRow() {
    String onsetId = ageOfOnsetId == null ? EMPTY_STRING : ageOfOnsetId;
    String onsetName = ageOfOnsetName == null ? EMPTY_STRING : ageOfOnsetName;
    String frequency = frequencyModifier == null ? EMPTY_STRING : frequencyModifier;
    String sexValue = sex == null ? EMPTY_STRING : sex;
    String negationValue = negation == null ? EMPTY_STRING : negation;
    String modifierValue = modifier == null ? EMPTY_STRING : modifier;
    String descriptionValue = description == null ? EMPTY_STRING : description;
    String publicationValue = publication == null ? EMPTY_STRING : publication;
    String evidenceValue = evidenceCode == null ? "" : evidenceCode;
    String biocurationValue = biocuration == null ? EMPTY_STRING : biocuration;
    // String.join renders a null element as "null", exactly like the %s conversion does.
    return String.join("\t",
        diseaseID,
        diseaseName,
        phenotypeId.getValue(),
        phenotypeName,
        onsetId,
        onsetName,
        frequency,
        sexValue,
        negationValue,
        modifierValue,
        descriptionValue,
        publicationValue,
        evidenceValue,
        biocurationValue);
}
/**
 * Parses one tab-separated line of an HPO annotation (small) file into an
 * {@link HpoAnnotationEntry} and runs the quality control checks on it. Obsolete term ids
 * are not replaced here.
 *
 * @param line A line from an HPO Annotation file (small file)
 * @param ontology reference to HPO ontology
 * @return corresponding {@link HpoAnnotationEntry} object
 * @throws PhenolException if the line does not have the expected number of fields or
 * fails a quality control check.
 */
public static HpoAnnotationEntry fromLine(String line, Ontology ontology) throws PhenolException {
    final String[] fields = line.split("\t");
    if (fields.length != NUMBER_OF_FIELDS) {
        String message = String.format("We were expecting %d expectedFields but got %d for line %s",
            NUMBER_OF_FIELDS, fields.length, line);
        throw new HpoAnnotationModelException(message);
    }
    HpoAnnotationEntry parsed = new HpoAnnotationEntry(
        fields[0],             // diseaseID
        fields[1],             // diseaseName
        TermId.of(fields[2]),  // phenotypeId
        fields[3],             // phenotypeName
        fields[4],             // ageOfOnsetId
        fields[5],             // ageOfOnsetName
        fields[6],             // frequency
        fields[7],             // sex
        fields[8],             // negation
        fields[9],             // modifier
        fields[10],            // description
        fields[11],            // publication
        fields[12],            // evidenceCode
        fields[13]);           // biocuration
    // Throws on the first failed check; reaching the return means the entry passed.
    performQualityControl(parsed, ontology);
    return parsed;
}
/**
 * Create an {@link HpoAnnotationEntry} object for a line in an HPO Annotation file. Unlike
 * {@link #fromLine(String, Ontology)}, the phenotype id and label are replaced by the primary
 * id and current label from the ontology whenever the ontology reports a primary id for the
 * phenotype term.
 * <p>
 * Lines that cannot be turned into a valid entry are logged at WARN level and skipped.
 *
 * @param line A line from an HPO Annotation file (small file)
 * @param ontology reference to HPO ontology
 * @return the corresponding {@link HpoAnnotationEntry}, or an empty Optional if the line has
 * the wrong number of fields, a malformed phenotype id, or fails quality control
 */
public static Optional<HpoAnnotationEntry> fromLineReplaceObsoletePhenotypeData(String line, Ontology ontology) {
    String[] fields = line.split("\t");
    if (fields.length != NUMBER_OF_FIELDS) {
        // Report the dropped line instead of discarding it silently.
        logger.warn("Skipping line with {} fields (expected {}): {}", fields.length, NUMBER_OF_FIELDS, line);
        return Optional.empty();
    }
    TermId phenotypeId;
    try {
        phenotypeId = TermId.of(fields[2]);
    } catch (PhenolRuntimeException e) {
        // A malformed id is a data problem in this line, so treat it like any other Q/C failure.
        logger.warn("Skipping line with malformed phenotype id \"{}\": {}", fields[2], e.getMessage());
        return Optional.empty();
    }
    String phenotypeName = fields[3];
    // Replace id and label if the ontology knows a primary id for this term.
    TermId currentPhenotypeId = ontology.getPrimaryTermId(phenotypeId);
    if (currentPhenotypeId != null) {
        String currentLabel = ontology.getTermMap().get(currentPhenotypeId).getName();
        phenotypeId = currentPhenotypeId;
        phenotypeName = currentLabel;
    }
    HpoAnnotationEntry entry = new HpoAnnotationEntry(fields[0], // diseaseID
        fields[1],   // diseaseName
        phenotypeId,
        phenotypeName,
        fields[4],   // ageOfOnsetId
        fields[5],   // ageOfOnsetName
        fields[6],   // frequency
        fields[7],   // sex
        fields[8],   // negation
        fields[9],   // modifier
        fields[10],  // description
        fields[11],  // publication
        fields[12],  // evidenceCode
        fields[13]); // biocuration
    // if the following method does not throw an Exception, we are good to go!
    try {
        performQualityControl(entry, ontology);
    } catch (HpoAnnotationModelException e) {
        logger.warn(e.getMessage());
        return Optional.empty();
    }
    return Optional.of(entry);
}
/**
 * Creates an entry from Orphanet data. If the frequency of an HPO term is listed in Orphanet
 * as Excluded (0%), then we encode it as a NOT (negated) term with an empty frequency field.
 * The disease id doubles as the publication, and the evidence code is always "TAS".
 *
 * @param diseaseID Orphanet ID, e.g., ORPHA:99776
 * @param diseaseName Orphanet disease name, e.g., Mosaic trisomy 9
 * @param hpoId HPO id (e.g., HP:0001234) as String
 * @param hpoLabel corresponding HPO term Label
 * @param frequency Orphanet frequency data as TermId
 * @param ontology reference to HPO ontology
 * @param biocuration A String to represent provenance from Orphanet, e.g., ORPHA:orphadata[2019-01-05]
 * @param replaceObsoleteTermId if true, replace an obsolete term id and an outdated label with
 * the current ones from the ontology before quality control runs.
 * @return corresponding HpoAnnotationEntry object
 * @throws HpoAnnotationModelException if hpoId is null or there is a Q/C problem with the data
 */
public static HpoAnnotationEntry fromOrphaData(String diseaseID,
                                               String diseaseName,
                                               String hpoId,
                                               String hpoLabel,
                                               TermId frequency,
                                               Ontology ontology,
                                               String biocuration,
                                               boolean replaceObsoleteTermId) throws HpoAnnotationModelException {
    if (hpoId == null) {
        throw new HpoAnnotationModelException("Null String passed as hpoId for disease " + (diseaseID != null ? diseaseID : "n/a"));
    }
    TermId phenotypeId = TermId.of(hpoId);
    // replace the frequency termid with its string equivalent
    // except if it is Excluded, which we treat as a negative annotation
    boolean excluded = frequency.equals(EXCLUDED);
    String frequencyString = excluded ? EMPTY_STRING : frequency.getValue();
    String negationString = excluded ? "NOT" : EMPTY_STRING;
    if (replaceObsoleteTermId) {
        TermId currentPhenotypeId = ontology.getPrimaryTermId(phenotypeId);
        if (currentPhenotypeId != null && !currentPhenotypeId.equals(phenotypeId)) {
            // Look the label up under the current id: it is the current term we are switching to.
            String newLabel = ontology.getTermMap().get(currentPhenotypeId).getName();
            logger.warn("{}: Replacing obsolete TermId \"{}\" with current ID \"{}\" (and obsolete label {} with current label {})",
                diseaseID, hpoId, currentPhenotypeId.getValue(), hpoLabel, newLabel);
            phenotypeId = currentPhenotypeId;
            hpoLabel = newLabel;
        }
        // replace label if needed
        if (currentPhenotypeId != null) { // we can only get new name if we got the new id!
            String currentPhenotypeLabel = ontology.getTermMap().get(phenotypeId).getName();
            // Compare from the non-null side so that a null hpoLabel is replaced instead of crashing.
            if (currentPhenotypeLabel != null && !currentPhenotypeLabel.equals(hpoLabel)) {
                logger.warn("{}: Replacing obsolete Term label \"{}\" with current label \"{}\"",
                    diseaseID, hpoLabel, currentPhenotypeLabel);
                hpoLabel = currentPhenotypeLabel;
            }
        }
    }
    HpoAnnotationEntry entry = new HpoAnnotationEntry(diseaseID,
        diseaseName,
        phenotypeId,
        hpoLabel,
        EMPTY_STRING,   // ageOfOnsetId
        EMPTY_STRING,   // ageOfOnsetName
        frequencyString,
        EMPTY_STRING,   // sex
        negationString,
        EMPTY_STRING,   // modifier
        EMPTY_STRING,   // description
        diseaseID,      // publication
        "TAS",
        biocuration);
    // if the following method does not throw an Exception, we are good to go!
    performQualityControl(entry, ontology);
    return entry;
}
/**
 * Creates an entry for an Orphanet mode-of-inheritance annotation. Onset, frequency, sex,
 * negation, modifier and description are left empty, the disease id doubles as the
 * publication, and the evidence code is "TAS". No quality control is run on the result.
 *
 * @param diseaseID Orphanet ID, e.g., ORPHA:99776
 * @param diseaseName Orphanet disease name, e.g., Mosaic trisomy 9
 * @param hpoInheritanceId HPO id (e.g., HP:0001234) for an inheritance term
 * @param hpoLabel corresponding HPO term Label
 * @param biocuration A String to represent provenance from Orphanet, e.g., ORPHA:orphadata[2019-01-05]
 * @return corresponding HpoAnnotationEntry object
 */
public static HpoAnnotationEntry fromOrphaInheritanceData(String diseaseID,
                                                          String diseaseName,
                                                          TermId hpoInheritanceId,
                                                          String hpoLabel,
                                                          String biocuration) {
    HpoAnnotationEntry inheritanceEntry = new HpoAnnotationEntry(
        diseaseID,
        diseaseName,
        hpoInheritanceId,
        hpoLabel,
        EMPTY_STRING,  // ageOfOnsetId
        EMPTY_STRING,  // ageOfOnsetName
        EMPTY_STRING,  // frequency: always empty for inheritance annotations
        EMPTY_STRING,  // sex
        EMPTY_STRING,  // negation: always empty for inheritance annotations
        EMPTY_STRING,  // modifier
        EMPTY_STRING,  // description
        diseaseID,     // publication
        "TAS",
        biocuration);
    return inheritanceEntry;
}
// Q/C methods
/**
 * Runs the field checks on the HpoAnnotationEntry one after another, in the order shown
 * below. The first check that fails throws, so later fields are not examined. If no
 * exception is thrown, no errors were found. The description field is not checked.
 *
 * @param entry The {@link HpoAnnotationEntry} to be tested.
 * @param ontology A reference to an HpoOntology object (needed for Q/C'ing terms).
 * @throws HpoAnnotationModelException if any parse error is encountered
 */
private static void performQualityControl(HpoAnnotationEntry entry, Ontology ontology) throws HpoAnnotationModelException {
checkDB(entry);
checkDiseaseName(entry.getDiseaseName());
checkPhenotypeFields(entry, ontology);
checkAgeOfOnsetFields(entry.getAgeOfOnsetId(), entry.getAgeOfOnsetLabel(), ontology);
checkFrequency(entry.getFrequencyModifier(), ontology);
checkSexEntry(entry.getSex());
checkNegation(entry.getNegation());
checkModifier(entry.getModifier(), ontology);
// description is free text, nothing to check
checkPublication(entry.getPublication());
checkEvidence(entry.getEvidenceCode());
checkBiocuration(entry.getBiocuration());
}
/**
 * Verifies that the database prefix of the entry's disease id is one of the known
 * prefixes in {@link #validDatabases}.
 *
 * @param entry entry to be checked for a database String such as OMIM or ORPHA
 * @throws HpoAnnotationModelException if the prefix cannot be extracted or is not a known database
 */
private static void checkDB(HpoAnnotationEntry entry) throws HpoAnnotationModelException {
    final String database;
    try {
        database = entry.getDB();
    } catch (PhenolRuntimeException r) {
        throw new HpoAnnotationModelException("Could not construct database: " + r.getMessage());
    }
    if (validDatabases.contains(database)) {
        return;
    }
    throw new HpoAnnotationModelException(String.format("Invalid database symbol: \"%s\"", database));
}
/**
 * Rejects a null or empty disease name.
 *
 * @param name the disease name to check
 * @throws HpoAnnotationModelException if the name is null or empty
 */
private static void checkDiseaseName(String name) throws HpoAnnotationModelException {
    boolean present = name != null && !name.isEmpty();
    if (present) {
        return;
    }
    throw new HpoAnnotationModelException("Missing disease name");
}
/**
 * Checks the phenotype id and label of an entry: the id must be present, known to the
 * ontology, and not an alt_id (i.e., out of date!); the label must be present and equal
 * to the ontology's name for that id.
 *
 * @param entry the {@link HpoAnnotationEntry} to be checked
 * @param ontology reference to HPO ontology
 * @throws HpoAnnotationModelException if the id is missing or unknown, or the label is
 * missing or differs from the ontology's name
 */
private static void checkPhenotypeFields(HpoAnnotationEntry entry, Ontology ontology)
    throws HpoAnnotationModelException {
    TermId id = entry.getPhenotypeId();
    String termLabel = entry.getPhenotypeLabel();
    if (id == null) {
        throw new HpoAnnotationModelException("Phenotype id was null for \"" + termLabel + "\"");
    }
    Term term = ontology.getTermMap().get(id);
    if (term == null) {
        throw new HpoAnnotationModelException(String.format("Could not find HPO term id (\"%s\") for \"%s\"", id, termLabel));
    }
    // The term found under an alt_id carries a different (primary) id.
    TermId current = term.id();
    if (!current.equals(id)) {
        throw new ObsoleteTermIdRuntimeException(String.format("Usage of (obsolete) alt_id %s for %s (%s)",
            id.getValue(),
            current.getValue(),
            term.getName()));
    }
    // if we get here, the TermId of the HPO Term was OK
    // now check that the label corresponds to the TermId
    if (termLabel == null || termLabel.isEmpty()) {
        throw new HpoAnnotationModelException("Missing HPO term label for id=" + id.getValue());
    }
    String currentLabel = term.getName();
    // termLabel is known to be non-null here, so compare from its side.
    if (!termLabel.equals(currentLabel)) {
        String errmsg = String.format("Wrong term label %s instead of %s for %s",
            termLabel,
            currentLabel,
            id.getValue());
        throw new HpoAnnotationModelException(errmsg);
    }
}
/**
 * Checks the optional age-of-onset id and label. An absent id is valid as long as no label
 * is given. A present id must be known to the ontology, must not be an alt_id, must be one
 * of {@link #onsetSubhierarchyTermIds}, and must come with the ontology's name as label.
 *
 * @param id onset term id as String, may be null or empty
 * @param termLabel onset label, may be null or empty if the id is absent
 * @param ontology reference to HPO ontology
 * @throws HpoAnnotationModelException if any of the conditions above is violated
 */
private static void checkAgeOfOnsetFields(String id, String termLabel, Ontology ontology)
    throws HpoAnnotationModelException {
    if (id == null || id.isEmpty()) {
        // valid, onset is not required, but let's check that there is not a stray label
        if (termLabel != null && !termLabel.isEmpty()) {
            throw new HpoAnnotationModelException("Onset ID empty but Onset label present");
        }
        return; // OK!
    }
    TermId tid = TermId.of(id);
    Term term = ontology.getTermMap().get(tid);
    if (term == null) {
        throw new HpoAnnotationModelException(String.format("Onset ID not found: \"%s\"", id));
    }
    // The term found under an alt_id carries a different (primary) id.
    TermId current = term.id();
    if (!current.equals(tid)) {
        throw new ObsoleteTermIdRuntimeException(String.format("Usage of (obsolete) alt_id %s for %s (%s)",
            tid.getValue(),
            current.getValue(),
            term.getName()));
    }
    if (!onsetSubhierarchyTermIds.contains(tid)) {
        throw new HpoAnnotationModelException("Invalid ID in onset ID field: \"" + tid + "\"");
    }
    // if we get here, the Age of onset id was OK
    // now let's check the label
    if (termLabel == null || termLabel.isEmpty()) {
        throw new HpoAnnotationModelException("Missing HPO term label for onset id=" + id);
    }
    String currentLabel = term.getName();
    // termLabel is known to be non-null here, so compare from its side.
    if (!termLabel.equals(currentLabel)) {
        String errmsg = String.format("Wrong onset term label %s instead of %s for %s",
            termLabel,
            currentLabel,
            tid.getValue());
        throw new HpoAnnotationModelException(errmsg);
    }
}
/**
 * Checks that the evidence code is one of {@link #EVIDENCE_CODES}.
 *
 * @param evi the evidence code to check
 * @throws HpoAnnotationModelException if the code is null or not an allowed evidence code
 */
private static void checkEvidence(String evi) throws HpoAnnotationModelException {
    // Sets created with Set.of throw NullPointerException on contains(null), so test for null first.
    if (evi == null || !EVIDENCE_CODES.contains(evi)) {
        throw new HpoAnnotationModelException(String.format("Invalid evidence code: \"%s\"", evi));
    }
}
/**
 * There are 3 correct formats for frequency. For example, 4/7, 32% (or 32.6%), or
 * an HPO term from the frequency subontology. An absent frequency is valid too.
 *
 * @param freq the frequency string, may be null or empty
 * @param ontology reference to HPO ontology (used to name an offending term in the error message)
 * @throws HpoAnnotationModelException if the string matches none of the formats, the ratio
 * has a zero denominator or a numerator larger than the denominator, the percentage is not
 * in (0, 100], or the term is not one of {@link #frequencySubhierarchyTermIds}
 */
private static void checkFrequency(String freq, Ontology ontology) throws HpoAnnotationModelException {
    // it is ok not to have frequency data
    if (freq == null || freq.isEmpty()) {
        return;
    }
    Matcher matcher = RATIO_PATTERN.matcher(freq);
    if (matcher.matches()) {
        int numerator = Integer.parseInt(matcher.group("numerator"));
        int denominator = Integer.parseInt(matcher.group("denominator"));
        if (numerator > denominator || denominator == 0) {
            throw new HpoAnnotationModelException(String.format("Malformed frequency term: \"%s\"", freq));
        }
        return;
    }
    matcher = PERCENTAGE_PATTERN.matcher(freq);
    if (matcher.matches()) {
        float percent = Float.parseFloat(matcher.group("value"));
        if (percent > 100f || percent <= 0f) {
            throw new HpoAnnotationModelException(String.format("Malformed frequency term: \"%s\"", freq));
        }
        return;
    }
    if (!freq.matches("HP:\\d{7}")) {
        // cannot be a valid frequency term
        throw new HpoAnnotationModelException(String.format("Malformed frequency term: \"%s\"", freq));
    }
    // if we get here and we can validate that the frequency term comes from the right subontology,
    // then the item is valid
    TermId id;
    try {
        id = TermId.of(freq);
    } catch (PhenolRuntimeException pre) {
        throw new HpoAnnotationModelException(String.format("Could not parse frequency term id: \"%s\"", freq));
    }
    if (!frequencySubhierarchyTermIds.contains(id)) {
        // The id may be unknown to the ontology; fall back to the raw id rather than
        // failing with a NullPointerException while building the error message.
        Term term = ontology.getTermMap().get(id);
        String name = term != null ? term.getName() : "n/a";
        String value = term != null ? term.id().getValue() : id.getValue();
        throw new HpoAnnotationModelException(String.format("Usage of incorrect term for frequency: %s [%s]",
            name,
            value));
    }
}
/**
 * The sex entry marks annotations that are specific to either males or females and is
 * usually empty. If present, it must be MALE or FEMALE; capitalization is not enforced.
 *
 * @param sex The sex-specificity entry
 * @throws HpoAnnotationModelException if the sex-specificity field is malformed.
 */
private static void checkSexEntry(String sex) throws HpoAnnotationModelException {
    if (sex == null || sex.isEmpty()) {
        return; // OK, not required
    }
    boolean recognized = sex.equalsIgnoreCase("MALE") || sex.equalsIgnoreCase("FEMALE");
    if (!recognized) {
        throw new HpoAnnotationModelException(String.format("Malformed sex entry: \"%s\"", sex));
    }
}
/**
 * The negation string may be null or empty; any other value must be exactly "NOT".
 *
 * @param negation Must be either the empty/null String or "NOT"
 * @throws HpoAnnotationModelException if the value is present but is not "NOT"
 */
private static void checkNegation(String negation) throws HpoAnnotationModelException {
    boolean absent = negation == null || negation.isEmpty();
    if (absent || negation.equals("NOT")) {
        return;
    }
    throw new HpoAnnotationModelException(String.format("Malformed negation entry: \"%s\"", negation));
}
/**
 * Checks the optional modifier field. If present, it must be a semicolon-separated list of
 * term ids (HP:0000001;HP:0000002;...), each of which has HP:0012823, HP:0011008 or
 * HP:0003679 among its ancestor term ids as reported by the ontology.
 *
 * @param modifierString the modifier field, may be null or empty
 * @param ontology reference to HPO ontology
 * @throws HpoAnnotationModelException if an id is malformed or lies outside the allowed branches
 */
private static void checkModifier(String modifierString, Ontology ontology) throws HpoAnnotationModelException {
    if (modifierString == null || modifierString.isEmpty()) return; // OK, not required
    // If something is present in this field, it must be in the form of
    // HP:0000001;HP:0000002;...
    TermId clinicalModifier = TermId.of("HP:0012823");
    TermId temporalPattern = TermId.of("HP:0011008");
    TermId paceOfProgression = TermId.of("HP:0003679");
    for (String a : modifierString.split(";")) {
        TermId tid;
        Set<TermId> ancs;
        try {
            tid = TermId.of(a);
            ancs = ontology.getAncestorTermIds(tid);
        } catch (PhenolRuntimeException e) {
            throw new HpoAnnotationModelException(String.format("Malformed modifier term id: \"%s\"", a));
        }
        if (!ancs.contains(clinicalModifier) && !ancs.contains(temporalPattern) &&
            !ancs.contains(paceOfProgression)) {
            // The id may be unknown to the ontology; do not fail with a
            // NullPointerException while building the error message.
            Term term = ontology.getTermMap().get(tid);
            throw new HpoAnnotationModelException(String.format("Use of wrong HPO term in modifier field: %s [%s]",
                term != null ? term.getName() : "n/a",
                tid.getValue()));
        }
    }
}
/**
 * Checks the citation string. It must be non-empty, have a colon that is not the first
 * character, contain neither a double colon nor a space, start with one of
 * {@link #VALID_CITATION_PREFIXES}, and have at least one character after the first colon.
 *
 * @param pub the citation string to check
 * @throws HpoAnnotationModelException on the first rule that is violated
 */
private static void checkPublication(String pub) throws HpoAnnotationModelException {
    if (pub == null || pub.isEmpty()) {
        throw new HpoAnnotationModelException("Empty citation string");
    }
    final int colonAt = pub.indexOf(':');
    // there needs to be a colon in the middle of the string
    if (colonAt < 1) {
        throw new HpoAnnotationModelException(String.format("Malformed citation id (not a CURIE): \"%s\"", pub));
    }
    // should only be one colon separating prefix and id
    if (pub.indexOf("::") >= 0) {
        throw new HpoAnnotationModelException(String.format("Malformed citation id (double colon): \"%s\"", pub));
    }
    if (pub.indexOf(' ') >= 0) {
        throw new HpoAnnotationModelException(String.format("Malformed citation id (contains space): \"%s\"", pub));
    }
    final String citationPrefix = pub.substring(0, colonAt);
    if (!VALID_CITATION_PREFIXES.contains(citationPrefix)) {
        throw new HpoAnnotationModelException(String.format("Did not recognize publication prefix: \"%s\" ", pub));
    }
    // nothing follows the colon
    if (colonAt + 1 >= pub.length()) {
        throw new HpoAnnotationModelException(String.format("Malformed publication string: \"%s\" ", pub));
    }
}
/**
 * Checks the biocuration field: it must be non-empty, and {@link #biocurationPattern}
 * must be found in every semicolon-separated item.
 *
 * @param entrylist the biocuration field, e.g., HPO:skoehler[2018-09-22]
 * @throws HpoAnnotationModelException if the field is empty or an item does not contain the pattern
 */
private static void checkBiocuration(String entrylist) throws HpoAnnotationModelException {
    boolean missing = entrylist == null || entrylist.isEmpty();
    if (missing) {
        throw new HpoAnnotationModelException("empty biocuration entry");
    }
    for (String item : entrylist.split(";")) {
        if (biocurationPattern.matcher(item).find()) {
            continue;
        }
        throw new HpoAnnotationModelException(String.format("Malformed biocuration entry: \"%s\"", item));
    }
}
/**
 * Determines the one-letter aspect code of a term from its position in the ontology:
 * "P" below (or at) the phenotypic abnormality root, "I" below (or at) the inheritance
 * root, "C" below clinical course, "M" below clinical modifier. The checks are made in
 * the order written below and the first hit wins.
 *
 * @param tid id of the term whose aspect is wanted
 * @param ontology reference to HPO ontology
 * @return one of "P", "I", "C", "M"
 * @throws HpoAnnotationModelException if the term is unknown or no aspect applies
 */
private String getAspect(TermId tid, Ontology ontology) throws HpoAnnotationModelException {
    Term found = ontology.getTermMap().get(tid);
    if (found == null) {
        throw new HpoAnnotationModelException("Cannot compute Aspect of NULL term");
    }
    // use the id carried by the term itself, in case tid is an alt_id
    final TermId primary = found.id();
    if (existsPath(ontology, primary, HpoSubOntologyRootTermIds.PHENOTYPIC_ABNORMALITY)) {
        return "P"; // organ/phenotype abnormality
    }
    if (existsPath(ontology, primary, HpoModeOfInheritanceTermIds.INHERITANCE_ROOT)) {
        return "I";
    }
    if (existsPath(ontology, primary, HpoClinicalModifierTermIds.CLINICAL_COURSE)) {
        return "C";
    }
    if (existsPath(ontology, primary, HpoSubOntologyRootTermIds.CLINICAL_MODIFIER)) {
        return "M";
    }
    if (primary.equals(HpoSubOntologyRootTermIds.PHENOTYPIC_ABNORMALITY)) {
        return "P"; // the Orphanet annotations include some entries to the phenotype root
    }
    if (primary.equals(HpoModeOfInheritanceTermIds.INHERITANCE_ROOT)) {
        return "I"; // the Orphanet annotations include some entries to the inheritance root
    }
    throw new HpoAnnotationModelException("Could not determine aspect of TermId " + tid.getValue());
}
/**
 * Builds the tab-separated line for this entry in the big-file column order: disease id,
 * disease name, qualifier, HPO id, reference, evidence code, onset, frequency, sex,
 * modifier, aspect, biocuration.
 *
 * @param ontology reference to HPO ontology (needed to compute the aspect)
 * @return the twelve columns joined by tabs
 * @throws HpoAnnotationModelException if the aspect of the phenotype term cannot be determined
 */
public String toBigFileLine(Ontology ontology) throws HpoAnnotationModelException {
    // The onset id getter may return null; the frequency getter already maps null to "".
    String onset = getAgeOfOnsetId();
    if (onset == null) {
        onset = EMPTY_STRING;
    }
    return String.join("\t",
        getDiseaseID(),                        // DB_Object_ID
        getDiseaseName(),                      // DB_Name
        getNegation(),                         // Qualifier
        getPhenotypeId().getValue(),           // HPO_ID
        getPublication(),                      // DB_Reference
        getEvidenceCode(),                     // Evidence_Code
        onset,                                 // Onset
        getFrequencyModifier(),                // Frequency
        getSex(),                              // Sex
        getModifier(),                         // Modifier
        getAspect(getPhenotypeId(), ontology), // Aspect
        getBiocuration());                     // Biocuration
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy