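// File: protobuf.opencb.evidence.proto
// (The text "Maven / Gradle / Ivy" / "The newest version!" that preceded this
// schema is residue from the artifact listing page, not part of the schema.)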
// This schema is written in proto3 syntax.
syntax = "proto3";
// Protobuf package that scopes every message and enum declared in this file.
package protobuf.opencb;
// Java package into which the generated Java code is placed.
option java_package = "org.opencb.biodata.models.variant.protobuf";
// Name of the generated Java outer (wrapper) class.
option java_outer_classname = "EvidenceEntryProto";
// NOTE(review): java_generate_equals_and_hash is marked deprecated in
// descriptor.proto; confirm against the protoc version in use before removing.
option java_generate_equals_and_hash = true;
// Describes the source a piece of evidence was taken from.
message EvidenceSource {
  // Name of the source.
  string name = 1;

  // Version of the source.
  string version = 2;

  // Date of the source, carried as a string (no format is specified here).
  string date = 3;
}
// Information about a single submission of evidence.
message EvidenceSubmission {
  // Who submitted the evidence.
  string submitter = 1;

  // When the submission was made, carried as a string (no format is
  // specified here).
  string date = 2;

  // Identifier of the submission.
  string id = 3;
}
// Somatic details attached to a piece of evidence.
message SomaticInformation {
  // Primary site.
  string primarySite = 1;

  // Subtype of the primary site.
  string siteSubtype = 2;

  // Primary histology.
  string primaryHistology = 3;

  // Subtype of the histology.
  string histologySubtype = 4;

  // Origin of the tumour.
  string tumourOrigin = 5;

  // Source of the sample, e.g. blood-bone marrow, cell-line, pancreatic.
  string sampleSource = 6;
}
// Enumeration of the supported modes of inheritance. Each value is described
// on the value itself.
enum ModeOfInheritance {
  // MONOALLELIC, autosomal or pseudoautosomal, imprinted status unknown.
  monoallelic = 0;

  // MONOALLELIC, autosomal or pseudoautosomal, not imprinted.
  monoallelic_not_imprinted = 1;

  // MONOALLELIC, autosomal or pseudoautosomal, maternally imprinted
  // (paternal allele expressed).
  monoallelic_maternally_imprinted = 2;

  // MONOALLELIC, autosomal or pseudoautosomal, paternally imprinted
  // (maternal allele expressed).
  monoallelic_paternally_imprinted = 3;

  // BIALLELIC, autosomal or pseudoautosomal.
  biallelic = 4;

  // BOTH monoallelic and biallelic, autosomal or pseudoautosomal.
  monoallelic_and_biallelic = 5;

  // BOTH monoallelic and biallelic, autosomal or pseudoautosomal (but
  // BIALLELIC mutations cause a more SEVERE disease form).
  monoallelic_and_more_severe_biallelic = 6;

  // X-LINKED: hemizygous mutation in males, biallelic mutations in females.
  xlinked_biallelic = 7;

  // X-LINKED: hemizygous mutation in males, monoallelic mutations in females
  // may cause disease (may be less severe, later onset than males).
  xlinked_monoallelic = 8;

  // MITOCHONDRIAL.
  mitochondrial = 9;

  // Unknown.
  unknown = 10;

  // Not applicable.
  NA = 11;
}
// A phenotype together with its inheritance pattern.
message HeritableTrait {
  // The trait, e.g. an HPO term, MIM term, DO term, etc.
  string trait = 1;

  // How the trait is inherited.
  ModeOfInheritance inheritanceMode = 2;
}
// Kinds of genomic feature that can be referenced.
enum FeatureTypes {
  regulatory_region = 0;
  gene = 1;
  transcript = 2;
  protein = 3;
}
// A genomic feature.
message GenomicFeature {
  // Type of the feature.
  FeatureTypes featureType = 1;

  // Feature used; this should be a feature ID from Ensembl
  // (e.g. ENST00000544455).
  string ensemblId = 2;

  // Other IDs. Fields like the HGNC symbol, if available, should be added
  // here.
  // A map field must declare its key and value types; both are strings.
  // NOTE(review): presumably keyed by the name of the external reference with
  // the identifier as value — confirm against producers of this message.
  map<string, string> xrefs = 3;
}
// Mendelian variants classification with ACMG terminology as defined in
// Richards, S. et al. (2015). Standards and guidelines for the interpretation
// of sequence variants: a joint consensus recommendation of the American
// College of Medical Genetics and Genomics and the Association for Molecular
// Pathology. Genetics in Medicine, 17(5), 405–423.
// https://doi.org/10.1038/gim.2015.30.
//
// Classification for pharmacogenomic variants, variants associated to disease
// and somatic variants based on the ACMG recommendations and ClinVar
// classification (https://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/).
enum ClinicalSignificance {
  // Benign variants interpreted for Mendelian disorders.
  benign = 0;

  // Likely benign variants interpreted for Mendelian disorders with a
  // certainty of at least 90%.
  likely_benign = 1;

  // NOTE(review): presumably "variant of uncertain significance", which would
  // overlap with uncertain_significance below — confirm the intended usage.
  VUS = 2;

  // Likely pathogenic variants interpreted for Mendelian disorders with a
  // certainty of at least 90%.
  likely_pathogenic = 3;

  // Pathogenic variants interpreted for Mendelian disorders.
  pathogenic = 4;

  // Uncertain significance variants interpreted for Mendelian disorders.
  // Variants with conflicting evidences should be classified as
  // uncertain_significance.
  uncertain_significance = 5;
}
// Pharmacogenomics classification of a variant by drug response.
enum DrugResponseClassification {
  // A variant that confers response to a treatment.
  responsive = 0;

  // A variant that confers resistance to a treatment.
  resistant = 1;

  // A variant that is associated with drug-induced toxicity.
  toxicity = 2;

  // A variant that is required in order for a particular drug to be
  // prescribed.
  indication = 3;

  // A variant that, if present, means a particular drug should not be
  // prescribed.
  contraindication = 4;

  // A variant that results in an alteration in dosing of a particular drug in
  // order to achieve INR, reduce toxicity or increase efficacy.
  dosing = 5;

  // Increased vigilance or increased dosage monitoring may be required for a
  // patient with this variant to look for signs of adverse drug reactions.
  increased_monitoring = 6;

  // A variant that affects the efficacy of the treatment.
  efficacy = 7;
}
// How a variant is associated to a given trait.
enum TraitAssociation {
  // Established risk allele for variants associated to disease.
  established_risk_allele = 0;

  // Likely risk allele for variants associated to disease.
  likely_risk_allele = 1;

  // Uncertain risk allele for variants associated to disease.
  uncertain_risk_allele = 2;

  // Protective allele.
  protective = 3;
}
// Classification of a variant by its relation to cancer aetiology.
enum TumorigenesisClassification {
  // Driver variants.
  driver = 0;

  // Passenger variants.
  passenger = 1;

  // Modifier variants.
  modifier = 2;
}
// Evidence of pathogenicity and benign impact as defined in Richards, S. et
// al. (2015). Standards and guidelines for the interpretation of sequence
// variants: a joint consensus recommendation of the American College of
// Medical Genetics and Genomics and the Association for Molecular Pathology.
// Genetics in Medicine, 17(5), 405–423. https://doi.org/10.1038/gim.2015.30
//
// Evidence of pathogenicity:
//
// * `very_strong`:
//   - PVS1 null variant (nonsense, frameshift, canonical ±1 or 2 splice
//     sites, initiation codon, single or multiexon deletion) in a gene where
//     LOF is a known mechanism of disease
// * `strong`:
//   - PS1 Same amino acid change as a previously established pathogenic
//     variant regardless of nucleotide change
//   - PS2 De novo (both maternity and paternity confirmed) in a patient with
//     the disease and no family history
//   - PS3 Well-established in vitro or in vivo functional studies supportive
//     of a damaging effect on the gene or gene product
//   - PS4 The prevalence of the variant in affected individuals is
//     significantly increased compared with the prevalence in controls
// * `moderate`:
//   - PM1 Located in a mutational hot spot and/or critical and
//     well-established functional domain (e.g., active site of an enzyme)
//     without benign variation
//   - PM2 Absent from controls (or at extremely low frequency if recessive)
//     in Exome Sequencing Project, 1000 Genomes Project, or Exome Aggregation
//     Consortium
//   - PM3 For recessive disorders, detected in trans with a pathogenic
//     variant
//   - PM4 Protein length changes as a result of in-frame
//     deletions/insertions in a nonrepeat region or stop-loss variants
//   - PM5 Novel missense change at an amino acid residue where a different
//     missense change determined to be pathogenic has been seen before
//   - PM6 Assumed de novo, but without confirmation of paternity and
//     maternity
// * `supporting`:
//   - PP1 Cosegregation with disease in multiple affected family members in
//     a gene definitively known to cause the disease
//   - PP2 Missense variant in a gene that has a low rate of benign missense
//     variation and in which missense variants are a common mechanism of
//     disease
//   - PP3 Multiple lines of computational evidence support a deleterious
//     effect on the gene or gene product (conservation, evolutionary,
//     splicing impact, etc.)
//   - PP4 Patient’s phenotype or family history is highly specific for a
//     disease with a single genetic etiology
//   - PP5 Reputable source recently reports variant as pathogenic, but the
//     evidence is not available to the laboratory to perform an independent
//     evaluation
//
// Evidence of benign impact:
//
// * `stand_alone`:
//   - BA1 Allele frequency is >5% in Exome Sequencing Project, 1000 Genomes
//     Project, or Exome Aggregation Consortium
// * `strong`:
//   - BS1 Allele frequency is greater than expected for disorder
//   - BS2 Observed in a healthy adult individual for a recessive
//     (homozygous), dominant (heterozygous), or X-linked (hemizygous)
//     disorder, with full penetrance expected at an early age
//   - BS3 Well-established in vitro or in vivo functional studies show no
//     damaging effect on protein function or splicing
//   - BS4 Lack of segregation in affected members of a family
// * `supporting`:
//   - BP1 Missense variant in a gene for which primarily truncating variants
//     are known to cause disease
//   - BP2 Observed in trans with a pathogenic variant for a fully penetrant
//     dominant gene/disorder or observed in cis with a pathogenic variant in
//     any inheritance pattern
//   - BP3 In-frame deletions/insertions in a repetitive region without a
//     known function
//   - BP4 Multiple lines of computational evidence suggest no impact on gene
//     or gene product (conservation, evolutionary, splicing impact, etc.)
//   - BP5 Variant found in a case with an alternate molecular basis for
//     disease
//   - BP6 Reputable source recently reports variant as benign, but the
//     evidence is not available to the laboratory to perform an independent
//     evaluation
//   - BP7 A synonymous (silent) variant for which splicing prediction
//     algorithms predict no impact to the splice consensus sequence nor the
//     creation of a new splice site AND the nucleotide is not highly
//     conserved
enum EvidenceImpact {
  // Pathogenicity only (PVS1).
  very_strong = 0;

  // Listed under both pathogenicity (PS1-PS4) and benign impact (BS1-BS4).
  strong = 1;

  // Pathogenicity only (PM1-PM6).
  moderate = 2;

  // Listed under both pathogenicity (PP1-PP5) and benign impact (BP1-BP7).
  supporting = 3;

  // Benign impact only (BA1).
  stand_alone = 4;
}
// The list of ethnic categories in ONS16. Each single-letter code is
// described on the value itself.
enum EthnicCategory {
  // Mixed: White and Black Caribbean.
  D = 0;

  // Mixed: White and Black African.
  E = 1;

  // Mixed: White and Asian.
  F = 2;

  // Mixed: Any other mixed background.
  G = 3;

  // White: British.
  A = 4;

  // White: Irish.
  B = 5;

  // White: Any other White background.
  C = 6;

  // Asian or Asian British: Any other Asian background.
  L = 7;

  // Black or Black British: Caribbean.
  M = 8;

  // Black or Black British: African.
  N = 9;

  // Asian or Asian British: Indian.
  H = 10;

  // Asian or Asian British: Pakistani.
  J = 11;

  // Asian or Asian British: Bangladeshi.
  K = 12;

  // Black or Black British: Any other Black background.
  P = 13;

  // Other Ethnic Groups: Any other ethnic group.
  S = 14;

  // Other Ethnic Groups: Chinese.
  R = 15;

  // Not stated.
  Z = 16;
}
// The penetrance assumed in the analysis.
enum Penetrance {
  complete = 0;
  incomplete = 1;
}
// Functional effect of a variant, expressed with Sequence Ontology terms.
// The SO accession of each term is given on the value.
enum VariantFunctionalEffect {
  // SO_0002052 (http://purl.obolibrary.org/obo/SO_0002052)
  dominant_negative_variant = 0;

  // SO_0002053 (http://purl.obolibrary.org/obo/SO_0002053)
  gain_of_function_variant = 1;

  // SO_0001773 (http://purl.obolibrary.org/obo/SO_0001773)
  lethal_variant = 2;

  // SO_0002054 (http://purl.obolibrary.org/obo/SO_0002054)
  loss_of_function_variant = 3;

  // SO_0001786 (http://purl.obolibrary.org/obo/SO_0001786)
  loss_of_heterozygosity = 4;

  // SO_0002055 (http://purl.obolibrary.org/obo/SO_0002055)
  null_variant = 5;
}
// Origin of a variant, expressed with Sequence Ontology terms. The SO
// accession of each term is given on the value.
enum AlleleOrigin {
  // SO_0001781: de novo variant.
  // http://purl.obolibrary.org/obo/SO_0001781
  de_novo_variant = 0;

  // SO_0001778: germline variant.
  // http://purl.obolibrary.org/obo/SO_0001778
  germline_variant = 1;

  // SO_0001775: maternal variant.
  // http://purl.obolibrary.org/obo/SO_0001775
  maternal_variant = 2;

  // SO_0001776: paternal variant.
  // http://purl.obolibrary.org/obo/SO_0001776
  paternal_variant = 3;

  // SO_0001779: pedigree specific variant.
  // http://purl.obolibrary.org/obo/SO_0001779
  pedigree_specific_variant = 4;

  // SO_0001780: population specific variant.
  // http://purl.obolibrary.org/obo/SO_0001780
  population_specific_variant = 5;

  // SO_0001777: somatic variant.
  // http://purl.obolibrary.org/obo/SO_0001777
  somatic_variant = 6;
}
// Confidence levels taken from the Confidence Information Ontology (CIO).
// The CIO accession of each term is given on the value.
enum Confidence {
  // CIO_0000031: low confidence level.
  // http://purl.obolibrary.org/obo/CIO_0000031
  low_confidence_level = 0;

  // CIO_0000030: medium confidence level.
  // http://purl.obolibrary.org/obo/CIO_0000030
  medium_confidence_level = 1;

  // CIO_0000029: high confidence level.
  // http://purl.obolibrary.org/obo/CIO_0000029
  high_confidence_level = 2;

  // CIO_0000039: rejected.
  // http://purl.obolibrary.org/obo/CIO_0000039
  rejected = 3;
}
// The consistency of evidences for a given phenotype. This aggregates all
// evidences for a given phenotype and all evidences with no phenotype
// associated (e.g.: in silico impact prediction, population frequency).
// This is based on the Confidence Information Ontology (CIO) terms; the CIO
// accession of each term is given on the value.
enum ConsistencyStatus {
  // CIO_0000033: congruent, all evidences are consistent.
  // http://purl.obolibrary.org/obo/CIO_0000033
  congruent = 0;

  // CIO_0000034: conflict, there are conflicting evidences. This should
  // correspond to a `ClinicalSignificance` of `uncertain_significance` for
  // mendelian disorders.
  // http://purl.obolibrary.org/obo/CIO_0000034
  conflict = 1;

  // CIO_0000036: weakly conflicting.
  // http://purl.obolibrary.org/obo/CIO_0000036
  weakly_conflicting = 2;

  // CIO_0000035: strongly conflicting.
  // http://purl.obolibrary.org/obo/CIO_0000035
  strongly_conflicting = 3;
}
// A name-value property.
// Names are restricted to ontology ids; they should be checked against
// existing ontologies in resources like Ontology Lookup Service.
message Property {
  // Ontology term id or accession in OBO format ${ONTOLOGY_ID}:${TERM_ID}
  // (http://www.obofoundry.org/id-policy.html).
  string id = 1;

  // Name of the ontology term.
  string name = 2;

  // Optional value for the ontology term. The type of the value is not
  // checked (i.e.: the pvalue term could be set to "significant" or to
  // "0.0001").
  string value = 3;
}
// Classification of a variant along several independent properties.
message VariantClassification {
  // Clinical significance of the variant.
  ClinicalSignificance clinicalSignificance = 1;

  // Pharmacogenomics (drug response) classification of the variant.
  DrugResponseClassification drugResponseClassification = 2;

  // Trait association of the variant.
  TraitAssociation traitAssociation = 3;

  // Tumorigenesis classification of the variant.
  TumorigenesisClassification tumorigenesisClassification = 4;

  // Functional effect of the variant.
  VariantFunctionalEffect functionalEffect = 5;
}
// An entry for an evidence.
message EvidenceEntry {
  // Source of the evidence.
  EvidenceSource source = 1;

  // The list of submissions.
  repeated EvidenceSubmission submissions = 2;

  // The somatic information.
  SomaticInformation somaticInformation = 3;

  // URL of the source, if any.
  string url = 4;

  // ID of the record in the source.
  string id = 5;

  // The reference genome assembly.
  string assembly = 6;

  // List of allele origins.
  repeated AlleleOrigin alleleOrigin = 7;

  // Heritable traits associated to this evidence.
  repeated HeritableTrait heritableTraits = 8;

  // The genomic features (e.g. transcripts) to which the evidence refers.
  // The kind of each feature is given by its featureType.
  repeated GenomicFeature genomicFeatures = 9;

  // The variant classification.
  VariantClassification variantClassification = 10;

  // Impact of evidence. Should be coherent with the classification of impact
  // if provided.
  EvidenceImpact impact = 11;

  // The curation confidence.
  Confidence confidence = 12;

  // The consistency status. This is applicable to complex evidences
  // (e.g.: ClinVar).
  ConsistencyStatus consistencyStatus = 13;

  // Ethnicity.
  EthnicCategory ethnicity = 14;

  // The penetrance of the phenotype for this genotype. This is a Penetrance
  // enum value (complete or incomplete), not a numeric value.
  Penetrance penetrance = 15;

  // Variable expressivity of a given phenotype for the same genotype.
  bool variableExpressivity = 16;

  // Evidence description.
  string description = 17;

  // A list of additional properties in the form name-value.
  repeated Property additionalProperties = 18;

  // Bibliography.
  repeated string bibliography = 19;
}