// avro.variant.avdl (Maven / Gradle / Ivy artifact listing)
// The newest version!
@namespace("org.opencb.biodata.models.variant.avro")
protocol Variants {
import idl "variantAnnotation.avdl";
/**
 * Kind of genomic variation. The classification depends mostly on the length
 * of the change:
 *
 *  - SNV: a single nucleotide is involved; the length does not change
 *  - MNV: multiple nucleotides are involved; the length does not change
 *  - INDEL: insertion or deletion of less than SV_THRESHOLD (50) nucleotides
 *  - SV: structural variation, a large change of more than SV_THRESHOLD nucleotides
 *  - COPY_NUMBER: the number of copies of a region is altered
 *
 * Trailing comments give the SO (Sequence Ontology) accession where one is listed.
 * SNP, MNP and CNV are deprecated
 * (NOTE(review): presumably in favour of SNV, MNV and COPY_NUMBER - confirm).
 */
enum VariantType {
    SNV,                    // SO:0001483
    MNV,                    // SO:0002007
    INDEL,                  // SO:1000032
    SV,                     // SO:0001537
    INSERTION,              // SO:0000667
    DELETION,               // SO:0000159
    TRANSLOCATION,          // SO:0000199
    INVERSION,              // SO:1000036
    COPY_NUMBER,            // SO:0001019
    COPY_NUMBER_GAIN,       // SO:0001742
    COPY_NUMBER_LOSS,       // SO:0001743
    DUPLICATION,            // SO:1000035
    TANDEM_DUPLICATION,     // SO:1000173
    BREAKEND,

    NO_VARIATION,           // Defined in HTSJDK
    SYMBOLIC,               // Defined in HTSJDK
    MIXED,                  // Defined in HTSJDK

    SNP,                    // @Deprecated
    MNP,                    // @Deprecated
    CNV                     // @Deprecated
}
/**
 * Statistics of a variant computed over one cohort of a study.
 *
 * NOTE(review): the value types of the four map fields (int for counts,
 * float for frequencies) were missing from this listing and have been
 * restored to match the sibling scalar fields - confirm against upstream.
 */
record VariantStats {
    /**
     * Unique cohort identifier within the study.
     **/
    string cohortId;

    /**
     * Count of samples with non-missing genotypes in this variant from the cohort.
     * This value is used as denominator for genotypeFreq.
     **/
    union { null, int } sampleCount;

    /**
     * Count of files with samples from the cohort that reported this variant.
     * This value is used as denominator for filterFreq.
     **/
    union { null, int } fileCount;

    /**
     * Total number of alleles in called genotypes. It does not include missing alleles.
     * This value is used as denominator for refAlleleFreq and altAlleleFreq.
     **/
    union { null, int } alleleCount;

    /**
     * Number of reference alleles found in this variant.
     **/
    union { null, int } refAlleleCount;

    /**
     * Number of main alternate alleles found in this variant. It does not include secondary alternates.
     **/
    union { null, int } altAlleleCount;

    /**
     * Reference allele frequency calculated from refAlleleCount and alleleCount, in the range [0,1]
     **/
    union { null, float } refAlleleFreq;

    /**
     * Alternate allele frequency calculated from altAlleleCount and alleleCount, in the range [0,1]
     **/
    union { null, float } altAlleleFreq;

    /**
     * Number of missing alleles.
     **/
    union { null, int } missingAlleleCount;

    /**
     * Number of genotypes with all alleles missing (e.g. ./.). It does not count partially missing genotypes like "./0" or "./1".
     **/
    union { null, int } missingGenotypeCount;

    /**
     * Number of occurrences for each genotype.
     * This does not include genotypes with all alleles missing (e.g. ./.), but it includes partially missing genotypes like "./0" or "./1".
     * Total sum of counts should be equal to the count of samples.
     **/
    map<int> genotypeCount = {};

    /**
     * Genotype frequency for each genotype found, calculated from the genotypeCount and sampleCount, in the range [0,1]
     * The sum of frequencies should be 1.
     **/
    map<float> genotypeFreq = {};

    /**
     * The number of occurrences for each FILTER value in files from samples in this cohort reporting this variant.
     * As each file can contain more than one filter value (usually separated by ';'), the total sum of counts could be greater than the count of files.
     **/
    map<int> filterCount;

    /**
     * Frequency of each filter calculated from the filterCount and fileCount, in the range [0,1]
     **/
    map<float> filterFreq;

    /**
     * The number of files from samples in this cohort reporting this variant with valid QUAL values.
     * This value is used as denominator to obtain the qualityAvg.
     */
    union { null, int } qualityCount;

    /**
     * The average Quality value for files with valid QUAL values from samples in this cohort reporting this variant.
     * Some files may not have defined the QUAL value, so the sampling could be less than the fileCount.
     **/
    union { null, float } qualityAvg;

    /**
     * Minor allele frequency. Frequency of the less common allele between the reference and the main alternate alleles.
     * This value does not take into account secondary alternates.
     **/
    union { null, float } maf;

    /**
     * Minor genotype frequency. Frequency of the less common genotype seen in this variant.
     * This value takes into account all values from the genotypeFreq map.
     **/
    union { null, float } mgf;

    /**
     * Allele with minor frequency.
     **/
    union { null, string } mafAllele;

    /**
     * Genotype with minor frequency.
     **/
    union { null, string } mgfGenotype;
}
/**
 * A named score assigned to a variant, calculated from one cohort or,
 * optionally, from a pair of cohorts.
 */
record VariantScore {
    /** Identifier of the variant score. */
    string id;

    /** Main cohort the score was calculated from. */
    string cohort1;

    /** Secondary cohort used in the calculation; optional, null by default. */
    union { null, string } cohort2 = null;

    /** Value of the score. */
    float score;

    /** P value of the score; null by default. */
    union { null, float } pValue = null;
}
/**
 * Reference to the variant call as it was before normalization.
 */
record OriginalCall {
    /** ID of the original, pre-normalization variant, including all secondary alternates. */
    string variantId;

    /**
     * Index of the alternate allele within the original multi-allelic
     * variant call that this variant was decomposed from.
     */
    union { null, int } alleleIndex;
}
/**
 * Per-file information about a variant within a study.
 */
record FileEntry {
    /**
     * Unique identifier of the source file.
     */
    union { null, string } fileId;

    /**
     * Original call position for the variant, if the file was normalized.
     *
     * {position}:{reference}:{alternate}(,{other_alternate})*:{allele_index}
     */
    union { null, OriginalCall } call;

    /**
     * Optional data that probably depend on the format of the file the
     * variant was initially read from.
     *
     * NOTE(review): the map value type was missing from this listing;
     * string is assumed - confirm against the upstream schema.
     */
    map<string> data;
}
/**
 * Coordinates of an alternate allele. For start, end and reference, a null
 * value means the value is the same as the variant's.
 */
record AlternateCoordinate {
    // NOTE(review): presumably null also means "same chromosome as the variant",
    // in line with start/end/reference - confirm.
    union { null, string } chromosome;

    /** 1-based first position of the alternate; null when it matches the variant's start. */
    union { null, int } start;

    /** 1-based end position of the alternate; null when it matches the variant's end. */
    union { null, int } end;

    /** Reference allele; null when it matches the variant's reference. */
    union { null, string } reference;

    /** Alternate allele. */
    string alternate;

    // Variant type declared for this alternate.
    VariantType type;
}
/**
 * Data stored for one sample of a study.
 */
record SampleEntry {
    // Identifier of the sample; may be null.
    union { null, string } sampleId;

    // NOTE(review): presumably an index into the files list of the enclosing
    // StudyEntry - confirm.
    union { null, int } fileIndex;

    // Values for this sample. StudyEntry documents them as being in the same
    // order as its sampleDataKeys array.
    // NOTE(review): the array item type was missing from this listing;
    // string is assumed - confirm against the upstream schema.
    array<string> data;
}
/**
 * Category of an issue recorded in an IssueEntry.
 */
enum IssueType {
    DUPLICATION,
    DISCREPANCY,
    MENDELIAN_ERROR,
    DE_NOVO,
    COMPOUND_HETEROZYGOUS
}
/**
 * An issue of a given type, together with the sample entry it refers to.
 */
record IssueEntry {
    // Category of the issue.
    IssueType type;

    // Sample entry associated with the issue.
    SampleEntry sample;

    // Additional key/value data attached to the issue.
    // NOTE(review): the map value type was missing from this listing;
    // string is assumed - confirm against the upstream schema.
    map<string> data;
}
/**
 * Information about a variant that is specific to one study: files, samples,
 * issues, statistics and scores.
 *
 * NOTE(review): the item types of the array fields were missing from this
 * listing and have been restored from the record names and field docs -
 * confirm against the upstream schema.
 */
record StudyEntry {
    /**
     * Unique identifier of the study.
     */
    union { null, string } studyId;

    /**
     * List of files from the study where the variant was present.
     */
    array<FileEntry> files = [];

    /**
     * Alternate alleles that appear along with a variant alternate.
     */
    union { null, array<AlternateCoordinate> } secondaryAlternates = null;

    /**
     * Fields stored for each sample.
     */
    array<string> sampleDataKeys;

    /**
     * Genotypes and other sample-related information. Each position is related
     * to one sample. The contents are lists of values in the same order as the
     * sampleDataKeys array. The length of these lists must be the same as that
     * of the sampleDataKeys field.
     */
    array<SampleEntry> samples;

    // Issues recorded for samples of this study; empty by default.
    array<IssueEntry> issues = [];

    /**
     * Statistics of the genomic variation, such as its alleles/genotype count
     * or its minor allele frequency, one entry per cohort (see cohortId).
     */
    array<VariantStats> stats;

    // Variant scores calculated for this study; empty by default.
    array<VariantScore> scores = [];
}
// /**
// * Confidence interval around a position for imprecise variants
// */
// record ConfidenceInterval {
// int right;
//// int behind;
// int left;
//// int forward;
// }
/**
 * Subtype of a structural variant.
 *
 * @Deprecated, use VariantType instead, which declares the same three symbols.
 */
@javaAnnotation("Deprecated")
enum StructuralVariantType {
    COPY_NUMBER_GAIN,       // SO:0001742
    COPY_NUMBER_LOSS,       // SO:0001743
    TANDEM_DUPLICATION      // SO:1000173
}
/**
 * Orientation of the junction described by a breakend.
 *
 * NOTE(review): the third and fourth columns look like the REF / ALT notation
 * of the VCF breakend table - confirm.
 *
 * SE | (Start -> End)   | s | t[p[ | piece extending to the right of p is joined after t
 * SS | (Start -> Start) | s | t]p] | reverse comp piece extending left of p is joined after t
 * ES | (End -> Start)   | s | ]p]t | piece extending to the left of p is joined before t
 * EE | (End -> End)     | s | [p[t | reverse comp piece extending right of p is joined before t
 */
enum BreakendOrientation {
    SE,
    SS,
    ES,
    EE
}
/**
 * Location of the mate of a breakend. Every field is nullable.
 */
record BreakendMate {
    // Chromosome of the mate.
    union { null, string } chromosome;

    // Position of the mate.
    union { null, int } position;

    // NOTE(review): presumably the left and right bounds of a confidence
    // interval around position, for imprecise variants - confirm.
    union { null, int } ciPositionLeft;
    union { null, int } ciPositionRight;
}
/**
 * Breakend details of a structural variation. Every field is nullable.
 */
record Breakend {
    // Location of the mate breakend.
    union { null, BreakendMate } mate;

    // Orientation of the junction; see BreakendOrientation.
    union { null, BreakendOrientation } orientation;

    // NOTE(review): presumably the sequence inserted at the junction - confirm.
    union { null, string } insSeq;
}
/**
 * Extra information carried by structural variants.
 */
record StructuralVariation {
    // NOTE(review): presumably confidence-interval bounds around the variant
    // start and end, for imprecise variants - confirm.
    union { null, int } ciStartLeft;
    union { null, int } ciStartRight;
    union { null, int } ciEndLeft;
    union { null, int } ciEndRight;

    /** Copy count, for CNV variants. */
    union { null, int } copyNumber;

    /**
     * Inserted sequence for long INS.
     * NOTE(review): presumably the left and right ends of the inserted
     * sequence respectively - confirm.
     **/
    union { null, string } leftSvInsSeq;
    union { null, string } rightSvInsSeq;

    /**
     * @deprecated
     */
    union { null, StructuralVariantType } @javaAnnotation("Deprecated") type;

    // Breakend details; null by default.
    union { null, Breakend } breakend = null;
}
/**
 * A genomic variant: its normalized coordinates and alleles, its type, the
 * per-study information it was read with, and its annotation.
 *
 * NOTE(review): the item types of the names and studies arrays were missing
 * from this listing and have been restored - confirm against upstream.
 */
record VariantAvro {
    /**
     * The variant ID.
     */
    union { null, string } id;

    /**
     * Other names used for this genomic variation.
     */
    array<string> names = [];

    /**
     * Chromosome where the genomic variation occurred.
     */
    string chromosome;

    /**
     * Normalized position where the genomic variation starts.
     *
     *  - SNVs have the same start and end position
     *  - Insertions start in the last present position: if the first nucleotide
     *    is inserted in position 6, the start is position 5
     *  - Deletions start in the first previously present position: if the first
     *    deleted nucleotide is in position 6, the start is position 6
     *
     */
    int start;

    /**
     * Normalized position where the genomic variation ends.
     *
     *  - SNVs have the same start and end positions
     *  - Insertions end in the first present position: if the last nucleotide
     *    is inserted in position 9, the end is position 10
     *  - Deletions end in the last previously present position: if the last
     *    deleted nucleotide is in position 9, the end is position 9
     *
     */
    int end;

    /**
     * Reference allele.
     */
    string reference;

    /**
     * Alternate allele.
     */
    string alternate;

    /**
     * Reference strand for this variant
     */
    union { null, string } strand = null;

    /**
     * Information regarding Structural Variants
     */
    union { null, StructuralVariation } sv = null;

    /**
     * Length of the genomic variation, which depends on the variation type.
     *
     *  - SNVs have a length of 1 nucleotide
     *  - Indels have the length of the largest allele
     *
     */
    int length;

    /**
     * Type of variation: single nucleotide, indel or structural variation.
     */
    VariantType type;

    /**
     * Information specific to each study the variant was read from, such as
     * samples or statistics.
     */
    array<StudyEntry> studies;

    /**
     * Annotations of the genomic variation.
     */
    union { null, VariantAnnotation } annotation = null;
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy