All Downloads are FREE. Search and download functionalities are using the official Maven repository.

avro.variant.avdl Maven / Gradle / Ivy

The newest version!

// Avro IDL protocol declaring the variant data model: the variant type
// vocabulary, per-cohort statistics and scores, per-study file/sample entries,
// structural-variation details and the top-level VariantAvro record.
//
// NOTE(review): the element types of every array<...> / map<...> below were
// restored from the field documentation and from the record names declared in
// this protocol. Confirm them against the upstream schema before publishing.
@namespace("org.opencb.biodata.models.variant.avro")

protocol Variants {

    import idl "variantAnnotation.avdl";

    /**
     * Type of variation, which depends mostly on its length.
     * <ul>
     * <li>SNVs involve a single nucleotide, without changes in length</li>
     * <li>MNVs involve multiple nucleotides, without changes in length</li>
     * <li>Indels are insertions or deletions of less than SV_THRESHOLD (50) nucleotides</li>
     * <li>Structural variations are large changes of more than SV_THRESHOLD nucleotides</li>
     * <li>Copy-number variations alter the number of copies of a region</li>
     * </ul>
     */
    enum VariantType {
        SNV,                    // SO:0001483
        MNV,                    // SO:0002007
        INDEL,                  // SO:1000032
        SV,                     // SO:0001537
        INSERTION,              // SO:0000667
        DELETION,               // SO:0000159
        TRANSLOCATION,          // SO:0000199
        INVERSION,              // SO:1000036
        COPY_NUMBER,            // SO:0001019
        COPY_NUMBER_GAIN,       // SO:0001742
        COPY_NUMBER_LOSS,       // SO:0001743
        DUPLICATION,            // SO:1000035
        TANDEM_DUPLICATION,     // SO:1000173
        BREAKEND,
        NO_VARIATION,           // Defined in HTSJDK
        SYMBOLIC,               // Defined in HTSJDK
        MIXED,                  // Defined in HTSJDK
        SNP,                    // @Deprecated
        MNP,                    // @Deprecated
        CNV                     // @Deprecated
    }

    // Statistics of one variant computed over a single cohort.
    record VariantStats {
        /**
         * Unique cohort identifier within the study.
         */
        string cohortId;

        /**
         * Count of samples with non-missing genotypes in this variant from the cohort.
         * This value is used as denominator for genotypeFreq.
         */
        union { null, int } sampleCount;

        /**
         * Count of files with samples from the cohort that reported this variant.
         * This value is used as denominator for filterFreq.
         */
        union { null, int } fileCount;

        /**
         * Total number of alleles in called genotypes. It does not include missing alleles.
         * This value is used as denominator for refAlleleFreq and altAlleleFreq.
         */
        union { null, int } alleleCount;

        /**
         * Number of reference alleles found in this variant.
         */
        union { null, int } refAlleleCount;

        /**
         * Number of main alternate alleles found in this variant. It does not include secondary alternates.
         */
        union { null, int } altAlleleCount;

        /**
         * Reference allele frequency calculated from refAlleleCount and alleleCount, in the range [0,1]
         */
        union { null, float } refAlleleFreq;

        /**
         * Alternate allele frequency calculated from altAlleleCount and alleleCount, in the range [0,1]
         */
        union { null, float } altAlleleFreq;

        /**
         * Number of missing alleles.
         */
        union { null, int } missingAlleleCount;

        /**
         * Number of genotypes with all alleles missing (e.g. ./.). It does not count partially missing genotypes like "./0" or "./1".
         */
        union { null, int } missingGenotypeCount;

        /**
         * Number of occurrences for each genotype.
         * This does not include genotype with all alleles missing (e.g. ./.), but it includes partially missing genotypes like "./0" or "./1".
         * Total sum of counts should be equal to the count of samples.
         */
        map<int> genotypeCount = {};

        /**
         * Genotype frequency for each genotype found calculated from the genotypeCount and samplesCount, in the range [0,1]
         * The sum of frequencies should be 1.
         */
        map<float> genotypeFreq = {};

        /**
         * The number of occurrences for each FILTER value in files from samples in this cohort reporting this variant.
         * As each file can contain more than one filter value (usually separated by ';'), the total sum of counts could be greater than the count of files.
         */
        map<int> filterCount;

        /**
         * Frequency of each filter calculated from the filterCount and filesCount, in the range [0,1]
         */
        map<float> filterFreq;

        /**
         * The number of files from samples in this cohort reporting this variant with valid QUAL values.
         * This value is used as denominator to obtain the qualityAvg.
         */
        union { null, int } qualityCount;

        /**
         * The average Quality value for files with valid QUAL values from samples in this cohort reporting this variant.
         * Some files may not have defined the QUAL value, so the sampling could be less than the filesCount.
         */
        union { null, float } qualityAvg;

        /**
         * Minor allele frequency. Frequency of the less common allele between the reference and the main alternate alleles.
         * This value does not take into account secondary alternates.
         */
        union { null, float } maf;

        /**
         * Minor genotype frequency. Frequency of the less common genotype seen in this variant.
         * This value takes into account all values from the genotypeFreq map.
         */
        union { null, float } mgf;

        /**
         * Allele with minor frequency.
         */
        union { null, string } mafAllele;

        /**
         * Genotype with minor frequency.
         */
        union { null, string } mgfGenotype;
    }

    // A named score attached to a variant, computed over one or two cohorts.
    record VariantScore {
        /**
         * Variant score ID.
         */
        string id;

        /**
         * Main cohort used for calculating the score.
         */
        string cohort1;

        /**
         * Optional secondary cohort used for calculating the score.
         */
        union { null, string } cohort2 = null;

        /**
         * Score value
         */
        float score;

        /**
         * Score p value
         */
        union { null, float } pValue = null;
    }

    // Reference back to the call as it appeared before normalization.
    record OriginalCall {
        /**
         * Original variant ID before normalization including all secondary alternates.
         */
        string variantId;

        /**
         * Alternate allele index of the original multi-allelic variant call in which was decomposed.
         */
        union { null, int } alleleIndex;
    }

    // Per-file information about a variant within a study.
    record FileEntry {
        /**
         * Unique identifier of the source file.
         */
        union { null, string } fileId;

        /**
         * Original call position for the variant, if the file was normalized.
         *
         * {position}:{reference}:{alternate}(,{other_alternate})*:{allele_index}
         */
        union { null, OriginalCall } call;

        /**
         * Optional data that probably depend on the format of the file the
         * variant was initially read from.
         */
        // NOTE(review): value type restored as string; confirm against upstream.
        map<string> data;
    }

    // Coordinates of an alternate allele; null fields fall back to the variant's own values.
    record AlternateCoordinate {
        union { null, string } chromosome;

        /**
         * First position 1-based of the alternate. If null, the start is the same of the variant.
         */
        union { null, int } start;

        /**
         * End position 1-based of the alternate. If null, the end is the same of the variant.
         */
        union { null, int } end;

        /**
         * Reference allele. If null, the reference is the same of the variant.
         */
        union { null, string } reference;

        /**
         * Alternate allele.
         */
        string alternate;

        VariantType type;
    }

    // Data reported for one sample.
    record SampleEntry {
        union { null, string } sampleId;
        union { null, int } fileIndex;
        // NOTE(review): element type restored as string; confirm against upstream.
        array<string> data;
    }

    // Kinds of issue that can be flagged on a sample entry.
    enum IssueType {
        DUPLICATION,
        DISCREPANCY,
        MENDELIAN_ERROR,
        DE_NOVO,
        COMPOUND_HETEROZYGOUS
    }

    // An issue of a given type together with the affected sample entry.
    record IssueEntry {
        IssueType type;
        SampleEntry sample;
        // NOTE(review): value type restored as string; confirm against upstream.
        map<string> data;
    }

    // Everything known about a variant within a single study.
    record StudyEntry {
        /**
         * Unique identifier of the study.
         */
        union { null, string } studyId;

        /**
         * List of files from the study where the variant was present.
         */
        array<FileEntry> files = [];

        /**
         * Alternate alleles that appear along with a variant alternate.
         */
        union { null, array<AlternateCoordinate> } secondaryAlternates = null;

        /**
         * Fields stored for each sample.
         */
        array<string> sampleDataKeys;

        /**
         * Genotypes and other sample-related information. Each position is related
         * with one sample. The content are lists of values in the same order as the
         * sampleDataKeys array. The length of these lists must be the same as the sampleDataKeys field.
         */
        array<SampleEntry> samples;

        array<IssueEntry> issues = [];

        /**
         * Statistics of the genomic variation, such as its alleles/genotype count
         * or its minimum allele frequency, grouped by cohort name.
         */
        array<VariantStats> stats;

        array<VariantScore> scores = [];
    }

//    /**
//     * Confidence interval around a position for imprecise variants
//     */
//    record ConfidenceInterval {
//        int right;
////        int behind;
//        int left;
////        int forward;
//    }

    /**
     * @Deprecated, use VariantType instead
     */
    @javaAnnotation("Deprecated")
    enum StructuralVariantType {
        COPY_NUMBER_GAIN,       // SO:0001742
        COPY_NUMBER_LOSS,       // SO:0001743
        TANDEM_DUPLICATION      // SO:1000173
    }

    /**
     * SE | (Start -> End)   | s | t[p[ | piece extending to the right of p is joined after t
     * SS | (Start -> Start) | s | t]p] | reverse comp piece extending left of p is joined after t
     * ES | (End -> Start)   | s | ]p]t | piece extending to the left of p is joined before t
     * EE | (End -> End)     | s | [p[t | reverse comp piece extending right of p is joined before t
     */
    enum BreakendOrientation {
        SE,
        SS,
        ES,
        EE
    }

    // Location of a breakend's mate, with optional confidence-interval bounds.
    record BreakendMate {
        union { null, string } chromosome;
        union { null, int } position;
        union { null, int } ciPositionLeft;
        union { null, int } ciPositionRight;
    }

    // Breakend description: its mate, the join orientation and any inserted sequence.
    record Breakend {
        union { null, BreakendMate } mate;
        union { null, BreakendOrientation } orientation;
        union { null, string } insSeq;
    }

    // Extra information carried by structural variants.
    record StructuralVariation {
        union { null, int } ciStartLeft;
        union { null, int } ciStartRight;
        union { null, int } ciEndLeft;
        union { null, int } ciEndRight;

        /**
         * Number of copies for CNV variants.
         */
        union { null, int } copyNumber;

        /**
         * Inserted sequence for long INS
         */
        union { null, string } leftSvInsSeq;
        union { null, string } rightSvInsSeq;

        /**
         * @deprecated
         */
        union { null, StructuralVariantType } @javaAnnotation("Deprecated") type;

        union { null, Breakend } breakend = null;
    }

    // Top-level record describing one genomic variant.
    record VariantAvro {
        /**
         * The variant ID.
         */
        union { null, string } id;

        /**
         * Other names used for this genomic variation.
         */
        array<string> names = [];

        /**
         * Chromosome where the genomic variation occurred.
         */
        string chromosome;

        /**
         * Normalized position where the genomic variation starts.
         * <ul>
         * <li>SNVs have the same start and end position</li>
         * <li>Insertions start in the last present position: if the first nucleotide
         * is inserted in position 6, the start is position 5</li>
         * <li>Deletions start in the first previously present position: if the first
         * deleted nucleotide is in position 6, the start is position 6</li>
         * </ul>
         */
        int start;

        /**
         * Normalized position where the genomic variation ends.
         * <ul>
         * <li>SNVs have the same start and end positions</li>
         * <li>Insertions end in the first present position: if the last nucleotide
         * is inserted in position 9, the end is position 10</li>
         * <li>Deletions end in the last previously present position: if the last
         * deleted nucleotide is in position 9, the end is position 9</li>
         * </ul>
         */
        int end;

        /**
         * Reference allele.
         */
        string reference;

        /**
         * Alternate allele.
         */
        string alternate;

        /**
         * Reference strand for this variant
         */
        union { null, string } strand = null;

        /**
         * Information regarding Structural Variants
         */
        union { null, StructuralVariation } sv = null;

        /**
         * Length of the genomic variation, which depends on the variation type.
         * <ul>
         * <li>SNVs have a length of 1 nucleotide</li>
         * <li>Indels have the length of the largest allele</li>
         * </ul>
         */
        int length;

        /**
         * Type of variation: single nucleotide, indel or structural variation.
         */
        VariantType type;

        /**
         * Information specific to each study the variant was read from, such as
         * samples or statistics.
         */
        array<StudyEntry> studies;

        /**
         * Annotations of the genomic variation.
         */
        union { null, VariantAnnotation } annotation = null;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy