All Downloads are FREE. Search and download functionalities are using the official Maven repository.

avro.references.avdl Maven / Gradle / Ivy

There is a newer version: 0.6.0a5
Show newest version
@namespace("org.ga4gh.models")

/**
Defines types used by the GA4GH References API.
*/
protocol References {

import idl "common.avdl";

/**
A `Reference` is a canonical assembled contig, intended to act as a
reference coordinate space for other genomic annotations. A single
`Reference` might represent the human chromosome 1, for instance.

`Reference`s are designed to be immutable.
*/
record Reference {

  /**
  The reference ID. Unique within the repository.
  */
  string id;

  /** The length of this reference's sequence. */
  long length;

  /**
  The MD5 checksum uniquely representing this `Reference` as a lower-case
  hexadecimal string, calculated as the MD5 of the upper-case sequence
  excluding all whitespace characters (this is equivalent to SQ:M5 in SAM).
  */
  string md5checksum;

  /**
  The name of this reference (e.g. '22').
  */
  string name;

  /**
  The URI from which the sequence was obtained. Specifies a FASTA format
  file/string with one name, sequence pair. In most cases, clients should call
  the `getReferenceBases()` method to obtain sequence bases for a `Reference`
  instead of attempting to retrieve this URI.
  */
  union { null, string } sourceURI = null;

  /**
  All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ), each of
  which must include a version number, e.g. `GCF_000001405.26`.
  */
  array<string> sourceAccessions;

  /**
  A sequence X is said to be derived from source sequence Y, if X and Y
  are of the same length and the per-base sequence divergence at A/C/G/T bases
  is sufficiently small. Two sequences derived from the same official
  sequence share the same coordinates and annotations, and
  can be replaced with the official sequence for certain use cases.
  */
  boolean isDerived = false;

  /**
  The `sourceDivergence` is the fraction of non-indel bases that do not match the
  reference this record was derived from.
  */
  union { null, float } sourceDivergence = null;

  /** ID from http://www.ncbi.nlm.nih.gov/taxonomy (e.g. 9606->human). */
  union { null, int } ncbiTaxonId = null;

}

/**
A `ReferenceSet` is a set of `Reference`s which typically comprise a
reference assembly, such as `GRCh38`. A `ReferenceSet` defines a common
coordinate space for comparing reference-aligned experimental data.
*/
record ReferenceSet {
  /** The reference set ID. Unique in the repository. */
  string id;

  /** The reference set name. */
  union { null, string } name = null;

  /**
  Order-independent MD5 checksum which identifies this `ReferenceSet`.

  To compute this checksum, make a list of `Reference.md5checksum` for all
  `Reference`s in this set. Then sort that list, and take the MD5 hash of
  all the strings concatenated together. Express the hash as a lower-case
  hexadecimal string.
  */
  string md5checksum;

  /**
  ID from http://www.ncbi.nlm.nih.gov/taxonomy (e.g. 9606->human) indicating
  the species which this assembly is intended to model. Note that contained
  `Reference`s may specify a different `ncbiTaxonId`, as assemblies may
  contain reference sequences which do not belong to the modeled species, e.g.
  EBV in a human reference genome.
  */
  union { null, int } ncbiTaxonId = null;

  /** Optional free text description of this reference set. */
  union { null, string } description = null;

  // The fields below describe the source of the sequences.

  /** Public id of this reference set, such as `GRCh37`. */
  union { null, string } assemblyId = null;

  /** Specifies a FASTA format file/string. */
  union { null, string } sourceURI = null;

  /**
  All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ), ideally
  with a version number, e.g. `NC_000001.11`.
  */
  array<string> sourceAccessions;

  /**
  A reference set may be derived from a source if it contains
  additional sequences, or some of the sequences within it are derived
  (see the definition of `isDerived` in `Reference`).
  */
  boolean isDerived = false;
}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy