
org.snpeff.interval.Genes Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.interval;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.snpeff.snpEffect.Config;
import org.snpeff.util.Timer;
/**
 * A collection of genes (marker intervals), indexed by gene ID.
 * Note: It is assumed that all genes belong to the same genome
 *
 * Iteration order is the order of the underlying HashMap's values, i.e. it
 * is unspecified. Use {@link #sorted()} when a positional order is needed.
 *
 * @author pcingola
 */
public class Genes implements Iterable<Gene>, Serializable {

    private static final long serialVersionUID = 9022385501946879197L;

    public boolean debug = false;
    Genome genome;
    HashMap<String, Gene> genesById;

    /**
     * Create an empty gene collection for a genome.
     *
     * @param genome Genome these genes belong to
     */
    public Genes(Genome genome) {
        genesById = new HashMap<>();
        this.genome = genome;
    }

    /**
     * Add a gene interval to this collection.
     * A gene previously stored under the same ID is replaced.
     */
    public void add(Gene gene) {
        genesById.put(gene.getId(), gene);
    }

    /**
     * In a circular genome, a gene can have negative coordinates or crosses
     * over chromosome end. These genes are mirrored to the opposite end of
     * the chromosome so that they can be referenced by both circular
     * coordinates.
     */
    public void createCircularGenes() {
        List<Gene> newGenes = new LinkedList<>();

        // Check if any gene spans across chromosome limits.
        // Clones are collected first and added afterwards, so the gene
        // collection is not modified while it is being iterated.
        for (Gene g : genome.getGenes()) {
            if (g.isCircular()) {
                newGenes.add(g.circularClone());
            }
        }

        // Add all newly created genes
        if (!newGenes.isEmpty()) {
            for (Gene g : newGenes) {
                genome.getGenes().add(g);
            }

            if (Config.get().isVerbose()) Timer.showStdErr("Total: " + newGenes.size() + " added as circular mirrored genes (appended '" + Gene.CIRCULAR_GENE_ID + "' to IDs).");
        }
    }

    /**
     * Creates a list of Intergenic regions
     *
     * Genes are visited in their natural sort order. A region is created
     * between consecutive genes (when there is a gap), and one more after
     * the last gene seen on each chromosome.
     *
     * @return Intergenic regions; entries for which
     *         Intergenic.createIntergenic() returned null are omitted
     */
    public List<Intergenic> createIntergenic() {
        ArrayList<Intergenic> intergenics = new ArrayList<>(genesById.size());

        // Create a list of genes sorted by position
        ArrayList<Gene> genesSorted = new ArrayList<>(genesById.size());
        genesSorted.addAll(genesById.values());
        Collections.sort(genesSorted);

        // For each gene, transcript
        Gene genePrev = null;
        Chromosome chrPrev = null;
        for (Gene gene : genesSorted) {
            // Chromosome change? Invalidate genePrev
            if (chrPrev != gene.getChromosome()) {
                // Add last intergenic region from previous chromosome
                if (chrPrev != null && genePrev != null) {
                    Intergenic intergenic = Intergenic.createIntergenic(genePrev, null);
                    if (intergenic != null) intergenics.add(intergenic);
                }
                genePrev = null;
            }

            // Intergenic region's [start, end] interval
            int start = (genePrev != null ? genePrev.getEnd() + 1 : 0);
            int end = gene.getStart() - 1;

            // Valid intergenic region?
            // NOTE(review): 'start == end' (a one-base gap) is skipped by this
            // strict comparison; confirm whether that is intended.
            if (start < end) {
                Intergenic intergenic = Intergenic.createIntergenic(genePrev, gene);
                if (intergenic != null) intergenics.add(intergenic);
            }

            // Is it null or ends before this one? update 'genePrev'
            // (keeps the gene reaching furthest right, so a gene contained in
            // the previous one does not shrink the covered span)
            if ((genePrev == null) || (gene.getEnd() > genePrev.getEnd())) genePrev = gene;

            // Update chrPrev
            chrPrev = gene.getChromosome();
        }

        // Add intergenic region for last gene in the list
        if (genePrev != null) {
            Intergenic intergenic = Intergenic.createIntergenic(genePrev, null);
            if (intergenic != null) intergenics.add(intergenic);
        }

        return intergenics;
    }

    /**
     * Create splice sites for every transcript of every gene.
     * All arguments are forwarded unchanged to Transcript.createSpliceSites().
     *
     * For a definition of splice site, see comments at the beginning of SpliceSite.java
     *
     * @param spliceSiteSize        forwarded to Transcript.createSpliceSites()
     * @param spliceRegionExonSize  forwarded to Transcript.createSpliceSites()
     * @param spliceRegionIntronMin forwarded to Transcript.createSpliceSites()
     * @param spliceRegionIntronMax forwarded to Transcript.createSpliceSites()
     */
    public void createSpliceSites(int spliceSiteSize, int spliceRegionExonSize, int spliceRegionIntronMin, int spliceRegionIntronMax) {
        // For each gene, transcript
        for (Gene gene : this) {
            for (Transcript tr : gene) {
                tr.createSpliceSites(spliceSiteSize, spliceRegionExonSize, spliceRegionIntronMin, spliceRegionIntronMax);
            }
        }
    }

    /**
     * Creates a list of UP/DOWN stream regions (for each transcript)
     * Upstream (downstream) stream is defined as upDownLength before (after) transcript
     *
     * Note: If upDownLength <=0 no interval is created
     *
     * NOTE(review): the element type is left raw because the static types of
     * Transcript.getUpstream() / getDownstream() are declared outside this
     * file; parameterize with their common supertype once confirmed.
     *
     * @param upDownLength Length of the up/downstream regions
     * @return For each transcript, its upstream followed by its downstream
     *         region (empty list if upDownLength <= 0)
     */
    public List createUpDownStream(int upDownLength) {
        ArrayList list = new ArrayList<>();
        if (upDownLength <= 0) return list;

        // For each gene, transcript
        for (Gene gene : this) {
            for (Transcript tr : gene) {
                tr.createUpDownStream(upDownLength);
                list.add(tr.getUpstream());
                list.add(tr.getDownstream());
            }
        }

        return list;
    }

    /**
     * Find a transcript by ID (linear scan over all genes' transcripts)
     *
     * @return The first transcript whose ID equals trId, or null if there is
     *         none (or trId is null)
     */
    public Transcript findTranscript(String trId) {
        if (trId == null) return null;

        for (Gene g : this) {
            for (Transcript tr : g) {
                // Compare from the argument side: no NPE if a transcript has no ID
                if (trId.equals(tr.getId())) return tr;
            }
        }

        return null;
    }

    /**
     * Obtain a gene interval
     *
     * @return Gene stored under geneId, or null if not found
     */
    public Gene get(String geneId) {
        return genesById.get(geneId);
    }

    /**
     * Obtain a gene by GeneName
     * WARNING: The first match is returned. If multiple genes share the
     * same gene name, no order can be expected for this method.
     *
     * @return A gene whose name equals geneName, or null if there is none
     *         (or geneName is null)
     */
    public Gene getGeneByName(String geneName) {
        if (geneName == null) return null;

        for (Gene g : this) {
            // Compare from the argument side: no NPE if a gene has no name
            if (geneName.equals(g.getGeneName())) return g;
        }

        return null;
    }

    /**
     * Iterate over all genes, in the (unspecified) order of the underlying map.
     */
    @Override
    public Iterator<Gene> iterator() {
        return genesById.values().iterator();
    }

    /**
     * Number of genes in this collection
     */
    public int size() {
        return genesById.size();
    }

    /**
     * Genes ordered by IntervalComparatorByStart.
     *
     * @return A new list; changing it does not affect this collection
     */
    public Collection<Gene> sorted() {
        ArrayList<Gene> genes = new ArrayList<>(genesById.values());
        Collections.sort(genes, new IntervalComparatorByStart());
        return genes;
    }

    /**
     * One line per gene (each gene's toString() followed by a newline)
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (Gene gint : this) {
            sb.append(gint).append('\n');
        }
        return sb.toString();
    }

    /**
     * All genes, as a live view of the underlying map's values
     * (not a copy: changes to this collection are reflected in the view).
     */
    public Collection<Gene> values() {
        return genesById.values();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy