org.snpeff.interval.Transcript Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.interval;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.snpeff.interval.codonChange.CodonChange;
import org.snpeff.serializer.MarkerSerializer;
import org.snpeff.snpEffect.Config;
import org.snpeff.snpEffect.EffectType;
import org.snpeff.snpEffect.VariantEffect.ErrorWarningType;
import org.snpeff.snpEffect.VariantEffects;
import org.snpeff.stats.ObservedOverExpectedCpG;
import org.snpeff.util.Gpr;
import org.snpeff.util.GprSeq;

/**
 * Codon position
 * @author pcingola
 */
class CodonPosition {

	public int codonNum = -1;
	public int codonIndex = -1;
}

/**
 * Interval for a transcript, as well as some other information: exons, utrs, cds, etc.
 *
 * @author pcingola
 */
public class Transcript extends IntervalAndSubIntervals {

	private static final long serialVersionUID = -2665025617916107311L;

	boolean aaCheck; // Has this transcript been checked against a protein sequence?
	boolean canonical; // Is this a canonical transcript?
	boolean corrected; // Have coordinates been corrected? (e.g. frame correction)
	boolean dnaCheck; // Has this transcript been checked against a CDS/cDNA sequence?
	boolean proteinCoding; // Is this a protein-coding transcript?
	boolean ribosomalSlippage; // Ribosomal slippage causes changes in reading frames. This might be represented as negative length introns (overlapping exons).
	int cdsStart, cdsEnd; // CDS start and end coordinates. Note: If the transcript is in reverse strand, then cdsStart > cdsEnd
	int spliceSiteSize, spliceRegionExonSize, spliceRegionIntronMin, spliceRegionIntronMax; // Splice sizes
	int upDownLength; // Upstream and downstream size
	BioType bioType; // Transcript biotype
	String cds; // Coding sequence
	String mRna; // mRna sequence (includes 5'UTR and 3'UTR)
	String protein; // Protein sequence
	String version = ""; // Transcript version
	List utrs; // UTRs
	List cdss; // CDS information
	List introns; // Intron markers
	Upstream upstream; // Upstream interval
	Downstream downstream; // Downstream interval
	Exon firstCodingExon; // First coding exon. I.e. where transcription start site (TSS) is.
	int cds2pos[], aa2pos[];
	TranscriptSupportLevel transcriptSupportLevel = null;

	public Transcript() {
		super();
		utrs = new ArrayList<>();
		cdss = new ArrayList<>();
		type = EffectType.TRANSCRIPT;
	}

	public Transcript(Gene gene, int start, int end, boolean strandMinus, String id) {
		super(gene, start, end, strandMinus, id);
		type = EffectType.TRANSCRIPT;
	}

	/**
	 * Calculate chromosome position as function of Amino Acid number
	 *
	 * @returns An array mapping 'pos[aaNumber] = chromosmalPos'
	 */
	public synchronized int[] aaNumber2Pos() {
		if (aa2pos != null) return aa2pos;

		calcCdsStartEnd();

		aa2pos = new int[protein().length()];
		for (int i = 0; i < aa2pos.length; i++)
			aa2pos[i] = -1;

		int cdsMin = Math.min(cdsStart, cdsEnd);
		int cdsMax = Math.max(cdsStart, cdsEnd);

		// For each exon, add CDS position to array
		int aaNum = 0;
		int step = isStrandPlus() ? 1 : -1;
		int codonFrame = 0;
		for (Exon exon : sortedStrand()) {
			int min = isStrandPlus() ? exon.getStart() : exon.getEnd();

			int aaIdxStart = -1, aaIdxEnd = -1;

			for (int pos = min; exon.intersects(pos) && aaNum < aa2pos.length; pos += step) {
				// Is this within a CDS?
				if ((cdsMin <= pos) && (pos <= cdsMax)) {
					// Update AA indexes for this exon
					if (aaIdxStart < 0) aaIdxStart = aaNum;
					aaIdxEnd = aaNum;

					// First codon base? Add to map
					if (codonFrame == 0) aa2pos[aaNum] = pos;

					// Last base in this codon? Increment AA number
					if (codonFrame == 2) aaNum++;

					// Update codon base
					codonFrame = (codonFrame + 1) % 3;
				}
			}

			// Update exons' AA indexes
			if (aaIdxStart >= 0) exon.setAaIdx(aaIdxStart, aaIdxEnd);
		}

		return aa2pos;
	}

	/**
	 * Find a genomic position of the first base in a Amino Acid 'aaNum'
	 */
	public int aaNumber2Pos(int aaNum) {
		aaNumber2Pos();

		return -1;
	}

	/**
	 * Add a CDS
	 */
	public void add(Cds cdsInt) {
		cdss.add(cdsInt);
		cds = null;
	}

	/**
	 * Add an intron
	 */
	public void add(Intron intron) {
		if (introns == null) introns = new ArrayList<>();
		introns.add(intron);

		// Introns should be sorted by strand
		if (isStrandPlus()) Collections.sort(introns);
		else Collections.sort(introns, Collections.reverseOrder());
	}

	/**
	 * Add a SpliceSite
	 */
	public void add(SpliceSite spliceSite) {
		for (Exon ex : this)
			if (ex.intersects(spliceSite)) ex.add(spliceSite);

		for (Intron intr : introns())
			if (intr.intersects(spliceSite)) intr.add(spliceSite);
	}

	/**
	 * Add a UTR
	 */
	public void add(Utr utr) {
		utrs.add(utr);
		cds = null;
	}

	/**
	 * Add missing UTRs. See utrFromCds() method.
	 */
	boolean addMissingUtrs(Markers missingUtrs, boolean verbose) {
		missingUtrs.sort(false, isStrandMinus());

		// Get min/max CDS positions
		int minCds = Integer.MAX_VALUE;
		int maxCds = 0;
		for (Cds c : cdss) {
			minCds = Math.min(minCds, c.getStart());
			maxCds = Math.max(maxCds, c.getEnd());
		}

		if (verbose) {
			System.out.println("Transcript '" + id + "' has missing UTRs." //
					+ " Strand: " + (strandMinus ? "-" : "+") //
					+ " (minCds: " + minCds //
					+ " , maxCds: " + maxCds + "):" //
			);
		}

		// Add intervals
		boolean retVal = false;
		for (Marker mu : missingUtrs) {
			Exon exon = queryExon(mu);
			if (exon == null) throw new RuntimeException("Cannot find exon for UTR: " + mu);
			Utr toAdd = null;

			if (isStrandPlus()) {
				if (mu.getEnd() <= minCds) toAdd = new Utr5prime(exon, mu.getStart(), mu.getEnd(), strandMinus, mu.getId());
				else if (mu.getStart() >= maxCds) toAdd = new Utr3prime(exon, mu.getStart(), mu.getEnd(), strandMinus, mu.getId());
			} else {
				if (mu.getStart() >= maxCds) toAdd = new Utr5prime(exon, mu.getStart(), mu.getEnd(), strandMinus, mu.getId());
				else if (mu.getEnd() <= minCds) toAdd = new Utr3prime(exon, mu.getStart(), mu.getEnd(), strandMinus, mu.getId());
			}

			// OK?
			if (toAdd != null) {
				add(toAdd);
				if (verbose) System.out.println("\tAdding " + toAdd);
				retVal = true;
			}
		}

		return retVal;
	}

	/**
	 * Adjust transcript coordinates
	 */
	public boolean adjust() {
		boolean changed = false;
		int strandSumTr = 0;
		int newStart = Integer.MAX_VALUE;
		int newEnd = Integer.MIN_VALUE;

		int countStrandPlus = 0, countStrandMinus = 0;
		for (Exon exon : sortedStrand()) {
			newStart = Math.min(newStart, exon.getStart());
			newEnd = Math.max(newEnd, exon.getEnd());

			// Common exon strand
			if (exon.isStrandPlus()) countStrandPlus++;
			else countStrandMinus++;
		}

		// UTRs
		for (Utr utr : getUtrs()) {
			newStart = Math.min(newStart, utr.getStart());
			newEnd = Math.max(newEnd, utr.getEnd());
		}

		// Sanity check
		strandSumTr = countStrandPlus - countStrandMinus; // Some exons have incorrect strands, we use the strand indicated by most exons
		boolean newStrandMinus = strandSumTr < 0;
		if ((countStrandPlus > 0) && (countStrandMinus > 0)) Gpr.debug("Transcript '" + id + "' has " + countStrandPlus + " exons on the plus and " + countStrandMinus + " exons on the minus strand! This should never happen!");

		// Change transcript strand?
		if (strandMinus != newStrandMinus) {
			changed = true;
			setStrandMinus(newStrandMinus); // Change strand
		}

		// Changed? Update values
		if (newStart < Integer.MAX_VALUE && newEnd > Integer.MIN_VALUE) {
			// Change start?
			if (start != newStart) {
				setStart(newStart);
				changed = true;
			}

			// Change end?
			if (end != newEnd) {
				setEnd(newEnd);
				changed = true;
			}
		}

		return changed;
	}

	/**
	 * Create a new transcript after applying changes in variant
	 *
	 * Note: If this transcript is unaffected, no new transcript is created (same transcript is returned)
	 */
	@Override
	public Transcript apply(Variant variant) {
		// Variant after this marker: No effect
		if (!shouldApply(variant)) return this;

		//---
		// Create new transcript
		//---
		Transcript newTr = (Transcript) super.apply(variant);
		if (newTr == null) return null;

		//---
		// Add changed UTRs
		//---
		for (Utr utr : utrs) {
			Utr newUtr = (Utr) utr.apply(variant);
			if (newUtr != null) {
				Exon newExon = newTr.findExon(newUtr);

				if (newExon != null) {
					newUtr.setParent(newExon);
					newTr.utrs.add(newUtr);
				} else {
					// This might happen when a duplication affecting part of an exon
					// E.g. If the duplication affects the coding part and NOT the 3'UTR then the UTR doesn't have a
					if (Config.get().isDebug()) Gpr.debug("WARNING: applying variant: Could not find 'new' parent exon for 'new' UTR" //
							+ "\n\t\tVariant           : " + variant //
							+ "\n" //
							+ "\n\t\tUTR        (ori) :" + utr //
							+ "\n" //
							+ "\n\t\tTranscript (ori) :" + this //
							+ "\n" //
							+ "\n\t\tUTR        (new) :" + newUtr //
							+ "\n" //
							+ "\n\t\tTranscript (new) :" + newTr //
					);
				}
			}
		}

		// Rank exons: if a variant deletes one or more exons, ranks will change
		newTr.rankExons();

		// Up & Down stream
		newTr.createUpDownStream(upDownLength);

		// Introns
		newTr.introns();

		// Splice sites
		newTr.createSpliceSites(spliceSiteSize, spliceRegionExonSize, spliceRegionIntronMin, spliceRegionIntronMax);

		return newTr;
	}

	/**
	 * Find base at genomic coordinate 'pos'
	 */
	public String baseAt(int pos) {
		calcCdsStartEnd();
		Exon ex = findExon(pos);
		if (ex == null) return null;
		return ex.basesAt(pos - ex.getStart(), 1);
	}

	/**
	 * Calculate distance from transcript start to a position
	 * mRNA is roughly the same than cDNA. Strictly speaking mRNA
	 * has a poly-A tail and 5'cap.
	 */
	public synchronized int baseNumber2MRnaPos(int pos) {
		int count = 0;
		for (Exon eint : sortedStrand()) {
			if (eint.intersects(pos)) {
				// Intersect this exon? Calculate the number of bases from the beginning
				int dist = 0;
				if (isStrandPlus()) dist = pos - eint.getStart();
				else dist = eint.getEnd() - pos;

				// Sanity check
				if (dist < 0) throw new RuntimeException("Negative distance for position " + pos + ". This should never happen!\n" + this);

				return count + dist;
			}

			count += eint.size();
		}
		return -1;
	}

	/**
	 * Calculate base number in a CDS where 'pos' maps
	 *
	 * @param usePrevBaseIntron: When 'pos' is intronic this method returns:
	 * 			- if( usePrevBaseIntron== false)  => The first base in the exon after 'pos' (i.e. first coding base after intron)
	 * 			- if( usePrevBaseIntron== true)   => The last base in the  exon before 'pos'  (i.e. last coding base before intron)
	 *
	 * @returns Base number or '-1' if it does not map to a coding base
	 */
	public synchronized int baseNumberCds(int pos, boolean usePrevBaseIntron) {
		// Doesn't hit this transcript?
		if (!intersects(pos)) return -1;

		// Is it in UTR instead of CDS?
		if (isUtr(pos)) return -1;

		// Calculate cdsStart and cdsEnd (if not already done)
		calcCdsStartEnd();

		// All exons..
		int firstCdsBaseInExon = 0; // Where the exon maps to the CDS (i.e. which CDS base number does the first base in this exon maps to).
		for (Exon eint : sortedStrand()) {
			if (eint.intersects(pos)) {
				int cdsBaseInExon; // cdsBaseInExon: base number relative to the beginning of the coding part of this exon (i.e. excluding 5'UTRs)
				if (isStrandPlus()) cdsBaseInExon = pos - Math.max(eint.getStart(), cdsStart);
				else cdsBaseInExon = Math.min(eint.getEnd(), cdsStart) - pos;

				cdsBaseInExon = Math.max(0, cdsBaseInExon);

				return firstCdsBaseInExon + cdsBaseInExon;
			} else {
				// Before exon begins?
				if ((isStrandPlus() && (pos < eint.getStart())) // Before exon begins (positive strand)?
						|| (isStrandMinus() && (pos > eint.getEnd()))) // Before exon begins (negative strand)?
					return firstCdsBaseInExon - (usePrevBaseIntron ? 1 : 0);
			}

			if (isStrandPlus()) firstCdsBaseInExon += Math.max(0, eint.getEnd() - Math.max(eint.getStart(), cdsStart) + 1);
			else firstCdsBaseInExon += Math.max(0, Math.min(cdsStart, eint.getEnd()) - eint.getStart() + 1);
		}

		return firstCdsBaseInExon - 1;
	}

	/**
	 * Return a codon that includes 'cdsBaseNumber'
	 */
	public String baseNumberCds2Codon(int cdsBaseNumber) {
		int codonNum = cdsBaseNumber / CodonChange.CODON_SIZE;
		int min = codonNum * CodonChange.CODON_SIZE;
		int max = codonNum * CodonChange.CODON_SIZE + CodonChange.CODON_SIZE;
		if ((min >= 0) && (max <= cds().length())) return cds().substring(min, max).toUpperCase();
		return null;
	}

	/**
	 * Calculate chromosome position as function of CDS number
	 *
	 * @returns An array mapping 'cds2pos[cdsBaseNumber] = chromosmalPos'
	 */
	public synchronized int[] baseNumberCds2Pos() {
		if (cds2pos != null) return cds2pos;

		calcCdsStartEnd();

		cds2pos = new int[cds().length()];
		for (int i = 0; i < cds2pos.length; i++)
			cds2pos[i] = -1;

		int cdsMin = Math.min(cdsStart, cdsEnd);
		int cdsMax = Math.max(cdsStart, cdsEnd);

		// For each exon, add CDS position to array
		int cdsBaseNum = 0;
		for (Exon exon : sortedStrand()) {
			int min = isStrandPlus() ? exon.getStart() : exon.getEnd();
			int step = isStrandPlus() ? 1 : -1;
			for (int pos = min; exon.intersects(pos) && cdsBaseNum < cds2pos.length; pos += step)
				if ((cdsMin <= pos) && (pos <= cdsMax)) cds2pos[cdsBaseNum++] = pos;
		}

		return cds2pos;
	}

	/**
	 * Calculate CDS start and CDS end
	 */
	synchronized void calcCdsStartEnd() {
		// Do we need to calculate these values?
		// Note: In circular genomes, one of cdsStart / cdsEnd might be less
		//       than zero (we must check both)
		if ((cdsStart < 0) && (cdsEnd < 0)) {
			// Calculate coding start (after 5 prime UTR)

			if (utrs.isEmpty()) {
				// No UTRs => Use all exons
				cdsStart = (isStrandPlus() ? end : start); // cdsStart is the position of the first base in the CDS (i.e. the first base after all 5'UTR)
				cdsEnd = (isStrandPlus() ? start : end); // cdsEnd is the position of the last base in the CDS (i.e. the first base before all 3'UTR)

				for (Exon ex : this) {
					if (isStrandPlus()) {
						cdsStart = Math.min(cdsStart, ex.getStart());
						cdsEnd = Math.max(cdsEnd, ex.getEnd());
					} else {
						cdsStart = Math.max(cdsStart, ex.getEnd());
						cdsEnd = Math.min(cdsEnd, ex.getStart());
					}
				}
			} else {
				// We have to take into account UTRs
				cdsStart = (isStrandPlus() ? start : end); // cdsStart is the position of the first base in the CDS (i.e. the first base after all 5'UTR)
				cdsEnd = (isStrandPlus() ? end : start); // cdsEnd is the position of the last base in the CDS (i.e. the first base before all 3'UTR)
				int cdsStartNotExon = cdsStart;

				for (Utr utr : utrs) {
					if (utr instanceof Utr5prime) {
						if (isStrandPlus()) cdsStart = Math.max(cdsStart, utr.getEnd() + 1);
						else cdsStart = Math.min(cdsStart, utr.getStart() - 1);
					} else if (utr instanceof Utr3prime) {
						if (isStrandPlus()) cdsEnd = Math.min(cdsEnd, utr.getStart() - 1);
						else cdsEnd = Math.max(cdsEnd, utr.getEnd() + 1);
					}
				}

				// Make sure cdsStart and cdsEnd lie within an exon
				if (isStrandPlus()) {
					cdsStart = firstExonPositionAfter(cdsStart);
					cdsEnd = lastExonPositionBefore(cdsEnd);
				} else {
					cdsStart = lastExonPositionBefore(cdsStart);
					cdsEnd = firstExonPositionAfter(cdsEnd);
				}

				// We were not able to find cdsStart & cdsEnd within exon limits.
				// Probably there is something wrong with the database and the transcript does
				// not have a single coding base (e.g. all of it is UTR).
				if (cdsStart < 0 || cdsEnd < 0) cdsStart = cdsEnd = cdsStartNotExon;
			}
		}
	}

	/**
	 * Retrieve coding sequence
	 */
	public synchronized String cds() {
		if (cds != null) return cds;

		// Concatenate all exons
		List exons = sortedStrand();
		StringBuilder sequence = new StringBuilder();
		int utr5len = 0, utr3len = 0;

		// 5 prime UTR length
		for (Utr utr : get5primeUtrs())
			utr5len += utr.size();

		// Append all exon sequences
		boolean missingSequence = false;
		for (Exon exon : exons) {
			missingSequence |= !exon.hasSequence(); // If there is no sequence, we are in trouble
			sequence.append(exon.getSequence());
		}

		if (missingSequence) cds = ""; // One or more exons does not have sequence. Nothing to do
		else {
			// OK, all exons have sequences

			// 3 prime UTR length
			for (Utr utr : get3primeUtrs())
				utr3len += utr.size();

			// Cut 5 prime UTR and 3 prime UTR points
			int subEnd = sequence.length() - utr3len;

			if (utr5len > subEnd) cds = "";
			else cds = sequence.substring(utr5len, subEnd);
		}

		return cds;
	}

	/**
	 * Create a marker of the coding region in this transcript
	 */
	public Marker cdsMarker() {
		return isStrandPlus() //
				? new Marker(this, getCdsStart(), getCdsEnd()) //
				: new Marker(this, getCdsEnd(), getCdsStart()) //
		;
	}

	@Override
	public Transcript cloneShallow() {
		Transcript clone = (Transcript) super.cloneShallow();

		clone.proteinCoding = proteinCoding;
		clone.canonical = canonical;
		clone.bioType = bioType;
		clone.aaCheck = aaCheck;
		clone.dnaCheck = dnaCheck;
		clone.corrected = corrected;
		clone.ribosomalSlippage = ribosomalSlippage;

		return clone;
	}

	/**
	 * Return an array of 3 genomic positions where amino acid number 'aaNum' maps
	 *
	 * @return aa2pos[0], aa2pos[1], aa2pos[2] are the coordinates (within the chromosome)
	 *         of the three bases conforming codon 'aaNum'. Any aa2pos[i] = -1 means that
	 *         it could a base in the codon could not be mapped.
	 */
	public int[] codonNumber2Pos(int codonNum) {
		if (cds2pos == null) baseNumberCds2Pos();

		// Initialize
		int codon[] = new int[3];
		int step = isStrandPlus() ? 1 : -1;
		int idxStart = isStrandPlus() ? 0 : 2;
		for (int i = idxStart, j = 3 * codonNum; (i < codon.length) && (i >= 0) && j < cds2pos.length; i += step, j++) {
			codon[i] = cds2pos[j];
		}

		return codon;
	}

	/**
	 * Collapses exons having gaps of zero (i.e. exons that followed by other exons).
	 * Does the same for CDSs and UTRs.
	 * @return true of any exon in the transcript was 'collapsed'
	 */
	public boolean collapseZeroGap() {
		if (ribosomalSlippage) return false; // Overlapping exons are representing ribosomal slippage, so they are not annotations errors and must not be corrected.

		boolean ret = false;
		introns = null; // These need to be recalculated

		//---
		// Collapse Exons
		//---
		Markers markers = new Markers();
		markers.addAll(subIntervals());
		Map collapse = MarkerUtil.collapseZeroGap(markers); // Create a map of collapsed exons

		// Replace exons
		for (Marker exon : collapse.keySet()) {
			Exon collapsedExon = (Exon) collapse.get(exon);

			// Is this exon to be replaced? (i.e. collapseZeroGap returns different coordinates)
			if (exon.size() != collapsedExon.size() //
					|| exon.getStart() != collapsedExon.getStart() //
					|| exon.getEnd() != collapsedExon.getEnd() //
			) {
				ret = true;

				// Show debugging information
				if (Config.get().isDebug()) System.err.println("\t\t\tTranscript " + getId() + ": Collapsing exon " + exon.getId() + "\t[ " + exon.getStart() + " - " + exon.getEnd() + " ]\t=>\t[ " + collapsedExon.getStart() + " - " + collapsedExon.getEnd() + " ]");

				// Replace exon
				remove((Exon) exon);
				if (!containsId(collapsedExon.getId())) add(collapsedExon); // Add collapsedExon. Make sure we don't add it twice (since many exons can be collapsed into one).

				// Change parent exon in UTRs
				for (Marker m : getUtrs()) {
					Utr utr = (Utr) m;
					if (utr.getParent() == exon) utr.setParent(collapsedExon);
				}
			}
		}

		//---
		// Collapse CDS
		//---
		collapse = MarkerUtil.collapseZeroGap(new Markers(cdss));
		cdss = new ArrayList<>(); // Re-create CDSs list
		Markers uniqCollapsedCds = new Markers(collapse.values()).unique(); // Create a set of unique CDSs and add them to CDSs list
		for (Marker cds : uniqCollapsedCds)
			cdss.add((Cds) cds);

		//---
		// Collapse UTRs
		//---
		collapse = MarkerUtil.collapseZeroGap(new Markers(utrs));
		Markers uniqCollapsedUtrs = new Markers(collapse.values()).unique(); // Create a set of unique UTRs, and add them to the list
		utrs = new ArrayList<>(); // Re-generate UTRs list
		for (Marker utr : uniqCollapsedUtrs)
			utrs.add((Utr) utr);

		return ret;
	}

	/**
	 * Calculate CpG bias: number of CpG / expected[CpG]
	 */
	public double cpgExonBias() {
		ObservedOverExpectedCpG oe = new ObservedOverExpectedCpG();
		return oe.oe(this);
	}

	/**
	 * Count total CpG in this transcript's exons
	 */
	public int cpgExons() {
		ObservedOverExpectedCpG oe = new ObservedOverExpectedCpG();
		return oe.observed(this);
	}

	/**
	 * Find all splice sites.
	 */
	public void createSpliceSites(int spliceSiteSize, int spliceRegionExonSize, int spliceRegionIntronMin, int spliceRegionIntronMax) {
		this.spliceSiteSize = spliceSiteSize;
		this.spliceRegionExonSize = spliceRegionExonSize;
		this.spliceRegionIntronMin = spliceRegionIntronMin;
		this.spliceRegionIntronMax = spliceRegionIntronMax;

		// Create spliceSiteRegion on the Exon side
		ArrayList exons = (ArrayList) sortedStrand();
		if (exons.size() > 0) {
			for (int i = 0; i < exons.size(); i++) { // Iterate like this to check rank
				Exon exon = exons.get(i);

				if (i > 0) exon.createSpliceSiteRegionStart(spliceRegionExonSize); // Splice site region at the start
				if (i < (exon.size() - 1)) exon.createSpliceSiteRegionEnd(spliceRegionExonSize); // Splice site region at the end

				// Sanity check
				int rank = i + 1;
				if (exon.getRank() != rank) {
					String msg = "Rank numbers do not march: " + rank + " != " + exon.getRank() + "\n\tTranscript: " + this;
					throw new RuntimeException(msg);
				}
			}
		}

		// Create spliceSite (donor/acceptor) and spliceSiteRegion on the Intron side
		List introns = introns();
		if (introns != null) {
			for (int i = 0; i < introns.size(); i++) {
				Intron intron = introns.get(i);
				intron.createSpliceSiteAcceptor(spliceSiteSize); // Acceptor splice site
				intron.createSpliceSiteDonor(spliceSiteSize); // Acceptor splice site

				// Splice region
				intron.createSpliceSiteRegionStart(spliceRegionIntronMin, spliceRegionIntronMax);
				intron.createSpliceSiteRegionEnd(spliceRegionIntronMin, spliceRegionIntronMax);
			}
		}
	}

	/**
	 * Creates a list of UP/DOWN stream regions (for each transcript)
	 * Upstream (downstream) stream is defined as upDownLength before (after) transcript
	 */
	public void createUpDownStream(int upDownLength) {
		this.upDownLength = upDownLength;

		Chromosome chr = getChromosome();
		int chrMin = chr.getStart(), chrMax = chr.getEnd();

		// Create up/down stream intervals and add them to the list
		int beforeStart = Math.max(start - upDownLength, chrMin);
		int beforeEnd = Math.max(start - 1, chrMin);
		int afterStart = Math.min(end + 1, chrMax);
		int afterEnd = Math.min(end + upDownLength, chrMax);

		if (isStrandPlus()) {
			if (beforeStart < beforeEnd) upstream = new Upstream(this, beforeStart, beforeEnd, false, id);
			if (afterStart < afterEnd) downstream = new Downstream(this, afterStart, afterEnd, false, id);
		} else {
			if (afterStart < afterEnd) upstream = new Upstream(this, afterStart, afterEnd, false, id);
			if (beforeStart < beforeEnd) downstream = new Downstream(this, beforeStart, beforeEnd, false, id);
		}
	}

	/**
	 * Deletes redundant exons (i.e. exons that are totally included in other exons).
	 * Does the same for CDSs.
	   Does the same for UTRs.
	 */
	public boolean deleteRedundant() {
		boolean ret = false;
		introns = null; // These need to be recalculated

		//---
		// Delete redundant exons
		//---
		Map includedIn = MarkerUtil.redundant(subIntervals());
		for (Marker exon : includedIn.keySet()) {
			ret = true;
			remove((Exon) exon);

			// Change parent exon in UTRs
			for (Marker m : getUtrs()) {
				Utr utr = (Utr) m;
				if (utr.getParent() == exon) utr.setParent(includedIn.get(exon));
			}
		}

		//---
		// Delete redundant CDS
		//---
		includedIn = MarkerUtil.redundant(cdss);
		for (Marker cds : includedIn.keySet())
			cdss.remove(cds);

		//---
		// Delete redundant UTRs
		//---
		includedIn = MarkerUtil.redundant(utrs);
		for (Marker utr : includedIn.keySet())
			utrs.remove(utr);

		return ret;
	}

	/**
	 * Find a CDS that matches exactly the exon
	 */
	public Cds findCds(Exon exon) {
		for (Cds cds : cdss)
			if (exon.includes(cds)) return cds;
		return null;
	}

	/**
	 * Return the an exon that intersects 'pos'
	 */
	public Exon findExon(int pos) {
		for (Exon exon : this)
			if (exon.intersects(pos)) return exon;
		return null;
	}

	/**
	 * Return an exon intersecting 'marker' (first exon found)
	 */
	public Exon findExon(Marker marker) {
		for (Exon exon : this)
			if (exon.intersects(marker)) return exon;
		return null;
	}

	/**
	 * Return an intron overlapping position 'pos'
	 */
	public Intron findIntron(int pos) {
		// Is 'pos' in intron?
		for (Intron intron : introns())
			if (intron.intersects(pos)) return intron;
		return null;
	}

	/**
	 * Return the UTR that hits position 'pos'
	 * @return An UTR intersecting 'pos' (null if not found)
	 */
	public Utr findUtr(int pos) {
		// Is it in UTR?
		for (Utr utr : utrs)
			if (utr.intersects(pos)) return utr;
		return null;
	}

	/**
	 * Return the UTR that intersects 'marker' (null if not found)
	 */
	public List findUtrs(Marker marker) {
		List utrs = new LinkedList<>();

		// Is it in UTR instead of CDS?
		for (Utr utr : utrs)
			if (utr.intersects(marker)) utrs.add(utr);

		return utrs.isEmpty() ? null : utrs;
	}

	/**
	 * Find the first position after 'pos' within an exon
	 */
	int firstExonPositionAfter(int pos) {
		for (Exon ex : sorted()) {
			if (pos <= ex.getStart()) return ex.getStart();
			if (pos <= ex.getEnd()) return pos;
		}

		System.err.println("WARNING: Cannot find first exonic position after " + pos + " for transcript '" + id + "'");
		return -1;
	}

	/**
	 * Correct exons based on frame information.
	 *
	 * E.g. if the frame information (form a genomic
	 * database file, such as a GTF) does not
	 * match the calculated frame, we correct exon's
	 * boundaries to make them match.
	 *
	 * This is performed in two stages:
	 *    i) First exon is corrected by adding a fake 5'UTR
	 *    ii) Other exons are corrected by changing the start (or end) coordinates.
	 */
	public synchronized boolean frameCorrection() {
		// Copy frame information form CDSs to Exons (if missing)
		frameFromCds();

		// First exon is corrected by adding a fake 5'UTR
		boolean changedFirst = frameCorrectionFirstCodingExon();

		// Other exons are corrected by changing the start (or end) coordinates.
		// boolean changedNonFirst = false;
		// Gpr.debug("UNCOMMENT!");
		boolean changedNonFirst = frameCorrectionNonFirstCodingExon();

		boolean changed = changedFirst || changedNonFirst;

		// We have to reset cached CDS data if anything changed
		if (changed) {
			resetCache();
			corrected = true;
		}

		// Return true if there was any adjustment
		return changed;
	}

	/**
	 * Fix transcripts having non-zero frames in first exon
	 *
	 * Transcripts whose first exon has a non-zero frame indicate problems.
	 * We add a 'fake' UTR5 to compensate for reading frame.
	 */
	synchronized boolean frameCorrectionFirstCodingExon() {
		List exons = sortedStrand();

		// No exons? Nothing to do
		if ((exons == null) || exons.isEmpty()) return false;

		Exon exonFirst = getFirstCodingExon(); // Get first exon
		// Exon exonFirst =  exons.get(0); // Get first exon
		if (exonFirst.getFrame() <= 0) return false; // Frame OK (or missing), nothing to do

		// First exon is not zero? => Create a UTR5 prime to compensate
		Utr5prime utr5 = null;
		int frame = exonFirst.getFrame();

		if (isStrandPlus()) {
			int end = exonFirst.getStart() + (frame - 1);
			utr5 = new Utr5prime(exonFirst, exonFirst.getStart(), end, isStrandMinus(), exonFirst.getId());
		} else {
			int start = exonFirst.getEnd() - (frame - 1);
			utr5 = new Utr5prime(exonFirst, start, exonFirst.getEnd(), isStrandMinus(), exonFirst.getId());
		}

		// Reset frame, since it was already corrected
		exonFirst.setFrame(0);
		Cds cds = findCds(exonFirst);
		if (cds != null) cds.frameCorrection(cds.getFrame());

		// Add UTR5'
		add(utr5);

		// Update counter
		return true;
	}

	/**
	 * Correct exons according to frame information
	 */
	synchronized boolean frameCorrectionNonFirstCodingExon() {
		boolean corrected = false;

		// Concatenate all exons to create a CDS
		List exons = sortedStrand();
		StringBuilder sequence = new StringBuilder();

		// We don't need to correct if there is no sequence! (the problem only exists due to sequence frame)
		for (Exon exon : exons)
			if (!exon.hasSequence()) return false;

		// 5'UTR length
		int utr5Start = Integer.MAX_VALUE, utr5End = -1;
		for (Utr utr : get5primeUtrs()) {
			utr.size();
			utr5Start = Math.min(utr5Start, utr.getStart());
			utr5End = Math.max(utr5End, utr.getEnd());
		}

		// Create UTR
		Marker utr5 = utr5End >= 0 ? new Marker(this, utr5Start, utr5End, strandMinus, "") : null;

		// Append all exon sequences
		for (Exon exon : exons) {
			String seq = "";
			int utrOverlap = 0;
			boolean utrIncluded = false;

			// Check if exon overlaps UTR
			if (utr5 != null && utr5.includes(exon)) {
				// The whole exon is included => No sequence change
				seq = "";
				utrIncluded = true;
			} else {
				// Add sequence
				seq = exon.getSequence();
				if (utr5 != null && utr5.intersects(exon)) {
					utrOverlap = utr5.intersectSize(exon);
					if (utrOverlap > 0) {
						if (utrOverlap < seq.length()) seq = seq.substring(utrOverlap);
						else seq = "";
					}
				}
			}

			//---
			// Frame check
			//---
			if (exon.getFrame() < 0) {
				// Nothing to do (assume current frame is right)
			} else if (utrIncluded) {
				// Exon does not code, frame information is meaningless
				if (exon.getFrame() >= 0) exon.setFrame(-1);
			} else {
				// Calculate frame
				// We use GFF style frame calculation
				// References: http://mblab.wustl.edu/GTF22.html
				int frameReal = FrameType.GFF.frameFromLength(sequence.length());

				// Does calculated frame match?
				if (frameReal != exon.getFrame()) {
					if (utrOverlap > 0) {
						throw new RuntimeException("Fatal Error: First exon needs correction: This should never happen!"//
								+ "\n\tThis method is supposed to be called AFTER method"//
								+ "\n\tSnpEffPredictorFactory.frameCorrectionFirstCodingExon(), which"//
								+ "\n\tshould have taken care of this problem." //
								+ "\n\t" + this //
						);
					} else {
						if (Config.get().isDebug()) {
							System.err.println("\t\tFrame correction: " //
									+ "Position " + toStr() //
									+ "Transcript '" + getId() + "'" //
									+ "\tExon rank " + exon.getRank() //
									+ "\tExpected frame: " + frameReal //
									+ "\tExon frame: " + exon.getFrame() //
									+ "\tSequence len: " + sequence.length() //
							);
						}
						// Find matching CDS
						Cds cdsToCorrect = findCds(exon);

						// Correct exon until we get the expected frame
						for (boolean ok = true; ok && frameReal != exon.getFrame();) {
							// Correct both Exon and CDS
							ok &= exon.frameCorrection(1);
							if (cdsToCorrect != null) cdsToCorrect.frameCorrection(1);
							corrected = true;
						}

						// Get new exon's sequence
						seq = exon.getSequence();
					}
				}
			}

			// Append sequence
			sequence.append(seq);
		}

		return corrected;
	}

	/**
	 * Copy frame info from CDSs into Exons
	 */
	void frameFromCds() {
		for (Exon ex : this) {
			// No frame info? => try to find matching CDS
			if (ex.getFrame() < 0) {
				// Check a CDS that matches an exon
				for (Cds cds : getCds()) {
					// CDS matches the exon coordinates? => Copy frame info
					if (isStrandPlus() && (ex.getStart() == cds.getStart())) {
						ex.setFrame(cds.getFrame());
						break;
					} else if (isStrandMinus() && (ex.getEnd() == cds.getEnd())) {
						ex.setFrame(cds.getFrame());
						break;
					}
				}
			}
		}

	}

	/**
	 * Create a list of 3 prime UTRs
	 */
	public List get3primeUtrs() {
		ArrayList list = new ArrayList<>();
		for (Utr utr : utrs)
			if (utr instanceof Utr3prime) list.add((Utr3prime) utr);
		return list;
	}

	public List get3primeUtrsSorted() {
		List list = get3primeUtrs();
		Collections.sort(list);
		return list;
	}

	/**
	 * Create a list of 5 prime UTRs
	 */
	public List get5primeUtrs() {
		ArrayList list = new ArrayList<>();
		for (Utr utr : utrs)
			if (utr instanceof Utr5prime) list.add((Utr5prime) utr);
		return list;
	}

	public List get5primeUtrsSorted() {
		List list = get5primeUtrs();
		Collections.sort(list);
		return list;
	}

	public BioType getBioType() {
		return bioType;
	}

	/**
	 * Get all CDSs
	 */
	public List getCds() {
		return cdss;
	}

	public int getCdsEnd() {
		calcCdsStartEnd();
		return cdsEnd;
	}

	public int getCdsStart() {
		calcCdsStartEnd();
		return cdsStart;
	}

	public Downstream getDownstream() {
		return downstream;
	}

	/**
	 * Get first coding exon
	 */
	public synchronized Exon getFirstCodingExon() {
		if (firstCodingExon == null) {
			// Get transcription start position
			long cstart = getCdsStart();

			// Pick exon intersecting cdsStart (TSS)
			for (Exon exon : sortedStrand())
				if (exon.intersects(cstart)) firstCodingExon = exon;

			// Sanity check
			if (firstCodingExon == null) throw new RuntimeException("Error: Cannot find first coding exon for transcript:\n" + this);
		}
		return firstCodingExon;
	}

	public TranscriptSupportLevel getTranscriptSupportLevel() {
		return transcriptSupportLevel;
	}

	/**
	 * Create a TSS marker
	 */
	public Marker getTss() {
		calcCdsStartEnd();
		Marker tss = new Marker(this, start + (isStrandPlus() ? 0 : -1), start + (isStrandPlus() ? 1 : 0), false, "TSS_" + id);
		return tss;
	}

	public Upstream getUpstream() {
		return upstream;
	}

	/**
	 * Get all UTRs
	 */
	public List getUtrs() {
		return utrs;
	}

	public String getVersion() {
		return version;
	}

	/**
	 * Does this transcript have any errors?
	 */
	public boolean hasError() {
		return isErrorProteinLength() || isErrorStartCodon() || isErrorStopCodonsInCds();
	}

	/**
	 * Does this transcript have any errors?
	 */
	public boolean hasErrorOrWarning() {
		return hasError() || hasWarning();
	}

	public boolean hasTranscriptSupportLevelInfo() {
		return (transcriptSupportLevel != null) && (transcriptSupportLevel != TranscriptSupportLevel.TSL_NA);
	}

	/**
	 * Does this transcript have any errors?
	 */
	public boolean hasWarning() {
		return isWarningStopCodon() // All possible warnings
		;
	}

	/**
	 * Get all introns (lazy init)
	 */
	public synchronized List introns() {
		if (introns == null) {
			introns = new ArrayList<>();

			Exon exBefore = null;
			for (Exon ex : sortedStrand()) {
				if (exBefore != null) {
					// Create intron
					Intron intron;
					int rank = introns.size() + 1;

					// Find intron start and end
					int start, end;
					if (isStrandPlus()) {
						start = exBefore.getEnd() + 1;
						end = ex.getStart() - 1;
					} else {
						start = ex.getEnd() + 1;
						end = exBefore.getStart() - 1;
					}

					int size = end - start + 1;
					if (size > 0) {
						// Add intron to list
						intron = new Intron(this, start, end, strandMinus, id + "_intron_" + rank, exBefore, ex);

						intron.setRank(rank);
						introns.add(intron);
					}
				}

				exBefore = ex;
			}
		}
		return introns;
	}

	public boolean isAaCheck() {
		return aaCheck;
	}

	@Override
	protected boolean isAdjustIfParentDoesNotInclude(Marker parent) {
		return true;
	}

	public boolean isCanonical() {
		return canonical;
	}

	/**
	 * Is this variant in the CDS part of this transcript?
	 */
	boolean isCds(Variant variant) {
		calcCdsStartEnd();

		int cs = cdsStart;
		int ce = cdsEnd;

		if (isStrandMinus()) {
			cs = cdsEnd;
			ce = cdsStart;
		}

		return (variant.getEnd() >= cs) && (variant.getStart() <= ce);
	}

	/**
	 * Has this transcript been checked against CDS/DNA/AA sequences?
	 */
	public boolean isChecked() {
		return aaCheck || dnaCheck;
	}

	public boolean isCorrected() {
		return corrected;
	}

	public boolean isDnaCheck() {
		return dnaCheck;
	}

	public boolean isDownstream(int pos) {
		return downstream != null && downstream.intersects(pos);
	}

	/**
	 * Check if coding length is multiple of 3 in protein coding transcripts
	 * @return true on Error
	 */
	public boolean isErrorProteinLength() {
		if (!Config.get().isTreatAllAsProteinCoding() && !isProteinCoding()) return false;
		return (cds().length() % 3) != 0;
	}

	/**
	 * Is the first codon a START codon?
	 */
	public boolean isErrorStartCodon() {
		if (!Config.get().isTreatAllAsProteinCoding() && !isProteinCoding()) return false;

		// Not even one codon in this protein? Error
		String cds = cds();
		if (cds.length() < 3) return true;

		String codon = cds.substring(0, 3);
		return !codonTable().isStart(codon);
	}

	/**
	 * Check if protein sequence has STOP codons in the middle of the coding sequence
	 * @return true on Error
	 */
	public boolean isErrorStopCodonsInCds() {
		if (!Config.get().isTreatAllAsProteinCoding() && !isProteinCoding()) return false;

		// Get protein sequence
		String prot = protein();
		if (prot == null) return false;

		// Any STOP codon before the end?
		char bases[] = prot.toCharArray();
		int max = bases.length - 1;
		int countErrs = 0;
		for (int i = 0; i < max; i++)
			if (bases[i] == '*') {
				countErrs++;
				// We allow up to one STOP codon because it can be a RARE_AMINO_ACID which is coded as a STOP codon.
				// More than one STOP codon is not "normal", so it's probably an error in the genomic annotations (e.g. ENSEMBL or UCSC)
				if (countErrs > 1) return true;
			}

		// OK
		return false;
	}

	public boolean isIntron(int pos) {
		return findIntron(pos) != null;
	}

	public boolean isProteinCoding() {
		return proteinCoding;
	}

	public boolean isRibosomalSlippage() {
		return ribosomalSlippage;
	}

	public boolean isUpstream(int pos) {
		return upstream != null && upstream.intersects(pos);
	}

	public boolean isUtr(int pos) {
		return findUtr(pos) != null;
	}

	public boolean isUtr(Marker marker) {
		return findUtrs(marker) != null;
	}

	public boolean isUtr3(int pos) {
		Utr utr = findUtr(pos);
		return utr != null && utr instanceof Utr3prime;
	}

	public boolean isUtr5(int pos) {
		Utr utr = findUtr(pos);
		return utr != null && utr instanceof Utr5prime;
	}

	/**
	 * Is the last codon a STOP codon?
	 */
	public boolean isWarningStopCodon() {
		if (!Config.get().isTreatAllAsProteinCoding() && !isProteinCoding()) return false;

		// Not even one codon in this protein? Error
		String cds = cds();
		if (cds.length() < 3) return true;

		String codon = cds.substring(cds.length() - 3);
		return !codonTable().isStop(codon);
	}

	/**
	 * Find the last position before 'pos' within an exon
	 */
	int lastExonPositionBefore(int pos) {
		int last = -1;
		for (Exon ex : sorted()) {
			if (pos < ex.getStart()) {
				// Nothing found?
				if (last < 0) {
					System.err.println("WARNING: Cannot find last exonic position before " + pos + " for transcript '" + id + "'");
					return -1;
				}
				return last;
			} else if (pos <= ex.getEnd()) return pos;
			last = ex.getEnd();
		}

		if (last < 0) System.err.println("WARNING: Cannot find last exonic position before " + pos + " for transcript '" + id + "'");
		return pos;
	}

	/**
	 * A list of all markers in this transcript
	 */
	@Override
	public Markers markers() {
		Markers markers = new Markers();
		markers.addAll(subIntervals());
		markers.addAll(utrs);
		markers.addAll(cdss);
		markers.add(upstream);
		markers.add(downstream);
		markers.addAll(introns());
		return markers;
	}

	/**
	 * Retrieve coding sequence AND the UTRs (mRNA = 5'UTR + CDS + 3'UTR)
	 * I.e. Concatenate all exon sequences
	 */
	public synchronized String mRna() {
		if (mRna != null) return mRna;

		List exons = sortedStrand();

		// Concatenate all exons
		StringBuilder sequence = new StringBuilder();
		for (Exon ex : exons)
			sequence.append(ex.getSequence());

		mRna = sequence.toString();
		return mRna;
	}

	/**
	 * Protein sequence (amino acid sequence produced by this transcripts)
	 */
	public String protein() {
		if (protein == null) {
			if (!(Config.get() != null && Config.get().isTreatAllAsProteinCoding()) && !isProteinCoding()) protein = "";
			else protein = codonTable().aa(cds(), true);
		}
		return protein;
	}

	/**
	 * Query all genomic regions that intersect 'marker'
	 */
	@Override
	public Markers query(Marker marker) {
		Set results = new HashSet<>();

		// Add exons
		for (Exon ex : this)
			if (ex.intersects(marker)) {
				results.add(ex);

				// Query deeper
				for (Marker ee : ex.query(marker))
					results.add(ee);
			}

		// Ad UTRs
		for (Utr u : utrs)
			if (u.intersects(marker)) results.add(u);

		// Add CDSs
		for (Cds m : cdss)
			if (m.intersects(marker)) results.add(m);

		// Add introns
		for (Intron intr : introns())
			if (intr.intersects(marker)) {
				results.add(intr);

				// Query deeper
				for (Marker ee : intr.query(marker))
					results.add(ee);
			}

		// Get results into markers
		Markers markers = new Markers();
		markers.addAll(results);

		return markers;
	}

	/**
	 * Return the first exon that intersects 'interval' (null if not found)
	 */
	public Exon queryExon(Marker interval) {
		for (Exon ei : this)
			if (ei.intersects(interval)) return ei;
		return null;
	}

	/**
	 * Assign ranks to exons
	 */
	public boolean rankExons() {
		boolean changed = false;
		int rank = 1;
		for (Exon exon : sortedStrand()) {
			if (rank != exon.getRank()) {
				exon.setRank(rank);
				changed = true;
			}
			rank++;
		}
		return changed;
	}

	@Override
	public void reset() {
		super.reset();

		utrs = new ArrayList<>();
		cdss = new ArrayList<>();
		introns = null;
		upstream = null;
		downstream = null;
		resetCache();
	}

	public void resetCache() {
		cdsStart = -1;
		cdsEnd = -1;
		firstCodingExon = null;
		cds = null;
		cds2pos = null;
		aa2pos = null;
		mRna = null;
		protein = null;
	}

	public void resetExons() {
		super.reset();
		resetCache();
	}

	/**
	 * Perfom some baseic chekcs, return error type, if any
	 */
	public ErrorWarningType sanityCheck(Variant variant) {
		if (isErrorStopCodonsInCds()) return ErrorWarningType.WARNING_TRANSCRIPT_MULTIPLE_STOP_CODONS;
		if (isErrorProteinLength()) return ErrorWarningType.WARNING_TRANSCRIPT_INCOMPLETE;
		if (isErrorStartCodon()) return ErrorWarningType.WARNING_TRANSCRIPT_NO_START_CODON;
		if (isWarningStopCodon()) return ErrorWarningType.WARNING_TRANSCRIPT_NO_STOP_CODON;
		return null;
	}

	/**
	 * Parse a line from a serialized file
	 */
	@Override
	public void serializeParse(MarkerSerializer markerSerializer) {
		super.serializeParse(markerSerializer);
		bioType = BioType.parse(markerSerializer.getNextField());
		proteinCoding = markerSerializer.getNextFieldBoolean();
		dnaCheck = markerSerializer.getNextFieldBoolean();
		aaCheck = markerSerializer.getNextFieldBoolean();
		corrected = markerSerializer.getNextFieldBoolean();
		ribosomalSlippage = markerSerializer.getNextFieldBoolean();
		transcriptSupportLevel = TranscriptSupportLevel.parse(markerSerializer.getNextField());
		version = markerSerializer.getNextField();

		upstream = (Upstream) markerSerializer.getNextFieldMarker();
		downstream = (Downstream) markerSerializer.getNextFieldMarker();

		for (Marker m : markerSerializer.getNextFieldMarkers())
			utrs.add((Utr) m);

		for (Marker m : markerSerializer.getNextFieldMarkers())
			cdss.add((Cds) m);
	}

	/**
	 * Create a string to serialize to a file
	 */
	@SuppressWarnings({ "unchecked", "rawtypes" })
	@Override
	public String serializeSave(MarkerSerializer markerSerializer) {
		return super.serializeSave(markerSerializer) //
				+ "\t" + bioType //
				+ "\t" + proteinCoding //
				+ "\t" + dnaCheck //
				+ "\t" + aaCheck //
				+ "\t" + corrected //
				+ "\t" + ribosomalSlippage //
				+ "\t" + (transcriptSupportLevel == null ? "" : transcriptSupportLevel.toString()) //
				+ "\t" + version //
				+ "\t" + markerSerializer.save(upstream) //
				+ "\t" + markerSerializer.save(downstream) //
				+ "\t" + markerSerializer.save((Iterable) utrs)//
				+ "\t" + markerSerializer.save((Iterable) cdss)//
		;
	}

	public void setAaCheck(boolean aaCheck) {
		this.aaCheck = aaCheck;
	}

	public void setBioType(BioType bioType) {
		this.bioType = bioType;
	}

	public void setCanonical(boolean canonical) {
		this.canonical = canonical;
	}

	public void setDnaCheck(boolean dnaCheck) {
		this.dnaCheck = dnaCheck;
	}

	public void setProteinCoding(boolean proteinCoding) {
		this.proteinCoding = proteinCoding;
	}

	public void setRibosomalSlippage(boolean ribosomalSlippage) {
		this.ribosomalSlippage = ribosomalSlippage;
	}

	public void setTranscriptSupportLevel(TranscriptSupportLevel transcriptSupportLevel) {
		this.transcriptSupportLevel = transcriptSupportLevel;
	}

	public void setVersion(String version) {
		this.version = version;
	}

	public void sortCds() {
		Collections.sort(cdss);
		resetCache();
	}

	public List spliceSites() {
		List sslist = new ArrayList<>();

		for (Exon ex : this)
			sslist.addAll(ex.getSpliceSites());

		for (Intron intr : introns())
			sslist.addAll(intr.getSpliceSites());

		return sslist;
	}

	@Override
	public String toString() {
		return toString(false);
	}

	public String toString(boolean full) {
		StringBuilder sb = new StringBuilder();

		sb.append(getChromosomeName() + ":" + start + "-" + end);
		sb.append(", strand: " + (isStrandPlus() ? "+" : "-"));
		if ((id != null) && (id.length() > 0)) sb.append(", id:" + id);
		if ((bioType != null) && (bioType != null)) sb.append(", bioType:" + bioType);
		if (isProteinCoding()) sb.append(", Protein");
		if (isAaCheck()) sb.append(", AA check");
		if (isDnaCheck()) sb.append(", DNA check");

		if (numChilds() > 0) {
			sb.append("\n");
			for (Utr utr : get5primeUtrsSorted())
				sb.append("\t\t5'UTR   :\t" + utr + "\n");

			sb.append("\t\tExons:\n");
			for (Exon exon : sorted())
				sb.append("\t\t" + exon + "\n");

			for (Utr utr : get3primeUtrsSorted())
				sb.append("\t\t3'UTR   :\t" + utr + "\n");

			// We may show CDS
			if (isProteinCoding()) {
				sb.append("\t\tCDS     :\t" + cds() + "\n");
				sb.append("\t\tProtein :\t" + protein() + "\n");
			}
		}

		if (full) {
			// Show errors or warnings
			String warn = "";
			if (isErrorStopCodonsInCds()) warn += ErrorWarningType.WARNING_TRANSCRIPT_MULTIPLE_STOP_CODONS + " ";
			if (isErrorProteinLength()) warn += ErrorWarningType.WARNING_TRANSCRIPT_INCOMPLETE + " ";
			if (isErrorStartCodon()) warn += ErrorWarningType.WARNING_TRANSCRIPT_NO_START_CODON + " ";
			if (isWarningStopCodon()) warn += ErrorWarningType.WARNING_TRANSCRIPT_NO_STOP_CODON + " ";
			if (!warn.isEmpty()) sb.append("\tWarnings  :" + warn);

			// Sequence checks
			sb.append("\t\tCDS check : " + (isDnaCheck() ? "OK" : "Failed (or missing)") + "\n");
			sb.append("\t\tAA check  : " + (isAaCheck() ? "OK" : "Failed (or missing)") + "\n");
		}

		return sb.toString();
	}

	/**
	 * Show a transcript as an ASCII Art
	 */
	public String toStringAsciiArt(boolean full) {

		//---
		// ASCII art for transcript
		//---
		char art[] = new char[size()];
		for (int i = start, j = 0; i <= end; i++, j++) {
			Utr utr = findUtr(i);
			if (utr != null) art[j] = utr.isUtr5prime() ? '5' : '3';
			else {
				Exon exon = findExon(i);
				if (exon != null) {
					art[j] = exon.isStrandPlus() ? '>' : '<';
				} else art[j] = '-';
			}
		}

		// Only 'basic' ASCII art?
		if (!full) return new String(art);

		//---
		// DNA Sequence
		//---
		StringBuilder seq = new StringBuilder();
		for (int i = start; i <= end; i++) {
			Exon exon = findExon(i);
			if (exon != null) {
				String s = exon.getSequence().toLowerCase();
				if (exon.isStrandMinus()) s = GprSeq.reverseWc(s);

				s = GprSeq.padN(s, exon.size());
				seq.append(s);
				i += s.length() - 1;
			} else seq.append('.');
		}

		//---
		// AA Sequence and frame
		//---
		StringBuilder aa = new StringBuilder();
		StringBuilder frameSb = new StringBuilder();
		StringBuilder pos1sb = new StringBuilder();
		StringBuilder pos10sb = new StringBuilder();
		StringBuilder pos100sb = new StringBuilder();
		StringBuilder pos1000sb = new StringBuilder();

		int pos = start;
		if (isProteinCoding()) {
			char codon[] = new char[3];
			int step = isStrandPlus() ? 1 : -1;
			int frame = 0;
			for (int i = (isStrandPlus() ? 0 : art.length - 1), j = 0; (i >= 0) && (i < art.length); i += step) {
				if (art[i] == '3' || art[i] == '5') {
					// 5'UTR or 3'UTR
					aa.append(' ');
					frameSb.append(' ');
				} else {
					char b = seq.charAt(i);
					if (b == 'a' || b == 'c' || b == 'g' || b == 't') {
						// Coding sequence
						codon[j++] = b;
						if (j >= 3) {
							j = 0;
							String cod = new String(codon);
							if (isStrandMinus()) cod = GprSeq.wc(cod); // Bases are already reversed, we only need WC complement

							boolean translateStart = (aa.length() <= 0); // First codon? Translate start coddon accordingly
							aa.append(" " + codonTable().aa(cod, translateStart) + " ");
						}

						// Update frame
						frameSb.append(frame);
						frame = (frame + 1) % 3;

					} else {
						// Intron
						aa.append(' ');
						frameSb.append(' ');
					}
				}

				// Position
				pos1sb.append(pos % 10);

				if (pos % 10 == 0) pos10sb.append((pos % 100) / 10);
				else pos10sb.append(' ');

				if (pos % 100 == 0) pos100sb.append((pos % 1000) / 100);
				else pos100sb.append(' ');

				if (pos % 1000 == 0) pos1000sb.append((pos % 10000) / 1000);
				else pos1000sb.append(' ');

				pos++;
			}
		}

		String aaStr = isStrandPlus() ? aa.toString() : aa.reverse().toString();
		String frameStr = isStrandPlus() ? frameSb.toString() : frameSb.reverse().toString();

		//---
		// Coordinates
		//---

		// Create 'vertical lines'
		StringBuilder lines = new StringBuilder();
		if (isStrandPlus()) lines.append(' ');
		int prev = start;
		for (Exon ex : this.sorted()) {
			lines.append(Gpr.repeat(' ', ex.getStart() - prev - 1) + "|");
			prev = ex.getStart();

			lines.append(Gpr.repeat(' ', ex.getEnd() - prev - 1) + "|");
			prev = ex.getEnd();
		}

		StringBuilder coords = new StringBuilder();
		coords.append(lines + "\n");

		ArrayList exons = new ArrayList<>();
		exons.addAll(subIntervals());
		Collections.sort(exons, new IntervalComparatorByStart(true)); // Sort by reverse position

		int n, len;
		for (Exon ex : exons) {
			// Right-side coordinate
			n = ex.getEnd();
			len = n - start + 1;
			coords.append((len > 0 ? lines.subSequence(0, len) : "") + "^" + n + "\n");

			// Left-side coordinate
			n = ex.getStart();
			len = n - start + 1;
			coords.append((len > 0 ? lines.subSequence(0, len) : "") + "^" + n + "\n");
		}

		// Result
		return "" //
				+ (pos1000sb.toString().trim().isEmpty() ? "" : "\n" + pos1000sb) //
				+ (pos100sb.toString().trim().isEmpty() ? "" : "\n" + pos100sb) //
				+ (pos10sb.toString().trim().isEmpty() ? "" : "\n" + pos10sb) //
				+ "\n" + pos1sb //
				+ "\n" + seq //
				+ (isProteinCoding() ? "\n" + aaStr + "\n" + frameStr : "") //
				+ "\n" + new String(art) //
				+ "\n" + coords //
		;
	}

	/**
	 * Calculate UTR regions from CDSs
	 */
	public boolean utrFromCds(boolean verbose) {
		if (cdss.size() <= 0) return false; // Cannot do this if we don't have CDS information

		// All exons minus all UTRs and CDS should give us the missing UTRs
		Markers exons = new Markers();
		Markers minus = new Markers();

		// Add all exons
		for (Exon e : this)
			exons.add(e);

		// Add all UTRs and CDSs to the 'minus' set
		for (Utr uint : getUtrs())
			minus.add(uint);

		for (Cds cint : cdss)
			minus.add(cint);

		Markers missingUtrs = exons.minus(minus); // Perform interval minus
		if (!missingUtrs.isEmpty()) return addMissingUtrs(missingUtrs, verbose); // Anything left? => There was a missing UTR
		return false;
	}

	/**
	 * Get some details about the effect on this transcript
	 */
	@Override
	public boolean variantEffect(Variant variant, VariantEffects variantEffects) {
		if (!intersects(variant)) return false; // Sanity check

		// Large structural variant including the whole transcript?
		if (variant.includes(this) && variant.isStructural()) {
			CodonChange codonChange = CodonChange.factory(variant, this, variantEffects);
			codonChange.codonChange();
			return true;
		}

		//---
		// Structural variants may affect more than one exon
		//---
		boolean mayAffectSeveralExons = variant.isStructural() || variant.isMixed() || variant.isMnp();
		if (mayAffectSeveralExons) {
			int countExon = 0;
			for (Exon ex : this)
				if (ex.intersects(variant)) countExon++;

			// More than one exon?
			if (countExon > 1) {
				CodonChange codonChange = CodonChange.factory(variant, this, variantEffects);
				codonChange.codonChange();
				return true;
			}
		}

		//---
		// Does it hit an exon?
		// Note: This only adds spliceSites effects, for detailed codon
		//       changes effects we use 'CodonChange' class
		//---
		boolean exonAnnotated = false;
		for (Exon ex : this)
			if (ex.intersects(variant)) {
				exonAnnotated |= ex.variantEffect(variant, variantEffects);
			}

		//---
		// Hits a UTR region?
		//---
		boolean included = false;
		for (Utr utr : utrs)
			if (utr.intersects(variant)) {
				// Calculate the effect
				utr.variantEffect(variant, variantEffects);
				included |= utr.includes(variant); // Is this variant fully included in the UTR?
			}
		if (included) return true; // Variant fully included in the UTR? => We are done.

		//---
		// Does it hit an intron?
		//---
		for (Intron intron : introns())
			if (intron.intersects(variant)) {
				intron.variantEffect(variant, variantEffects);
				included |= intron.includes(variant); // Is this variant fully included in this intron?
			}
		if (included) return true; // Variant fully included? => We are done.

		//---
		// No annotations from exons? => Add transcript
		//---
		if (!exonAnnotated) {
			variantEffects.add(variant, this, EffectType.TRANSCRIPT, "");
			return true;
		}

		return exonAnnotated;
	}

}