All downloads are free. The search and download functionality uses the official Maven repository.

de.charite.compbio.jannovar.data.JannovarData Maven / Gradle / Ivy

package de.charite.compbio.jannovar.data;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;

import de.charite.compbio.jannovar.Immutable;
import de.charite.compbio.jannovar.impl.intervals.IntervalArray;
import de.charite.compbio.jannovar.reference.TranscriptIntervalEndExtractor;
import de.charite.compbio.jannovar.reference.TranscriptModel;

/**
 * This data type is used for serialization after downloading.
 *
 * Making this class immutable makes it a convenient serializeable read-only database.
 *
 * @author Manuel Holtgrewe
 */
@Immutable
public final class JannovarData implements Serializable {

	/** Serial version ID. */
	private static final long serialVersionUID = 3L;

	/** map from chromosome ID to {@link Chromosome} */
	private final ImmutableMap chromosomes;

	/** map from transcript accession to {@link TranscriptModel} instance. */
	private final ImmutableMap tmByAccession;

	/** map from transcript accession to {@link TranscriptModel} instance. */
	private final ImmutableMultimap tmByGeneSymbol;

	/** information about reference lengths and identities */
	private final ReferenceDictionary refDict;

	/**
	 * Initialize the object with the given values.
	 *
	 * @param refDict
	 *            the {@link ReferenceDictionary} to use in this object
	 * @param transcriptInfos
	 *            the list of {@link TranscriptModel} objects to use in this object
	 */
	public JannovarData(ReferenceDictionary refDict, ImmutableList transcriptInfos) {
		this.refDict = refDict;
		this.chromosomes = makeChromsomes(refDict, transcriptInfos);
		this.tmByAccession = makeTMByAccession(transcriptInfos);
		this.tmByGeneSymbol = makeTMByGeneSymbol(transcriptInfos);
	}

	/** @return map from chromosome ID to {@link Chromosome} */
	public ImmutableMap getChromosomes() {
		return chromosomes;
	}

	/** @return map from transcript accession to {@link TranscriptModel} instance. */
	public ImmutableMap getTmByAccession() {
		return tmByAccession;
	}

	/** @return map from transcript accession to {@link TranscriptModel} instance. */
	public ImmutableMultimap getTmByGeneSymbol() {
		return tmByGeneSymbol;
	}

	/** @return information about reference lengths and identities */
	public ReferenceDictionary getRefDict() {
		return refDict;
	}

	/**
	 * @param transcriptInfos
	 *            set of {@link TranscriptModel}s to build multi-mapping for
	 * @return multi-mapping from gene symbol to {@link TranscriptModel}
	 */
	private static ImmutableMultimap makeTMByGeneSymbol(
			ImmutableList transcriptInfos) {
		ImmutableMultimap.Builder builder = new ImmutableMultimap.Builder();
		for (TranscriptModel tm : transcriptInfos)
			builder.put(tm.getGeneSymbol(), tm);
		return builder.build();
	}

	/**
	 * @param transcriptInfos
	 *            set of {@link TranscriptModel}s to build mapping for
	 * @return mapping from gene symbol to {@link TranscriptModel}
	 */
	private static ImmutableMap makeTMByAccession(
			ImmutableList transcriptInfos) {
		ImmutableMap.Builder builder = new ImmutableMap.Builder();
		for (TranscriptModel tm : transcriptInfos)
			builder.put(tm.getAccession(), tm);
		return builder.build();
	}

	/**
	 * This function constructs a HashMap map of Chromosome objects in which the {@link TranscriptInfo}
	 * objects are entered into an {@link IntervalArray} for the appropriate Chromosome.
	 *
	 * @param refDict
	 *            the {@link ReferenceDictionary} to use for the construction
	 * @param transcriptInfos
	 *            list of {@link TranscriptInfo} objects with the transcripts of all chromosomes
	 * @return a mapping from numeric chromsome ID to {@link Chromosome} object
	 */
	private static ImmutableMap makeChromsomes(ReferenceDictionary refDict,
			ImmutableList transcriptInfos) {
		ImmutableMap.Builder builder = new ImmutableMap.Builder();

		// First, factorize the TranscriptInfo objects by chromosome ID.

		// create hash map for this
		HashMap> transcripts = new HashMap>();
		for (Integer chrID : refDict.getContigIDToName().keySet())
			transcripts.put(chrID, new ArrayList());
		// distribute TranscriptInfo lists
		for (TranscriptModel transcript : transcriptInfos)
			transcripts.get(transcript.getChr()).add(transcript);

		// Then, construct an interval tree for each chromosome and add the lists of intervals.
		for (Integer chrID : transcripts.keySet()) {
			IntervalArray iTree = new IntervalArray(transcripts.get(chrID),
					new TranscriptIntervalEndExtractor());
			builder.put(chrID, new Chromosome(refDict, chrID, iTree));
		}

		return builder.build();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy