// de.charite.compbio.jannovar.data.JannovarData (Maven / Gradle / Ivy source listing)
package de.charite.compbio.jannovar.data;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import de.charite.compbio.jannovar.Immutable;
import de.charite.compbio.jannovar.impl.intervals.IntervalArray;
import de.charite.compbio.jannovar.reference.TranscriptIntervalEndExtractor;
import de.charite.compbio.jannovar.reference.TranscriptModel;
/**
* This data type is used for serialization after downloading.
*
* Making this class immutable makes it a convenient serializeable read-only database.
*
* @author Manuel Holtgrewe
*/
@Immutable
public final class JannovarData implements Serializable {
/** Serial version ID. */
private static final long serialVersionUID = 3L;
/** map from chromosome ID to {@link Chromosome} */
private final ImmutableMap chromosomes;
/** map from transcript accession to {@link TranscriptModel} instance. */
private final ImmutableMap tmByAccession;
/** map from transcript accession to {@link TranscriptModel} instance. */
private final ImmutableMultimap tmByGeneSymbol;
/** information about reference lengths and identities */
private final ReferenceDictionary refDict;
/**
* Initialize the object with the given values.
*
* @param refDict
* the {@link ReferenceDictionary} to use in this object
* @param transcriptInfos
* the list of {@link TranscriptModel} objects to use in this object
*/
public JannovarData(ReferenceDictionary refDict, ImmutableList transcriptInfos) {
this.refDict = refDict;
this.chromosomes = makeChromsomes(refDict, transcriptInfos);
this.tmByAccession = makeTMByAccession(transcriptInfos);
this.tmByGeneSymbol = makeTMByGeneSymbol(transcriptInfos);
}
/** @return map from chromosome ID to {@link Chromosome} */
public ImmutableMap getChromosomes() {
return chromosomes;
}
/** @return map from transcript accession to {@link TranscriptModel} instance. */
public ImmutableMap getTmByAccession() {
return tmByAccession;
}
/** @return map from transcript accession to {@link TranscriptModel} instance. */
public ImmutableMultimap getTmByGeneSymbol() {
return tmByGeneSymbol;
}
/** @return information about reference lengths and identities */
public ReferenceDictionary getRefDict() {
return refDict;
}
/**
* @param transcriptInfos
* set of {@link TranscriptModel}s to build multi-mapping for
* @return multi-mapping from gene symbol to {@link TranscriptModel}
*/
private static ImmutableMultimap makeTMByGeneSymbol(
ImmutableList transcriptInfos) {
ImmutableMultimap.Builder builder = new ImmutableMultimap.Builder();
for (TranscriptModel tm : transcriptInfos)
builder.put(tm.getGeneSymbol(), tm);
return builder.build();
}
/**
* @param transcriptInfos
* set of {@link TranscriptModel}s to build mapping for
* @return mapping from gene symbol to {@link TranscriptModel}
*/
private static ImmutableMap makeTMByAccession(
ImmutableList transcriptInfos) {
ImmutableMap.Builder builder = new ImmutableMap.Builder();
for (TranscriptModel tm : transcriptInfos)
builder.put(tm.getAccession(), tm);
return builder.build();
}
/**
* This function constructs a HashMap map of Chromosome objects in which the {@link TranscriptInfo}
* objects are entered into an {@link IntervalArray} for the appropriate Chromosome.
*
* @param refDict
* the {@link ReferenceDictionary} to use for the construction
* @param transcriptInfos
* list of {@link TranscriptInfo} objects with the transcripts of all chromosomes
* @return a mapping from numeric chromsome ID to {@link Chromosome} object
*/
private static ImmutableMap makeChromsomes(ReferenceDictionary refDict,
ImmutableList transcriptInfos) {
ImmutableMap.Builder builder = new ImmutableMap.Builder();
// First, factorize the TranscriptInfo objects by chromosome ID.
// create hash map for this
HashMap> transcripts = new HashMap>();
for (Integer chrID : refDict.getContigIDToName().keySet())
transcripts.put(chrID, new ArrayList());
// distribute TranscriptInfo lists
for (TranscriptModel transcript : transcriptInfos)
transcripts.get(transcript.getChr()).add(transcript);
// Then, construct an interval tree for each chromosome and add the lists of intervals.
for (Integer chrID : transcripts.keySet()) {
IntervalArray iTree = new IntervalArray(transcripts.get(chrID),
new TranscriptIntervalEndExtractor());
builder.put(chrID, new Chromosome(refDict, chrID, iTree));
}
return builder.build();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy