
org.broadinstitute.hellbender.utils.samples.SampleDB Maven / Gradle / Ivy
The newest version!
package org.broadinstitute.hellbender.utils.samples;
import org.broadinstitute.hellbender.engine.GATKPath;
import java.util.*;
/**
* Simple database for managing samples
*/
public final class SampleDB {
/**
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
* this is stored as a HashMap.
*/
private final Map samples = new LinkedHashMap<>();
/**
* Package private for use by SampleDBBuilder .
*/
SampleDB() {}
public static SampleDB createSampleDBFromPedigree(final GATKPath pedigreeFile, final PedigreeValidationType strictness) {
return createSampleDBFromPedigreeAndDataSources(pedigreeFile, null, strictness);
}
public static SampleDB createSampleDBFromPedigree(final GATKPath pedigreeFile) {
return createSampleDBFromPedigree(pedigreeFile, PedigreeValidationType.STRICT);
}
public static SampleDB createSampleDBFromPedigreeAndDataSources(final GATKPath pedigreeFile, final Collection samples, final PedigreeValidationType strictness) {
final SampleDBBuilder sampleDBBuilder = new SampleDBBuilder(strictness);
if (pedigreeFile != null) {
sampleDBBuilder.addSamplesFromPedigreeFiles(Collections.singletonList(pedigreeFile));
}
if (samples != null) {
sampleDBBuilder.addSamplesFromSampleNames(samples);
}
return sampleDBBuilder.getFinalSampleDB();
}
/**
* Protected function to add a single sample to the database
*
* @param newSample to be added
*/
protected SampleDB addSample(final Sample newSample) {
Sample updatedSample = newSample;
Sample prevSample = samples.get(newSample.getID());
if (prevSample != null) {
updatedSample = prevSample.mergeSamples(newSample);
}
samples.put(newSample.getID(), updatedSample);
return this;
}
// --------------------------------------------------------------------------------
//
// Functions for getting a sample from the DB
//
// --------------------------------------------------------------------------------
/**
* Get a sample by its ID
* If an alias is passed in, return the main sample object
* @param id
* @return sample Object with this ID, or null if this does not exist
*/
public Sample getSample(String id) {
return samples.get(id);
}
// --------------------------------------------------------------------------------
//
// Functions for accessing samples in the DB
//
// --------------------------------------------------------------------------------
/**
* Get number of sample objects
* @return size of samples map
*/
public int sampleCount() {
return samples.size();
}
public Set getSamples() {
return new LinkedHashSet<>(samples.values());
}
// --------------------------------------------------------------------------------
//
// Higher level pedigree functions
//
// --------------------------------------------------------------------------------
/**
* Returns a sorted set of the family IDs in all samples
* @return Sorted set of the family IDs in all samples (excluding null ids)
*/
public final Set getFamilyIDs() {
return getFamilies().keySet();
}
/**
* Returns a map from family ID -> set of family members.
* @return Map from family ID -> set of family members for all samples with non-null family ids
*/
public final Map> getFamilies() {
return getFamilies(null);
}
/**
* @param sampleIds - all samples to include. If null is passed then all samples are returned.
* @return Map from family ID -> set of family members for all samples in sampleIds with
* non-null family ids
*/
public final Map> getFamilies(Collection sampleIds) {
final Map> families = new TreeMap<>();
for (final Sample sample : samples.values()) {
if (sampleIds == null || sampleIds.contains(sample.getID())) {
final String famID = sample.getFamilyID();
if (famID != null) {
if (!families.containsKey(famID)) {
families.put(famID, new TreeSet<>());
}
families.get(famID).add(sample);
}
}
}
return families;
}
/**
* Return all samples with a given family ID
* @param familyId
* @return Set of all samples with the given family id.
*/
public Set getFamily(String familyId) {
return getFamilies().get(familyId);
}
/**
* Returns all the trios present in the sample database. The strictOneChild parameter determines
* whether multiple children of the same parents resolve to multiple trios, or are excluded
* @param strictOneChild - exclude pedigrees with >1 child for parental pair
* @return - all of the mother+father=child triplets, subject to strictOneChild
*/
public Set getTrios(final boolean strictOneChild) {
Set trioSet = new LinkedHashSet<>();
for ( final String familyString : getFamilyIDs() ) {
final Set family = getFamily(familyString);
for ( final Sample sample : family) {
if ( getParents(sample).size() == 2 ) {
final Trio trio = new Trio(getSample(sample.getMaternalID()), getSample(sample.getPaternalID()), sample);
trioSet.add(trio);
}
}
}
if ( strictOneChild ) {
trioSet = removeTriosWithSameParents(trioSet);
}
return trioSet;
}
/**
* Returns all the trios present in the db. See getTrios(boolean strictOneChild)
* @return all the trios present in the samples db.
*/
public final Set getTrios() {
return getTrios(false);
}
/**
* Subsets a set of trios to only those with nonmatching founders. If two (or more) trio objects have
* the same mother and father, then both (all) are removed from the returned set.
* @param trios - a set of Trio objects
* @return those subset of Trio objects in the input set with nonmatching founders
*/
private Set removeTriosWithSameParents(final Set trios) {
final Set filteredTrios = new LinkedHashSet<>();
filteredTrios.addAll(trios);
final Set triosWithSameParents = new LinkedHashSet<>();
for ( final Trio referenceTrio : filteredTrios ) {
for ( final Trio compareTrio : filteredTrios ) {
if ( referenceTrio != compareTrio &&
referenceTrio.getFather().equals(compareTrio.getFather()) &&
referenceTrio.getMother().equals(compareTrio.getMother()) ) {
triosWithSameParents.add(referenceTrio);
triosWithSameParents.add(compareTrio);
}
}
}
filteredTrios.removeAll(triosWithSameParents);
return filteredTrios;
}
public Set getFounderIds(){
Set founders = new LinkedHashSet<>();
for (Sample sample : getSamples()) {
if (getParents(sample).size() < 1) {
founders.add(sample.getID());
}
}
return founders;
}
/**
* Get the sample's mother
* @param offSpring child of mother to return
* @return sample object with relationship mother, if exists, or null
*/
public Sample getMother(Sample offSpring) {
String maternalID = offSpring.getMaternalID();
return null == maternalID ? null : samples.get(maternalID);
}
/**
* Get the sample's father
* @param offSpring child of father to return
* @return sample object with relationship father, if exists, or null
*/
public Sample getFather(Sample offSpring) {
String paternalID = offSpring.getPaternalID();
return null == paternalID ? null : samples.get(paternalID);
}
/**
* Get the sample's father and mother
* @param offSpring child of parents to return
* @return sample objects with relationship parents, if exists, or null
*/
public List getParents(final Sample offSpring) {
final List parents = new ArrayList<>(2);
Sample parent = getMother(offSpring);
if (parent != null) {
parents.add(parent);
}
parent = getFather(offSpring);
if(parent != null) {
parents.add(parent);
}
return parents;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy