// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools;
import htsjdk.samtools.util.SequenceUtil;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
* Merges SAMFileHeaders that have the same sequences into a single merged header
* object while providing read group translation for cases where read groups
* clash across input headers.
*/
public class SamFileHeaderMerger {
/**
* A 4-digit base 36 number is going to be attached to colliding SAMFileHeaderRecords,
* To do this we first create an array of values to convert integer remainders into
* base 36 values, we use base 36 because we have 10 digits and 26 numbers
*/
private static final char[] INT_TO_BASE36 = new char[36];
static {
int aVal = (int) 'A';
int zeroVal = (int) '0';
for (int i = 0; i < 10; i++) {
INT_TO_BASE36[i] = (char) (zeroVal + i);
}
for (int i = 0; i < 26; i++) {
INT_TO_BASE36[i + 10] = (char) (aVal + i);
}
}
//Super Header to construct
private final SAMFileHeader mergedHeader;
private Collection readers;
private final Collection headers;
private int recordCounter;
//Translation of old group ids to new group ids
private final Map> samReadGroupIdTranslation =
new IdentityHashMap>();
//the read groups from different files use the same group ids
private boolean hasReadGroupCollisions = false;
//the program records from different files use the same program record ids
private boolean hasProgramGroupCollisions = false;
//Translation of old program group ids to new program group ids
private final Map> samProgramGroupIdTranslation =
new IdentityHashMap>();
private boolean hasMergedSequenceDictionary = false;
// Translation of old sequence dictionary ids to new dictionary ids
// This is an IdentityHashMap because it can be quite expensive to compute the hashCode for
// large SAMFileHeaders. It is possible that two input files will have identical headers so that
// the regular HashMap would fold them together, but the value stored in each of the two
// Map entries will be the same, so it should not hurt anything.
private final Map> samSeqDictionaryIdTranslationViaHeader =
new IdentityHashMap>();
//HeaderRecordFactory that creates SAMReadGroupRecord instances:
//each new record is built from the requested id and the source read group record.
private static final HeaderRecordFactory<SAMReadGroupRecord> READ_GROUP_RECORD_FACTORY =
        new HeaderRecordFactory<SAMReadGroupRecord>() {
            @Override
            public SAMReadGroupRecord createRecord(final String id, final SAMReadGroupRecord srcReadGroupRecord) {
                return new SAMReadGroupRecord(id, srcReadGroupRecord);
            }
        };
//HeaderRecordFactory that creates SAMProgramRecord instances:
//each new record is built from the requested id and the source program record.
private static final HeaderRecordFactory<SAMProgramRecord> PROGRAM_RECORD_FACTORY =
        new HeaderRecordFactory<SAMProgramRecord>() {
            @Override
            public SAMProgramRecord createRecord(final String id, final SAMProgramRecord srcProgramRecord) {
                return new SAMProgramRecord(id, srcProgramRecord);
            }
        };
//comparator used to sort lists of program group and read group records;
//orders records by the natural (String) ordering of their ids
private static final Comparator<AbstractSAMHeaderRecord> RECORD_ID_COMPARATOR =
        new Comparator<AbstractSAMHeaderRecord>() {
            @Override
            public int compare(final AbstractSAMHeaderRecord o1, final AbstractSAMHeaderRecord o2) {
                return o1.getId().compareTo(o2.getId());
            }
        };
/**
 * Create SAMFileHeader with additional information. Required that sequence dictionaries agree.
 * Delegates to the three-argument reader constructor with dictionary merging disabled.
 *
 * @param readers   sam file readers to combine
 * @param sortOrder sort order new header should have
 * @deprecated replaced by {@link #SamFileHeaderMerger(SAMFileHeader.SortOrder, Collection, boolean)}
 */
@Deprecated
public SamFileHeaderMerger(final Collection<SamReader> readers, final SAMFileHeader.SortOrder sortOrder) {
    this(readers, sortOrder, false);
}
/**
 * Create SAMFileHeader with additional information. The headers to merge are read from the
 * given readers, and the readers collection itself is retained so that {@link #getReaders()}
 * can return it.
 *
 * @param readers           sam file readers to combine
 * @param sortOrder         sort order new header should have
 * @param mergeDictionaries If true, merge sequence dictionaries in new header. If false, require that
 *                          all input sequence dictionaries be identical.
 * @deprecated replaced by {@link #SamFileHeaderMerger(SAMFileHeader.SortOrder, Collection, boolean)}
 */
@Deprecated
public SamFileHeaderMerger(final Collection<SamReader> readers, final SAMFileHeader.SortOrder sortOrder, final boolean mergeDictionaries) {
    this(sortOrder, getHeadersFromReaders(readers), mergeDictionaries);
    this.readers = readers;
}
/**
 * Create SAMFileHeader with additional information. This is the preferred constructor.
 * <p>
 * Builds the merged header in this order: sequence dictionary, program records, read groups,
 * group order ({@code none}) and sort order, then the comments of every input header.
 *
 * @param sortOrder         sort order new header should have
 * @param headers           sam file headers to combine
 * @param mergeDictionaries If true, merge sequence dictionaries in new header. If false, require that
 *                          all input sequence dictionaries be identical.
 * @throws SequenceUtil.SequenceListsDifferException if the input sequence dictionaries differ and
 *                                                   {@code mergeDictionaries} is false
 */
public SamFileHeaderMerger(final SAMFileHeader.SortOrder sortOrder, final Collection<SAMFileHeader> headers, final boolean mergeDictionaries) {
    this.headers = new LinkedHashSet<SAMFileHeader>(headers);
    this.mergedHeader = new SAMFileHeader();

    // First try the cheap path: all dictionaries equal. Only fall back to merging when asked to.
    SAMSequenceDictionary sequenceDictionary;
    try {
        sequenceDictionary = getSequenceDictionary(headers);
        this.hasMergedSequenceDictionary = false;
    } catch (final SequenceUtil.SequenceListsDifferException pe) {
        if (!mergeDictionaries) {
            throw pe;
        }
        sequenceDictionary = mergeSequenceDictionaries(headers);
        this.hasMergedSequenceDictionary = true;
    }
    this.mergedHeader.setSequenceDictionary(sequenceDictionary);

    // Set program that creates input alignments
    for (final SAMProgramRecord program : mergeProgramGroups(headers)) {
        this.mergedHeader.addProgramRecord(program);
    }

    // Set read groups for merged header
    final List<SAMReadGroupRecord> readGroups = mergeReadGroups(headers);
    this.mergedHeader.setReadGroups(readGroups);
    this.mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.none);
    this.mergedHeader.setSortOrder(sortOrder);

    // Carry over every comment line from every input header, in input order.
    for (final SAMFileHeader header : headers) {
        for (final String comment : header.getComments()) {
            this.mergedHeader.addComment(comment);
        }
    }
}
/**
 * Utility method supporting the deprecated reader-based constructors: collects the file
 * header of each reader, in the iteration order of {@code readers}.
 *
 * @param readers readers whose headers are wanted
 * @return list with one header per reader
 */
private static List<SAMFileHeader> getHeadersFromReaders(final Collection<SamReader> readers) {
    final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
    for (final SamReader reader : readers) {
        headers.add(reader.getFileHeader());
    }
    return headers;
}
/**
* Checks to see if there are clashes where different readers are using the same read
* group IDs. If yes, then those IDs that collided are remapped.
*
* @param headers headers to combine
* @return new list of read groups constructed from all the readers
*/
private List mergeReadGroups(final Collection headers) {
//prepare args for mergeHeaderRecords(..) call
final HashSet idsThatAreAlreadyTaken = new HashSet();
final List> readGroupsToProcess = new LinkedList>();
for (final SAMFileHeader header : headers) {
for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
//verify that there are no existing id collisions in this input file
if (!idsThatAreAlreadyTaken.add(readGroup.getId()))
throw new SAMException("Input file: " + header + " contains more than one RG with the same id (" + readGroup.getId() + ")");
readGroupsToProcess.add(new HeaderRecordAndFileHeader(readGroup, header));
}
idsThatAreAlreadyTaken.clear();
}
final List result = new LinkedList();
recordCounter = 0;
hasReadGroupCollisions = mergeHeaderRecords(readGroupsToProcess, READ_GROUP_RECORD_FACTORY, idsThatAreAlreadyTaken, samReadGroupIdTranslation, result);
//sort the result list by record id
Collections.sort(result, RECORD_ID_COMPARATOR);
return result;
}
/**
* Checks to see if there are clashes where different readers are using the same program
* group IDs. If yes, then those IDs that collided are remapped.
*
* @param headers headers to combine
* @return new list of program groups constructed from all the readers
*/
private List mergeProgramGroups(final Collection headers) {
final List overallResult = new LinkedList();
//this Set will accumulate all SAMProgramRecord ids that have been encountered so far.
final HashSet idsThatAreAlreadyTaken = new HashSet();
//need to process all program groups
List> programGroupsLeftToProcess = new LinkedList>();
for (final SAMFileHeader header : headers) {
for (final SAMProgramRecord programGroup : header.getProgramRecords()) {
//verify that there are no existing id collisions in this input file
if (!idsThatAreAlreadyTaken.add(programGroup.getId()))
throw new SAMException("Input file: " + header + " contains more than one PG with the same id (" + programGroup.getId() + ")");
programGroupsLeftToProcess.add(new HeaderRecordAndFileHeader(programGroup, header));
}
idsThatAreAlreadyTaken.clear();
}
recordCounter = 0;
//A program group header (lets say ID=2 PN=B PP=1) may have a PP (previous program) attribute which chains it to
//another program group header (lets say ID=1 PN=A) to indicate that the given file was
//processed by program A followed by program B. These PP attributes potentially
//connect headers into one or more tree structures. Merging is done by
//first merging all headers that don't have PP attributes (eg. tree roots),
//then updating and merging all headers whose PPs point to the tree-root headers,
//and so on until all program group headers are processed.
//currentProgramGroups is the list of records to merge next. Start by merging the programGroups that don't have a PP attribute (eg. the tree roots).
List> currentProgramGroups = new LinkedList>();
for (final Iterator> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
final HeaderRecordAndFileHeader pair = programGroupsLeftToProcessIterator.next();
if (pair.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG) == null) {
programGroupsLeftToProcessIterator.remove();
currentProgramGroups.add(pair);
}
}
//merge currentProgramGroups
while (!currentProgramGroups.isEmpty()) {
final List currentResult = new LinkedList();
hasProgramGroupCollisions |= mergeHeaderRecords(currentProgramGroups, PROGRAM_RECORD_FACTORY, idsThatAreAlreadyTaken, samProgramGroupIdTranslation, currentResult);
//add currentResults to overallResults
overallResult.addAll(currentResult);
//apply the newly-computed id translations to currentProgramGroups and programGroupsLeftToProcess
currentProgramGroups = translateIds(currentProgramGroups, samProgramGroupIdTranslation, false);
programGroupsLeftToProcess = translateIds(programGroupsLeftToProcess, samProgramGroupIdTranslation, true);
//find all records in programGroupsLeftToProcess whose ppId points to a record that was just processed (eg. a record that's in currentProgramGroups),
//and move them to the list of programGroupsToProcessNext.
final LinkedList> programGroupsToProcessNext = new LinkedList>();
for (final Iterator> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
final HeaderRecordAndFileHeader pairLeftToProcess = programGroupsLeftToProcessIterator.next();
final Object ppIdOfRecordLeftToProcess = pairLeftToProcess.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
//find what currentProgramGroups this ppId points to (NOTE: they have to come from the same file)
for (final HeaderRecordAndFileHeader justProcessedPair : currentProgramGroups) {
final String idJustProcessed = justProcessedPair.getHeaderRecord().getId();
if (pairLeftToProcess.getFileHeader() == justProcessedPair.getFileHeader() && ppIdOfRecordLeftToProcess.equals(idJustProcessed)) {
programGroupsLeftToProcessIterator.remove();
programGroupsToProcessNext.add(pairLeftToProcess);
break;
}
}
}
currentProgramGroups = programGroupsToProcessNext;
}
//verify that all records were processed
if (!programGroupsLeftToProcess.isEmpty()) {
final StringBuffer errorMsg = new StringBuffer(programGroupsLeftToProcess.size() + " program groups weren't processed. Do their PP ids point to existing PGs? \n");
for (final HeaderRecordAndFileHeader pair : programGroupsLeftToProcess) {
final SAMProgramRecord record = pair.getHeaderRecord();
errorMsg.append("@PG ID:" + record.getProgramGroupId() + " PN:" + record.getProgramName() + " PP:" + record.getPreviousProgramGroupId() + "\n");
}
throw new SAMException(errorMsg.toString());
}
//sort the result list by record id
Collections.sort(overallResult, RECORD_ID_COMPARATOR);
return overallResult;
}
/**
* Utility method that takes a list of program groups and remaps all their
* ids (including ppIds if requested) using the given idTranslationTable.
*
* NOTE: when remapping, this method creates new SAMProgramRecords and
* doesn't mutate any records in the programGroups list.
*
* @param programGroups The program groups to translate.
* @param idTranslationTable The translation table.
* @param translatePpIds Whether ppIds should be translated as well.
* @return The list of translated records.
*/
private List> translateIds(
final List> programGroups,
final Map> idTranslationTable,
final boolean translatePpIds) {
//go through programGroups and translate any IDs and PPs based on the idTranslationTable.
final List> result = new LinkedList>();
for (final HeaderRecordAndFileHeader pair : programGroups) {
final SAMProgramRecord record = pair.getHeaderRecord();
final String id = record.getProgramGroupId();
final String ppId = (String) record.getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
final SAMFileHeader header = pair.getFileHeader();
final Map translations = idTranslationTable.get(header);
//see if one or both ids need to be translated
SAMProgramRecord translatedRecord = null;
if (translations != null) {
final String translatedId = translations.get(id);
final String translatedPpId = translatePpIds ? translations.get(ppId) : null;
final boolean needToTranslateId = translatedId != null && !translatedId.equals(id);
final boolean needToTranslatePpId = translatedPpId != null && !translatedPpId.equals(ppId);
if (needToTranslateId && needToTranslatePpId) {
translatedRecord = new SAMProgramRecord(translatedId, record);
translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
} else if (needToTranslateId) {
translatedRecord = new SAMProgramRecord(translatedId, record);
} else if (needToTranslatePpId) {
translatedRecord = new SAMProgramRecord(id, record);
translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
}
}
if (translatedRecord != null) {
result.add(new HeaderRecordAndFileHeader(translatedRecord, header));
} else {
result.add(pair); //keep the original record
}
}
return result;
}
/**
* Utility method for merging a List of AbstractSAMHeaderRecords. If it finds
* records that have identical ids and attributes, it will collapse them
* into one record. If it finds records that have identical ids but
* non-identical attributes, this is treated as a collision. When collision happens,
* the records' ids are remapped, and an old-id to new-id mapping is added to the idTranslationTable.
*
* NOTE: Non-collided records also get recorded in the idTranslationTable as
* old-id to old-id. This way, an idTranslationTable lookup should never return null.
*
* @param headerRecords The header records to merge.
* @param headerRecordFactory Constructs a specific subclass of AbstractSAMHeaderRecord.
* @param idsThatAreAlreadyTaken If the id of a headerRecord matches an id in this set, it will be treated as a collision, and the headRecord's id will be remapped.
* @param idTranslationTable When records collide, their ids are remapped, and an old-id to new-id
* mapping is added to the idTranslationTable. Non-collided records also get recorded in the idTranslationTable as
* old-id to old-id. This way, an idTranslationTable lookup should never return null.
* @param result The list of merged header records.
* @return True if there were collisions.
*/
private boolean mergeHeaderRecords(final List> headerRecords, final HeaderRecordFactory headerRecordFactory,
final HashSet idsThatAreAlreadyTaken, final Map> idTranslationTable, final List result) {
//The outer Map bins the header records by their ids. The nested Map further collapses
//header records which, in addition to having the same id, also have identical attributes.
//In other words, each key in the nested map represents one or more
//header records which have both identical ids and identical attributes. The List of
//SAMFileHeaders keeps track of which readers these header record(s) came from.
final Map>> idToRecord =
new LinkedHashMap>>();
//Populate the idToRecord and seenIds data structures
for (final HeaderRecordAndFileHeader pair : headerRecords) {
final RecordType record = pair.getHeaderRecord();
final SAMFileHeader header = pair.getFileHeader();
final String recordId = record.getId();
Map> recordsWithSameId = idToRecord.get(recordId);
if (recordsWithSameId == null) {
recordsWithSameId = new LinkedHashMap>();
idToRecord.put(recordId, recordsWithSameId);
}
List fileHeaders = recordsWithSameId.get(record);
if (fileHeaders == null) {
fileHeaders = new LinkedList();
recordsWithSameId.put(record, fileHeaders);
}
fileHeaders.add(header);
}
//Resolve any collisions between header records by remapping their ids.
boolean hasCollisions = false;
for (final Map.Entry>> entry : idToRecord.entrySet()) {
final String recordId = entry.getKey();
final Map> recordsWithSameId = entry.getValue();
for (final Map.Entry> recordWithUniqueAttr : recordsWithSameId.entrySet()) {
final RecordType record = recordWithUniqueAttr.getKey();
final List fileHeaders = recordWithUniqueAttr.getValue();
String newId;
if (!idsThatAreAlreadyTaken.contains(recordId)) {
//don't remap 1st record. If there are more records
//with this id, they will be remapped in the 'else'.
newId = recordId;
idsThatAreAlreadyTaken.add(recordId);
++recordCounter;
} else {
//there is more than one record with this id.
hasCollisions = true;
//Below we tack on one of roughly 1.7 million possible 4 digit base36 at random we do this because
//our old process of just counting from 0 upward and adding that to the previous id led to 1000s of hits on
//idsThatAreAlreadyTaken.contains just to resolve 1 collision when merging 1000s of similarly processed bams
while (idsThatAreAlreadyTaken.contains(newId = recordId + "." + positiveFourDigitBase36Str(recordCounter++))) ;
idsThatAreAlreadyTaken.add(newId);
}
for (final SAMFileHeader fileHeader : fileHeaders) {
Map readerTranslationTable = idTranslationTable.get(fileHeader);
if (readerTranslationTable == null) {
readerTranslationTable = new HashMap();
idTranslationTable.put(fileHeader, readerTranslationTable);
}
readerTranslationTable.put(recordId, newId);
}
result.add(headerRecordFactory.createRecord(newId, record));
}
}
return hasCollisions;
}
/**
 * Converts a non-negative integer to its base-36 representation using the symbols
 * 0-9 and A-Z, most significant symbol first. The result is not padded: its length
 * depends on the magnitude of the input.
 *
 * @param leftOver the value to convert
 * @return the base-36 string; "0" for zero, and an empty string for negative input
 */
public static String positiveFourDigitBase36Str(int leftOver) {
    if (leftOver == 0) {
        return "0";
    }
    final StringBuilder symbols = new StringBuilder(10);
    // Peel off the least significant symbol each round and put it in front of those already emitted.
    for (int remaining = leftOver; remaining > 0; remaining /= 36) {
        symbols.insert(0, INT_TO_BASE36[remaining % 36]);
    }
    return symbols.toString();
}
/**
 * Get the sequences off the SAMFileHeader. Throws runtime exception if the sequence
 * are different from one another.
 *
 * @param headers headers to pull sequences from
 * @return the sequence dictionary shared by all headers (taken from the first header that
 *         supplies a non-null dictionary), or null if there is none
 */
private SAMSequenceDictionary getSequenceDictionary(final Collection<SAMFileHeader> headers) {
    SAMSequenceDictionary sequences = null;
    for (final SAMFileHeader header : headers) {
        final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
        if (sequences == null) {
            // nothing to compare against yet: this dictionary becomes the reference
            sequences = currentSequences;
        } else {
            SequenceUtil.assertSequenceDictionariesEqual(sequences, currentSequences);
        }
    }
    return sequences;
}
/**
 * Get the sequences from the SAMFileHeader, and merge the resulting sequence dictionaries.
 * Also records, for each header, the mapping from its original sequence indices to the
 * indices in the merged dictionary.
 *
 * @param headers headers to pull sequences from
 * @return the merged sequence dictionary
 */
private SAMSequenceDictionary mergeSequenceDictionaries(final Collection<SAMFileHeader> headers) {
    // first pass, fold every header's dictionary into the running merge result
    SAMSequenceDictionary sequences = new SAMSequenceDictionary();
    for (final SAMFileHeader header : headers) {
        final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
        sequences = mergeSequences(sequences, currentSequences);
    }
    // second pass, make a map of the original sequence id -> new sequence id
    createSequenceMapping(headers, sequences);
    return sequences;
}
/**
 * They've asked to merge the sequence headers. What we support right now is finding the sequence name superset.
 * Sequences that only occur in mergeFromDict are inserted immediately before the next sequence of
 * mergeFromDict that already exists in the result, or appended at the end if there is no such sequence.
 *
 * @param mergeIntoDict the result of merging so far.
 * @param mergeFromDict A new sequence dictionary to merge into mergeIntoDict.
 * @return A new sequence dictionary that resulting from merging the two inputs.
 * @throws SAMException if two sequences present in both dictionaries appear in different relative orders
 */
private SAMSequenceDictionary mergeSequences(final SAMSequenceDictionary mergeIntoDict, final SAMSequenceDictionary mergeFromDict) {
    // a place to hold the sequences that we haven't found a home for, in the order the appear in mergeFromDict.
    final LinkedList<SAMSequenceRecord> holder = new LinkedList<SAMSequenceRecord>();

    // Return value will be created from this.
    final LinkedList<SAMSequenceRecord> resultingDict = new LinkedList<SAMSequenceRecord>();
    for (final SAMSequenceRecord sequenceRecord : mergeIntoDict.getSequences()) {
        resultingDict.add(sequenceRecord);
    }

    // Index into resultingDict of previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
    int prevloc = -1;
    // Previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
    SAMSequenceRecord previouslyMerged = null;

    for (final SAMSequenceRecord sequenceRecord : mergeFromDict.getSequences()) {
        // Does it already exist in resultingDict?
        final int loc = getIndexOfSequenceName(resultingDict, sequenceRecord.getSequenceName());
        if (loc == -1) {
            // If doesn't already exist in resultingDict, save it an decide where to insert it later.
            holder.add(sequenceRecord.clone());
        } else if (prevloc > loc) {
            // If sequenceRecord already exists in resultingDict, but prior to the previous one
            // from mergeIntoDict that already existed, cannot merge.
            // (prevloc > loc >= 0 implies a previous match happened, so previouslyMerged is set.)
            throw new SAMException("Cannot merge sequence dictionaries because sequence " +
                    sequenceRecord.getSequenceName() + " and " + previouslyMerged.getSequenceName() +
                    " are in different orders in two input sequence dictionaries.");
        } else {
            // Since sequenceRecord already exists in resultingDict, don't need to add it.
            // Add in all the sequences prior to it that have been held in holder.
            resultingDict.addAll(loc, holder);
            // Remember the index of sequenceRecord so can check for merge incompatibility.
            // The insertion above shifted it right by holder.size().
            prevloc = loc + holder.size();
            previouslyMerged = sequenceRecord;
            holder.clear();
        }
    }

    // Append anything left in holder.
    if (!holder.isEmpty()) {
        resultingDict.addAll(holder);
    }
    return new SAMSequenceDictionary(resultingDict);
}
/**
 * Find sequence in list.
 *
 * @param list         List to search for the sequence name.
 * @param sequenceName Name to search for.
 * @return Index of the first SAMSequenceRecord with the given name in list, or -1 if not found.
 */
private static int getIndexOfSequenceName(final List<SAMSequenceRecord> list, final String sequenceName) {
    // Walk the list with its iterator rather than list.get(i): the caller passes a LinkedList,
    // for which every indexed access would itself traverse the list.
    int index = 0;
    for (final SAMSequenceRecord candidate : list) {
        if (candidate.getSequenceName().equals(sequenceName)) {
            return index;
        }
        ++index;
    }
    return -1;
}
/**
 * create the sequence mapping. This map is used to convert the unmerged header sequence ID's to the merged
 * list of sequence id's. One map per header is stored in samSeqDictionaryIdTranslationViaHeader.
 *
 * @param headers          the collections of headers.
 * @param masterDictionary the superset dictionary we've created.
 */
private void createSequenceMapping(final Collection<SAMFileHeader> headers, final SAMSequenceDictionary masterDictionary) {
    // Index the merged dictionary by sequence name once, instead of scanning a list for every
    // record of every header. The first occurrence of a name wins, as with List.indexOf.
    final Map<String, Integer> mergedIndexByName = new HashMap<>();
    int mergedPosition = 0;
    for (final SAMSequenceRecord mergedRecord : masterDictionary.getSequences()) {
        final String name = mergedRecord.getSequenceName();
        if (!mergedIndexByName.containsKey(name)) {
            mergedIndexByName.put(name, mergedPosition);
        }
        ++mergedPosition;
    }

    for (final SAMFileHeader header : headers) {
        final Map<Integer, Integer> seqMap = new HashMap<>();
        final SAMSequenceDictionary dict = header.getSequenceDictionary();
        for (final SAMSequenceRecord rec : dict.getSequences()) {
            final Integer mergedIndex = mergedIndexByName.get(rec.getSequenceName());
            // -1 mirrors what List.indexOf reports for a name missing from the merged dictionary
            seqMap.put(rec.getSequenceIndex(), mergedIndex == null ? -1 : mergedIndex);
        }
        this.samSeqDictionaryIdTranslationViaHeader.put(header, seqMap);
    }
}
/**
 * Looks up the read group id to use in the merged output for a read group id of one
 * input, identifying the input by its reader. The lookup is performed with the
 * reader's file header.
 *
 * @param reader              one of the input readers
 * @param originalReadGroupId a read group id from that input
 * @return the result of the header-based lookup
 * @deprecated replaced by {@link #getReadGroupId(SAMFileHeader, String)}
 */
@Deprecated
public String getReadGroupId(final SamReader reader, final String originalReadGroupId) {
    final SAMFileHeader readerHeader = reader.getFileHeader();
    return getReadGroupId(readerHeader, originalReadGroupId);
}
/**
 * Returns the read group id that should be used for the input read and RG id.
 *
 * @param header              one of the input headers
 * @param originalReadGroupId a read group id from that header
 * @return the id to use in the merged header, or null if the header has no translation for the given id
 * @throws SAMException if no read group translations were recorded for the given header
 *                      (previously this surfaced as a bare NullPointerException)
 */
public String getReadGroupId(final SAMFileHeader header, final String originalReadGroupId) {
    final Map<String, String> translations = this.samReadGroupIdTranslation.get(header);
    if (translations == null) {
        throw new SAMException("No read group id translation available for header: " + header);
    }
    return translations.get(originalReadGroupId);
}
/**
 * Looks up the program group id to use in the merged output for a program group id of
 * one input, identifying the input by its reader. The lookup is performed with the
 * reader's file header.
 *
 * @param reader                 one of the input files
 * @param originalProgramGroupId a program group ID from the above input file
 * @return new ID from the merged list of program groups in the output file
 * @deprecated replaced by {@link #getProgramGroupId(SAMFileHeader, String)}
 */
@Deprecated
public String getProgramGroupId(final SamReader reader, final String originalProgramGroupId) {
    final SAMFileHeader readerHeader = reader.getFileHeader();
    return getProgramGroupId(readerHeader, originalProgramGroupId);
}
/**
 * Returns the program group id that should be used in the merged output for a program
 * group id of one input header.
 *
 * @param header                 one of the input headers
 * @param originalProgramGroupId a program group ID from the above input file
 * @return new ID from the merged list of program groups in the output file, or null if the
 *         header has no translation for the given id
 * @throws SAMException if no program group translations were recorded for the given header
 *                      (previously this surfaced as a bare NullPointerException)
 */
public String getProgramGroupId(final SAMFileHeader header, final String originalProgramGroupId) {
    final Map<String, String> translations = this.samProgramGroupIdTranslation.get(header);
    if (translations == null) {
        throw new SAMException("No program group id translation available for header: " + header);
    }
    return translations.get(originalProgramGroupId);
}
/**
 * Reports whether merging the read groups required remapping at least one id.
 *
 * @return true if there are read group duplicates within the merged headers
 */
public boolean hasReadGroupCollisions() {
    return hasReadGroupCollisions;
}
/**
 * Reports whether merging the program groups required remapping at least one id.
 *
 * @return true if there are program group duplicates within the merged headers
 */
public boolean hasProgramGroupCollisions() {
    return this.hasProgramGroupCollisions;
}
/**
 * Reports whether the sequence dictionary of the merged header was produced by merging
 * differing input dictionaries.
 *
 * @return true if we've merged the sequence dictionaries
 */
public boolean hasMergedSequenceDictionary() {
    return this.hasMergedSequenceDictionary;
}
/**
 * Gives access to the header built by this merger.
 *
 * @return the merged header that should be written to any output merged file
 */
public SAMFileHeader getMergedHeader() {
    return mergedHeader;
}
/**
 * Returns the collection of readers that this header merger is working with. May return null:
 * the readers are only recorded by the deprecated reader-based constructors.
 *
 * @return the readers passed at construction time, or null
 * @deprecated replaced by {@link #getHeaders()}
 */
@Deprecated
public Collection<SamReader> getReaders() {
    return this.readers;
}
/**
 * Returns the collection of headers that this header merger is working with.
 *
 * @return the internally held, de-duplicated collection of input headers (not a copy)
 */
public Collection<SAMFileHeader> getHeaders() {
    return this.headers;
}
/**
 * Translates a sequence index of one input into the corresponding index of the merged
 * sequence dictionary, identifying the input by its reader. The lookup is performed with
 * the reader's file header.
 *
 * @param reader                    the reader
 * @param oldReferenceSequenceIndex the old sequence (also called reference) index
 * @return the new index value
 * @deprecated replaced by {@link #getMergedSequenceIndex(SAMFileHeader, Integer)}
 */
@Deprecated
public Integer getMergedSequenceIndex(final SamReader reader, final Integer oldReferenceSequenceIndex) {
    final SAMFileHeader readerHeader = reader.getFileHeader();
    return getMergedSequenceIndex(readerHeader, oldReferenceSequenceIndex);
}
/**
 * Another mechanism for getting the new sequence index, for situations in which the reader is not available.
 * Note that if the SAMRecord has already had its header replaced with the merged header, this won't work.
 * Mappings are only recorded when the sequence dictionaries were actually merged.
 *
 * @param header                    The original header for the input record in question.
 * @param oldReferenceSequenceIndex The original sequence index.
 * @return the new index value that is compatible with the merged sequence index.
 * @throws SAMException if no mapping was recorded for the header, or the header's mapping
 *                      has no entry for the given index
 */
public Integer getMergedSequenceIndex(final SAMFileHeader header, final Integer oldReferenceSequenceIndex) {
    final Map<Integer, Integer> mapping = this.samSeqDictionaryIdTranslationViaHeader.get(header);
    if (mapping == null) {
        throw new SAMException("No sequence dictionary mapping available for header: " + header);
    }

    final Integer newIndex = mapping.get(oldReferenceSequenceIndex);
    if (newIndex == null) {
        throw new SAMException("No mapping for reference index " + oldReferenceSequenceIndex + " from header: " + header);
    }
    return newIndex;
}
/**
 * Implementations of this interface are used by mergeHeaderRecords(..) to instantiate
 * specific subclasses of AbstractSAMHeaderRecord.
 *
 * @param <RecordType> the concrete header record type the factory produces
 */
private interface HeaderRecordFactory<RecordType extends AbstractSAMHeaderRecord> {
    /**
     * Constructs a new instance of RecordType.
     *
     * @param id        The id of the new record.
     * @param srcRecord Except for the id, the new record will be a copy of this source record.
     * @return the newly created record
     */
    RecordType createRecord(final String id, RecordType srcRecord);
}
/**
 * Struct that groups together a subclass of AbstractSAMHeaderRecord with the
 * SAMFileHeader that it came from.
 *
 * @param <RecordType> the concrete header record type being carried
 */
private static class HeaderRecordAndFileHeader<RecordType extends AbstractSAMHeaderRecord> {
    private final RecordType headerRecord;
    private final SAMFileHeader samFileHeader;

    /**
     * @param headerRecord  the header record
     * @param samFileHeader the file header the record came from
     */
    public HeaderRecordAndFileHeader(final RecordType headerRecord, final SAMFileHeader samFileHeader) {
        this.headerRecord = headerRecord;
        this.samFileHeader = samFileHeader;
    }

    /** @return the header record held by this pair */
    public RecordType getHeaderRecord() {
        return headerRecord;
    }

    /** @return the file header the record came from */
    public SAMFileHeader getFileHeader() {
        return samFileHeader;
    }
}
}