Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.illumina;
import htsjdk.samtools.ReservedTagConstants;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.filter.SolexaNoiseFilter;
import picard.PicardException;
import picard.fastq.IlluminaReadNameEncoder;
import picard.fastq.ReadNameEncoder;
import picard.illumina.parser.ClusterData;
import picard.illumina.parser.ReadData;
import picard.illumina.parser.ReadStructure;
import picard.util.AdapterMarker;
import picard.util.AdapterPair;
import picard.util.IlluminaUtil;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Takes ClusterData provided by an IlluminaDataProvider into one or two SAMRecords,
* as appropriate, and optionally marking adapter sequence. There is one converter per
* IlluminaBasecallsToSam run, and all the TileProcessors use the same converter.
*
* @author [email protected]
*/
public class ClusterDataToSamConverter implements
IlluminaBasecallsConverter.ClusterDataConverter {
private final String readGroupId;
private final SamRecordFilter filters = new SolexaNoiseFilter();
private final boolean isPairedEnd;
private final boolean hasSampleBarcode;
private final boolean hasMolecularBarcode;
private final int [] templateIndices;
private final int [] sampleBarcodeIndices;
private final int [] molecularBarcodeIndices;
private final AdapterMarker adapterMarker;
private final int outputRecordsPerCluster;
private final ReadNameEncoder readNameEncoder;
// TODO: add RX and QX to the list of SAMTags and change this. initial discussion
// TODO: here:
// TODO: - https://github.com/broadinstitute/picard/issues/287
// TODO: - HTS-spec issue: https://github.com/samtools/hts-specs/issues/109
// TODO: - https://github.com/samtools/hts-specs/pull/119
private String molecularIndexTag = "RX";
private String molecularIndexQualityTag = "QX";
private final String molecularIndexDelimiter = "-";
private List tagPerMolecularIndex = Collections.emptyList();
/**
* Constructor
*
* @param runBarcode Used to construct read names.
* @param readGroupId If non-null, set RG attribute on SAMRecord to this.
* @param readStructure The expected structure (number of reads and indexes,
* and their length) in the read.
* @param adapters The list of adapters to check for in the read
*/
public ClusterDataToSamConverter(final String runBarcode,
final String readGroupId,
final ReadStructure readStructure,
final List adapters) {
this.readGroupId = readGroupId;
this.readNameEncoder = new IlluminaReadNameEncoder(runBarcode);
this.isPairedEnd = readStructure.templates.length() == 2;
this.hasSampleBarcode = !readStructure.sampleBarcodes.isEmpty();
this.hasMolecularBarcode = !readStructure.molecularBarcode.isEmpty();
if (adapters.isEmpty()) {
this.adapterMarker = null;
} else {
this.adapterMarker = new AdapterMarker(adapters.toArray(new AdapterPair[adapters.size()]));
}
this.templateIndices = readStructure.templates.getIndices();
this.sampleBarcodeIndices = readStructure.sampleBarcodes.getIndices();
this.molecularBarcodeIndices = readStructure.molecularBarcode.getIndices();
this.outputRecordsPerCluster = readStructure.templates.length();
}
/**
* Sets the SAM tag to use to store the molecular index bases. If multiple molecular indexes exist, it will concatenate them
* and store them in this tag.
*/
public ClusterDataToSamConverter withMolecularIndexTag(final String molecularIndexTag) {
if (molecularIndexTag == null) throw new IllegalArgumentException("Molecular index tag was null");
this.molecularIndexTag = molecularIndexTag;
return this;
}
/**
* Sets the SAM tag to use to store the molecular index base qualities. If multiple molecular indexes exist, it will concatenate them
* and store them in this tag.
*/
public ClusterDataToSamConverter withMolecularIndexQualityTag(final String molecularIndexQualityTag) {
if (molecularIndexQualityTag == null) throw new IllegalArgumentException("Molecular index quality tag was null");
this.molecularIndexQualityTag = molecularIndexQualityTag;
return this;
}
/**
* Sets the SAM tags to use to store the bases each molecular index. This will only be used if there are more than one molecular
* index. If fewer tags are given than molecular indexes found, then the remaining molecular indexes will be concatenated and stored
* in the last tag. If more tags are provided than molecular indexes found, the additional tags will not be used.
*/
public ClusterDataToSamConverter withTagPerMolecularIndex(final List tagPerMolecularIndex) {
if (tagPerMolecularIndex == null) throw new IllegalArgumentException("Null given for tagPerMolecularIndex");
this.tagPerMolecularIndex = tagPerMolecularIndex;
return this;
}
/**
* Creates a new SAM record from the basecall data
*/
private SAMRecord createSamRecord(final ReadData readData, final String readName, final boolean isPf, final boolean firstOfPair,
final String unmatchedBarcode,
final List molecularIndexes, final List molecularIndexQualities) {
final SAMRecord sam = new SAMRecord(null);
sam.setReadName(readName);
sam.setReadBases(readData.getBases());
sam.setBaseQualities(readData.getQualities());
// Flag values
sam.setReadPairedFlag(isPairedEnd);
sam.setReadUnmappedFlag(true);
sam.setReadFailsVendorQualityCheckFlag(!isPf);
if (isPairedEnd) {
sam.setMateUnmappedFlag(true);
sam.setFirstOfPairFlag(firstOfPair);
sam.setSecondOfPairFlag(!firstOfPair);
}
if (filters.filterOut(sam)) {
sam.setAttribute(ReservedTagConstants.XN, 1);
}
if (this.readGroupId != null) {
sam.setAttribute(SAMTag.RG.name(), readGroupId);
}
// If it's a barcoded run and the read isn't assigned to a barcode, then add the barcode
// that was read as an optional tag
if (unmatchedBarcode != null) {
sam.setAttribute(SAMTag.BC.name(), unmatchedBarcode);
}
if (!molecularIndexes.isEmpty()) {
if (!this.molecularIndexTag.isEmpty()) {
sam.setAttribute(this.molecularIndexTag, String.join(molecularIndexDelimiter, molecularIndexes));
}
if (!this.molecularIndexQualityTag.isEmpty()) {
sam.setAttribute(this.molecularIndexQualityTag, String.join(molecularIndexDelimiter, molecularIndexQualities));
}
if (!this.tagPerMolecularIndex.isEmpty()) {
if (tagPerMolecularIndex.size() != molecularIndexes.size()) {
throw new PicardException("Found " + molecularIndexes.size() + " molecular indexes but only " + tagPerMolecularIndex.size() + " SAM tags given.");
}
for (int i = 0; i < this.tagPerMolecularIndex.size(); i++) {
sam.setAttribute(this.tagPerMolecularIndex.get(i), molecularIndexes.get(i));
}
}
}
return sam;
}
/**
* Creates the SAMRecord for each read in the cluster
*/
public IlluminaBasecallsToSam.SAMRecordsForCluster convertClusterToOutputRecord(final ClusterData cluster) {
final IlluminaBasecallsToSam.SAMRecordsForCluster ret = new IlluminaBasecallsToSam.SAMRecordsForCluster(outputRecordsPerCluster);
final String readName = readNameEncoder.generateReadName(cluster, null); // Use null here to prevent /1 or /2 suffixes on read name.
// Get and transform the unmatched barcode, if any, to store with the reads
String unmatchedBarcode = null;
if (hasSampleBarcode && cluster.getMatchedBarcode() == null) {
final byte[][] barcode = new byte[sampleBarcodeIndices.length][];
for (int i = 0; i < sampleBarcodeIndices.length; i++) {
barcode[i] = cluster.getRead(sampleBarcodeIndices[i]).getBases();
}
unmatchedBarcode = IlluminaUtil.barcodeSeqsToString(barcode).replace('.', 'N'); //TODO: This has a separator, where as in other places we do not use a separator
}
final List molecularIndexes;
final List molecularIndexQualities;
if (hasMolecularBarcode) {
molecularIndexes = new ArrayList<>();
molecularIndexQualities = new ArrayList<>();
for (int i = 0; i < molecularBarcodeIndices.length; i++) {
molecularIndexes.add(new String(cluster.getRead(molecularBarcodeIndices[i]).getBases()).replace('.', 'N'));
molecularIndexQualities.add(SAMUtils.phredToFastq(cluster.getRead(molecularBarcodeIndices[i]).getQualities()));
}
} else {
molecularIndexes = Collections.emptyList();
molecularIndexQualities = Collections.emptyList();
}
final SAMRecord firstOfPair = createSamRecord(
cluster.getRead(templateIndices[0]), readName, cluster.isPf(), true, unmatchedBarcode, molecularIndexes, molecularIndexQualities);
ret.records[0] = firstOfPair;
SAMRecord secondOfPair = null;
if(isPairedEnd) {
secondOfPair = createSamRecord(
cluster.getRead(templateIndices[1]), readName, cluster.isPf(), false, unmatchedBarcode, molecularIndexes, molecularIndexQualities);
ret.records[1] = secondOfPair;
}
if (adapterMarker != null) {
// Clip the read
if (isPairedEnd) {
adapterMarker.adapterTrimIlluminaPairedReads(firstOfPair, secondOfPair);
}
else {
adapterMarker.adapterTrimIlluminaSingleRead(firstOfPair);
}
}
return ret;
}
}