Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* The MIT License
*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools.util;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMValidationError;
import htsjdk.samtools.TextCigarCodec;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
/**
* @author [email protected]
*/
public class CigarUtil {
private static final Log log = Log.getInstance(CigarUtil.class);
/** adjust the cigar based on adapter clipping.
* TODO: If there is hard clipping at the end of the input CIGAR, it is lost. It should not be.
* *
* @param clipFrom 1-based position where the clipping starts
* @param oldCigar The existing unclipped cigar
* @return New adjusted list of cigar elements
*/
// package visible so can be unit-tested
public static List softClipEndOfRead(final int clipFrom, final List oldCigar) {
final int clippedBases = (int)CoordMath.getLength(clipFrom, Cigar.getReadLength(oldCigar));
List newCigar = new LinkedList();
int pos = 1;
for (CigarElement c : oldCigar) {
// Distinguish two cases:
// c occurs before the clipped region
// c is adjacent to or straddles the boundary between clipped and unclipped region.
// c never occurs after the clipped region; clipped region is always at the end
final CigarOperator op = c.getOperator();
final int length = op.consumesReadBases()? c.getLength() : 0;
final int endPos = pos + length - 1; // same as pos on next iteration
if (endPos < (clipFrom - 1)) {
// handle elements before clip position (just copy them)
newCigar.add(c);
} else if (endPos >= (clipFrom - 1)) {
// handle adjacent or straddling element
elementStraddlesClippedRead(newCigar, c,
(clipFrom -1) - (pos -1) , clippedBases);
break;
}
pos = endPos + 1; // update pos for next iteration
} // end loop over cigar elements
return newCigar;
}
// a cigar element occurs in the middle of an adapter clipping
static private void elementStraddlesClippedRead(List newCigar, CigarElement c,
int relativeClippedPosition,
int clippedBases){
final CigarOperator op = c.getOperator();
int clipAmount = clippedBases;
if (op.consumesReadBases()){
if (op.consumesReferenceBases() && relativeClippedPosition > 0){
newCigar.add(new CigarElement(relativeClippedPosition, op));
}
if (!op.consumesReferenceBases()){
clipAmount = clippedBases + relativeClippedPosition;
}
} else if (relativeClippedPosition != 0){
throw new SAMException("Unexpected non-0 relativeClippedPosition " + relativeClippedPosition);
}
newCigar.add(new CigarElement(clipAmount, CigarOperator.S)); // S is always last element
}
/**
* Adds a soft-clip, based on clipFrom, to the SAM record's existing cigar
* and, for negative strands, also adjusts the SAM record's start position.
* Soft clips the end of the read as the read came off the sequencer.
*/
public static void softClip3PrimeEndOfRead(SAMRecord rec, final int clipFrom) {
final Cigar cigar = rec.getCigar();
// we don't worry about SEED_REGION_LENGTH in clipFrom
final boolean negativeStrand = rec.getReadNegativeStrandFlag();
List oldCigar = cigar.getCigarElements();
if (!isValidCigar(rec, cigar, true)){
return; // log message already issued
}
if (negativeStrand){
// Can't just use Collections.reverse() here because oldCigar is unmodifiable
oldCigar = new ArrayList(oldCigar);
Collections.reverse(oldCigar);
}
List newCigarElems = CigarUtil.softClipEndOfRead(clipFrom, oldCigar);
if (negativeStrand) {
Collections.reverse(newCigarElems);
}
final Cigar newCigar = new Cigar(newCigarElems);
if (negativeStrand){
int oldLength = cigar.getReferenceLength();
int newLength = newCigar.getReferenceLength();
int sizeChange = oldLength - newLength;
if (sizeChange > 0){
rec.setAlignmentStart(rec.getAlignmentStart() + sizeChange);
} else if (sizeChange < 0){
throw new SAMException("The clipped length " + newLength +
" is longer than the old unclipped length " + oldLength);
}
}
rec.setCigar(newCigar);
// Check that the end result is not a read without any aligned bases
boolean hasMappedBases = false;
for (final CigarElement elem : newCigar.getCigarElements()) {
final CigarOperator op = elem.getOperator();
if (op.consumesReferenceBases() && op.consumesReadBases()) {
hasMappedBases = true;
break;
}
}
if (!hasMappedBases) {
rec.setReadUnmappedFlag(true);
rec.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
rec.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
rec.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
rec.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
rec.setInferredInsertSize(0);
}
else if (!isValidCigar(rec, newCigar, false)){
// log message already issued
throw new IllegalStateException("Invalid new Cigar: " + newCigar + " (" + oldCigar + ") for " +
rec.getReadName());
}
}
private static boolean isValidCigar(SAMRecord rec, Cigar cigar, boolean isOldCigar) {
if (cigar == null || cigar.getCigarElements() == null || cigar.getCigarElements().isEmpty()) {
if (isOldCigar) {
if (rec.getReadUnmappedFlag()) {
// don't bother to warn since this does occur for PE reads
} else {
log.warn("Cigar is empty for read " + rec);
}
} else {
log.error("Empty new cigar");
}
return false;
}
if (rec.getReadUnmappedFlag()){
log.info("Unmapped read with cigar: " + rec.getReadName() + " (" + rec.getCigarString() + "/" + cigar.toString() + ")");
}
final List validationErrors = cigar.isValid(rec.getReadName(), -1);
if (validationErrors != null && !validationErrors.isEmpty()) {
log.error("Invalid cigar for read " + rec +
(isOldCigar ? " " : " for new cigar with clipped adapter ") +
" (" + rec.getCigarString() + "/" + cigar.toString() + ") " +
validationErrors);
return false;
}
if (rec.getReadLength() != cigar.getReadLength()){
// throw new SAMException(
log.error( rec.getReadLength() +
" read length does not = cigar length " + cigar.getReferenceLength() +
(isOldCigar? " oldCigar " : " ") +
rec + " cigar:" + cigar);
return false;
}
return true;
}
/**
* Adds additional soft-clipped bases at the 3' and/or 5' end of the cigar. Does not
* change the existing cigar except to merge the newly added soft-clipped bases if the
* element at the end of the cigar being modified is also a soft-clip.
*
* @param cigar The cigar on which to base the new cigar
* @param negativeStrand Whether the read is on the negative strand
* @param threePrimeEnd number of soft-clipped bases to add to the 3' end of the read
* @param fivePrimeEnd number of soft-clipped bases to add to the 5' end of the read
*/
public static Cigar addSoftClippedBasesToEndsOfCigar(Cigar cigar, boolean negativeStrand,
final int threePrimeEnd, final int fivePrimeEnd) {
List newCigar = new ArrayList(cigar.getCigarElements());
if (negativeStrand) {
Collections.reverse(newCigar);
}
if (threePrimeEnd > 0) {
int last = newCigar.size()-1;
int bases = threePrimeEnd;
if (newCigar.get(last).getOperator() == CigarOperator.SOFT_CLIP) {
CigarElement oldSoftClip = newCigar.remove(last);
bases += oldSoftClip.getLength();
}
newCigar.add(new CigarElement(bases, CigarOperator.SOFT_CLIP));
}
if (fivePrimeEnd > 0) {
int bases = fivePrimeEnd;
if (newCigar.get(0).getOperator() == CigarOperator.SOFT_CLIP) {
CigarElement oldSoftClip = newCigar.remove(0);
bases += oldSoftClip.getLength();
}
newCigar.add(0, new CigarElement(bases, CigarOperator.SOFT_CLIP));
}
if (negativeStrand) {
Collections.reverse(newCigar);
}
return new Cigar(newCigar);
}
// unpack a cigar string into an array of cigarOperators
// to facilitate sequence manipulation
public static char[] cigarArrayFromElements(List cigar){
int pos = 0;
int length = 0;
for (CigarElement e : cigar){
length += e.getLength();
}
char[] result = new char[length];
for (CigarElement e : cigar){
for (int i = 0; i < e.getLength(); i++){
CigarOperator o = e.getOperator();
result[i+pos] = (char) CigarOperator.enumToCharacter(o);
}
pos += e.getLength();
}
return result;
}
// unpack a cigar string into an array of cigarOperators
// to facilitate sequence manipulation
public static char[] cigarArrayFromString(String cigar){
return cigarArrayFromElements(TextCigarCodec.decode(cigar).getCigarElements());
}
// construct a cigar string from an array of cigarOperators.
public static String cigarStringFromArray(final char[] cigar){
String result = "";
int length = cigar.length;
char lastOp = 0; int lastLen = 0;
for (int i=0; i < length; i++){
if (cigar[i] == lastOp){
lastLen++;
} else if (cigar[i] == '-'){
; // nothing - just ignore '-'
} else {
if (lastOp != 0)
result = result + Integer.toString(lastLen) + Character.toString(lastOp);
lastLen = 1;
lastOp = cigar[i];
}
}
return result + Integer.toString(lastLen) + Character.toString(lastOp);
}
}