All Downloads are FREE. Search and download functionalities are using the official Maven repository.

htsjdk.samtools.cram.build.Cram2SamRecordFactory Maven / Gradle / Ivy

There is a newer version: 4.1.3
Show newest version
/**
 * ****************************************************************************
 * Copyright 2013 EMBL-EBI
 * 

* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * **************************************************************************** */ package htsjdk.samtools.cram.build; import htsjdk.samtools.Cigar; import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.cram.encoding.readfeatures.Deletion; import htsjdk.samtools.cram.encoding.readfeatures.HardClip; import htsjdk.samtools.cram.encoding.readfeatures.InsertBase; import htsjdk.samtools.cram.encoding.readfeatures.Insertion; import htsjdk.samtools.cram.encoding.readfeatures.Padding; import htsjdk.samtools.cram.encoding.readfeatures.ReadBase; import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature; import htsjdk.samtools.cram.encoding.readfeatures.RefSkip; import htsjdk.samtools.cram.encoding.readfeatures.SoftClip; import htsjdk.samtools.cram.encoding.readfeatures.Substitution; import htsjdk.samtools.cram.structure.CramCompressionRecord; import htsjdk.samtools.cram.structure.ReadTag; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; public class Cram2SamRecordFactory { private final SAMFileHeader header; public Cram2SamRecordFactory(final SAMFileHeader header) { this.header = header; } public SAMRecord create(final CramCompressionRecord cramRecord) { final SAMRecord samRecord = new SAMRecord(header); samRecord.setReadName(cramRecord.readName); copyFlags(cramRecord, samRecord); if (cramRecord.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { samRecord.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); samRecord.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY); samRecord.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); } else { samRecord.setReferenceIndex(cramRecord.sequenceId); samRecord.setAlignmentStart(cramRecord.alignmentStart); samRecord.setMappingQuality(cramRecord.mappingQuality); } if (cramRecord.isSegmentUnmapped()) samRecord.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); else samRecord.setCigar(getCigar2(cramRecord.readFeatures, cramRecord.readLength)); if (samRecord.getReadPairedFlag()) { samRecord.setMateReferenceIndex(cramRecord.mateSequenceID); samRecord .setMateAlignmentStart(cramRecord.mateAlignmentStart > 0 ? cramRecord.mateAlignmentStart : SAMRecord.NO_ALIGNMENT_START); samRecord.setMateNegativeStrandFlag(cramRecord.isMateNegativeStrand()); samRecord.setMateUnmappedFlag(cramRecord.isMateUnmapped()); } else { samRecord.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); samRecord.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START); } samRecord.setInferredInsertSize(cramRecord.templateSize); samRecord.setReadBases(cramRecord.readBases); samRecord.setBaseQualities(cramRecord.qualityScores); if (cramRecord.tags != null) for (final ReadTag tag : cramRecord.tags) samRecord.setAttribute(tag.getKey(), tag.getValue()); if (cramRecord.readGroupID > -1) { final SAMReadGroupRecord readGroupRecord = header.getReadGroups().get( cramRecord.readGroupID); samRecord.setAttribute("RG", readGroupRecord.getId()); } return samRecord; } private static void copyFlags(final CramCompressionRecord cramRecord, final SAMRecord samRecord) { samRecord.setReadPairedFlag(cramRecord.isMultiFragment()); samRecord.setProperPairFlag(cramRecord.isProperPair()); samRecord.setReadUnmappedFlag(cramRecord.isSegmentUnmapped()); samRecord.setReadNegativeStrandFlag(cramRecord.isNegativeStrand()); samRecord.setFirstOfPairFlag(cramRecord.isFirstSegment()); samRecord.setSecondOfPairFlag(cramRecord.isLastSegment()); samRecord.setSecondaryAlignment(cramRecord.isSecondaryAlignment()); samRecord.setReadFailsVendorQualityCheckFlag(cramRecord.isVendorFiltered()); samRecord.setDuplicateReadFlag(cramRecord.isDuplicate()); samRecord.setSupplementaryAlignmentFlag(cramRecord.isSupplementary()); } private static Cigar getCigar2(final Collection features, final int readLength) { if (features == null || features.isEmpty()) { final CigarElement cigarElement = new CigarElement(readLength, CigarOperator.M); return new Cigar(Collections.singletonList(cigarElement)); } final List list = new ArrayList(); int totalOpLen = 1; CigarElement cigarElement; CigarOperator lastOperator = CigarOperator.MATCH_OR_MISMATCH; int lastOpLen = 0; int lastOpPos = 1; CigarOperator cigarOperator; int readFeatureLength; for (final ReadFeature feature : features) { final int gap = feature.getPosition() - (lastOpPos + lastOpLen); if (gap > 0) { if (lastOperator != CigarOperator.MATCH_OR_MISMATCH) { list.add(new CigarElement(lastOpLen, lastOperator)); lastOpPos += lastOpLen; totalOpLen += lastOpLen; lastOpLen = gap; } else { lastOpLen += gap; } lastOperator = CigarOperator.MATCH_OR_MISMATCH; } switch (feature.getOperator()) { case Insertion.operator: cigarOperator = CigarOperator.INSERTION; readFeatureLength = ((Insertion) feature).getSequence().length; break; case SoftClip.operator: cigarOperator = CigarOperator.SOFT_CLIP; readFeatureLength = ((SoftClip) feature).getSequence().length; break; case HardClip.operator: cigarOperator = CigarOperator.HARD_CLIP; readFeatureLength = ((HardClip) feature).getLength(); break; case InsertBase.operator: cigarOperator = CigarOperator.INSERTION; readFeatureLength = 1; break; case Deletion.operator: cigarOperator = CigarOperator.DELETION; readFeatureLength = ((Deletion) feature).getLength(); break; case RefSkip.operator: cigarOperator = CigarOperator.SKIPPED_REGION; readFeatureLength = ((RefSkip) feature).getLength(); break; case Padding.operator: cigarOperator = CigarOperator.PADDING; readFeatureLength = ((Padding) feature).getLength(); break; case Substitution.operator: case ReadBase.operator: cigarOperator = CigarOperator.MATCH_OR_MISMATCH; readFeatureLength = 1; break; default: continue; } if (lastOperator != cigarOperator) { // add last feature if (lastOpLen > 0) { list.add(new CigarElement(lastOpLen, lastOperator)); totalOpLen += lastOpLen; } lastOperator = cigarOperator; lastOpLen = readFeatureLength; lastOpPos = feature.getPosition(); } else lastOpLen += readFeatureLength; if (!cigarOperator.consumesReadBases()) lastOpPos -= readFeatureLength; } if (lastOperator != null) { if (lastOperator != CigarOperator.M) { list.add(new CigarElement(lastOpLen, lastOperator)); if (readLength >= lastOpPos + lastOpLen) { cigarElement = new CigarElement(readLength - (lastOpLen + lastOpPos) + 1, CigarOperator.M); list.add(cigarElement); } } else if (readLength == 0 || readLength > lastOpPos - 1) { if (readLength == 0) cigarElement = new CigarElement(lastOpLen, CigarOperator.M); else cigarElement = new CigarElement(readLength - lastOpPos + 1, CigarOperator.M); list.add(cigarElement); } } if (list.isEmpty()) { cigarElement = new CigarElement(readLength, CigarOperator.M); return new Cigar(Collections.singletonList(cigarElement)); } return new Cigar(list); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy