All downloads are free. Search and download functionality uses the official Maven repository.

htsjdk.samtools.cram.encoding.writer.CramRecordWriter Maven / Gradle / Ivy

There is a newer version: 4.1.3
Show newest version
/**
 * ****************************************************************************
 * Copyright 2013 EMBL-EBI
 * 

* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * **************************************************************************** */ package htsjdk.samtools.cram.encoding.writer; import htsjdk.samtools.cram.encoding.readfeatures.*; import htsjdk.samtools.cram.io.BitOutputStream; import htsjdk.samtools.cram.ref.ReferenceContext; import htsjdk.samtools.cram.structure.*; import htsjdk.samtools.cram.structure.Slice; import java.io.ByteArrayOutputStream; import java.nio.charset.Charset; import java.util.List; import java.util.Map; import java.util.stream.Collectors; public class CramRecordWriter { private final DataSeriesWriter bitFlagsC; private final DataSeriesWriter compBitFlagsC; private final DataSeriesWriter readLengthC; private final DataSeriesWriter alStartC; private final DataSeriesWriter readGroupC; private final DataSeriesWriter readNameC; private final DataSeriesWriter distanceC; private final Map> tagValueCodecs; private final DataSeriesWriter numberOfReadFeaturesCodec; private final DataSeriesWriter featurePositionCodec; private final DataSeriesWriter featuresCodeCodec; private final DataSeriesWriter baseCodec; private final DataSeriesWriter qualityScoreCodec; private final DataSeriesWriter qualityScoreArrayCodec; private final DataSeriesWriter baseSubstitutionCodeCodec; private final DataSeriesWriter insertionCodec; private final DataSeriesWriter softClipCodec; private final DataSeriesWriter hardClipCodec; private final DataSeriesWriter paddingCodec; private final DataSeriesWriter deletionLengthCodec; private final DataSeriesWriter mappingQualityScoreCodec; private final DataSeriesWriter mateBitFlagsCodec; private final DataSeriesWriter nextFragmentReferenceSequenceIDCodec; private final 
DataSeriesWriter nextFragmentAlignmentStart; private final DataSeriesWriter templateSize; private final DataSeriesWriter tagIdListCodec; private final DataSeriesWriter refIdCodec; private final DataSeriesWriter refSkipCodec; private final Charset charset = Charset.forName("UTF8"); private final boolean captureReadNames; private final ReferenceContext refContext; private final SubstitutionMatrix substitutionMatrix; private final boolean AP_delta; private final Map encodingMap; private final BitOutputStream coreBlockOutputStream; private final Map externalBlockOutputMap; /** * Initializes a Cram Record Writer * * @param coreOutputStream Core data block bit stream, to be written by non-external Encodings * @param externalOutputMap External data block byte stream map, to be written by external Encodings * @param header the associated Cram Compression Header * @param refContext the reference context to assign to these records */ public CramRecordWriter(final BitOutputStream coreOutputStream, final Map externalOutputMap, final CompressionHeader header, final ReferenceContext refContext) { this.captureReadNames = header.readNamesIncluded; this.refContext = refContext; this.substitutionMatrix = header.substitutionMatrix; this.AP_delta = header.APDelta; this.encodingMap = header.encodingMap; this.coreBlockOutputStream = coreOutputStream; this.externalBlockOutputMap = externalOutputMap; bitFlagsC = createDataWriter(DataSeries.BF_BitFlags); compBitFlagsC = createDataWriter(DataSeries.CF_CompressionBitFlags); readLengthC = createDataWriter(DataSeries.RL_ReadLength); alStartC = createDataWriter(DataSeries.AP_AlignmentPositionOffset); readGroupC = createDataWriter(DataSeries.RG_ReadGroup); readNameC = createDataWriter(DataSeries.RN_ReadName); distanceC = createDataWriter(DataSeries.NF_RecordsToNextFragment); numberOfReadFeaturesCodec = createDataWriter(DataSeries.FN_NumberOfReadFeatures); featurePositionCodec = createDataWriter(DataSeries.FP_FeaturePosition); featuresCodeCodec = 
createDataWriter(DataSeries.FC_FeatureCode); baseCodec = createDataWriter(DataSeries.BA_Base); qualityScoreCodec = createDataWriter(DataSeries.QS_QualityScore); baseSubstitutionCodeCodec = createDataWriter(DataSeries.BS_BaseSubstitutionCode); insertionCodec = createDataWriter(DataSeries.IN_Insertion); softClipCodec = createDataWriter(DataSeries.SC_SoftClip); hardClipCodec = createDataWriter(DataSeries.HC_HardClip); paddingCodec = createDataWriter(DataSeries.PD_padding); deletionLengthCodec = createDataWriter(DataSeries.DL_DeletionLength); mappingQualityScoreCodec = createDataWriter(DataSeries.MQ_MappingQualityScore); mateBitFlagsCodec = createDataWriter(DataSeries.MF_MateBitFlags); nextFragmentReferenceSequenceIDCodec = createDataWriter(DataSeries.NS_NextFragmentReferenceSequenceID); nextFragmentAlignmentStart = createDataWriter(DataSeries.NP_NextFragmentAlignmentStart); templateSize = createDataWriter(DataSeries.TS_InsertSize); tagIdListCodec = createDataWriter(DataSeries.TL_TagIdList); refIdCodec = createDataWriter(DataSeries.RI_RefId); refSkipCodec = createDataWriter(DataSeries.RS_RefSkip); // special case: re-encodes QS as a byte array qualityScoreArrayCodec = new DataSeriesWriter<>(DataSeriesType.BYTE_ARRAY, header.encodingMap.get(DataSeries.QS_QualityScore), coreOutputStream, externalOutputMap); tagValueCodecs = header.tMap.entrySet() .stream() .collect(Collectors.toMap( Map.Entry::getKey, mapEntry -> new DataSeriesWriter<>(DataSeriesType.BYTE_ARRAY, mapEntry.getValue(), coreOutputStream, externalOutputMap))); } /** * Look up a Data Series in the Cram Compression Header's Encoding Map. 
If found, create a Data Writer * * @param dataSeries Which Data Series to write * @param The Java data type associated with the Data Series * @return a Data Writer for the given Data Series, or null if it's not in the encoding map */ private DataSeriesWriter createDataWriter(final DataSeries dataSeries) { if (encodingMap.containsKey(dataSeries)) { return new DataSeriesWriter<>(dataSeries.getType(), encodingMap.get(dataSeries), coreBlockOutputStream, externalBlockOutputMap); } else { return null; } } /** * Writes a series of Cram Compression Records, using this class's Encodings * * @param records the Cram Compression Records to write * @param initialAlignmentStart the alignmentStart of the enclosing {@link Slice}, for delta calculation */ public void writeCramCompressionRecords(final List records, final int initialAlignmentStart) { int prevAlignmentStart = initialAlignmentStart; for (final CramCompressionRecord record : records) { writeRecord(record, prevAlignmentStart); prevAlignmentStart = record.alignmentStart; } } /** * Write a Cram Compression Record, using this class's Encodings * * @param r the Cram Compression Record to write * @param prevAlignmentStart the alignmentStart of the previous record, for delta calculation */ private void writeRecord(final CramCompressionRecord r, final int prevAlignmentStart) { bitFlagsC.writeData(r.flags); compBitFlagsC.writeData(r.getCompressionFlags()); if (refContext.isMultiRef()) { refIdCodec.writeData(r.sequenceId); } readLengthC.writeData(r.readLength); if (AP_delta) { final int alignmentDelta = r.alignmentStart - prevAlignmentStart; alStartC.writeData(alignmentDelta); } else { alStartC.writeData(r.alignmentStart); } readGroupC.writeData(r.readGroupID); if (captureReadNames) { readNameC.writeData(r.readName.getBytes(charset)); } // mate record: if (r.isDetached()) { mateBitFlagsCodec.writeData(r.getMateFlags()); if (!captureReadNames) { readNameC.writeData(r.readName.getBytes(charset)); } 
nextFragmentReferenceSequenceIDCodec.writeData(r.mateSequenceID); nextFragmentAlignmentStart.writeData(r.mateAlignmentStart); templateSize.writeData(r.templateSize); } else if (r.isHasMateDownStream()) { distanceC.writeData(r.recordsToNextFragment); } // tag records: tagIdListCodec.writeData(r.tagIdsIndex.value); if (r.tags != null) { for (int i = 0; i < r.tags.length; i++) { final DataSeriesWriter writer = tagValueCodecs.get(r.tags[i].keyType3BytesAsInt); writer.writeData(r.tags[i].getValueAsByteArray()); } } if (!r.isSegmentUnmapped()) { // writing read features: numberOfReadFeaturesCodec.writeData(r.readFeatures.size()); int prevPos = 0; for (final ReadFeature f : r.readFeatures) { featuresCodeCodec.writeData(f.getOperator()); featurePositionCodec.writeData(f.getPosition() - prevPos); prevPos = f.getPosition(); switch (f.getOperator()) { case ReadBase.operator: final ReadBase rb = (ReadBase) f; baseCodec.writeData(rb.getBase()); qualityScoreCodec.writeData(rb.getQualityScore()); break; case Substitution.operator: final Substitution sv = (Substitution) f; if (sv.getCode() < 0) baseSubstitutionCodeCodec.writeData(substitutionMatrix.code(sv.getReferenceBase(), sv.getBase())); else baseSubstitutionCodeCodec.writeData(sv.getCode()); // baseSubstitutionCodec.writeData((byte) sv.getBaseChange().getChange()); break; case Insertion.operator: final Insertion iv = (Insertion) f; insertionCodec.writeData(iv.getSequence()); break; case SoftClip.operator: final SoftClip fv = (SoftClip) f; softClipCodec.writeData(fv.getSequence()); break; case HardClip.operator: final HardClip hv = (HardClip) f; hardClipCodec.writeData(hv.getLength()); break; case Padding.operator: final Padding pv = (Padding) f; paddingCodec.writeData(pv.getLength()); break; case Deletion.operator: final Deletion dv = (Deletion) f; deletionLengthCodec.writeData(dv.getLength()); break; case RefSkip.operator: final RefSkip rsv = (RefSkip) f; refSkipCodec.writeData(rsv.getLength()); break; case 
InsertBase.operator: final InsertBase ib = (InsertBase) f; baseCodec.writeData(ib.getBase()); break; case BaseQualityScore.operator: final BaseQualityScore bqs = (BaseQualityScore) f; qualityScoreCodec.writeData(bqs.getQualityScore()); break; default: throw new RuntimeException("Unknown read feature operator: " + (char) f.getOperator()); } } // mapping quality: mappingQualityScoreCodec.writeData(r.mappingQuality); if (r.isForcePreserveQualityScores()) { qualityScoreArrayCodec.writeData(r.qualityScores); } } else { if (!r.isUnknownBases()) { for (final byte b : r.readBases) { baseCodec.writeData(b); } } if (r.isForcePreserveQualityScores()) { qualityScoreArrayCodec.writeData(r.qualityScores); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy