// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download or modify it as often as you want.
/**
* ****************************************************************************
* Copyright 2013 EMBL-EBI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ****************************************************************************
*/
package htsjdk.samtools.cram.encoding.writer;
import htsjdk.samtools.cram.encoding.readfeatures.*;
import htsjdk.samtools.cram.io.BitOutputStream;
import htsjdk.samtools.cram.ref.ReferenceContext;
import htsjdk.samtools.cram.structure.*;
import htsjdk.samtools.cram.structure.Slice;
import java.io.ByteArrayOutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class CramRecordWriter {
private final DataSeriesWriter bitFlagsC;
private final DataSeriesWriter compBitFlagsC;
private final DataSeriesWriter readLengthC;
private final DataSeriesWriter alStartC;
private final DataSeriesWriter readGroupC;
private final DataSeriesWriter readNameC;
private final DataSeriesWriter distanceC;
private final Map> tagValueCodecs;
private final DataSeriesWriter numberOfReadFeaturesCodec;
private final DataSeriesWriter featurePositionCodec;
private final DataSeriesWriter featuresCodeCodec;
private final DataSeriesWriter baseCodec;
private final DataSeriesWriter qualityScoreCodec;
private final DataSeriesWriter qualityScoreArrayCodec;
private final DataSeriesWriter baseSubstitutionCodeCodec;
private final DataSeriesWriter insertionCodec;
private final DataSeriesWriter softClipCodec;
private final DataSeriesWriter hardClipCodec;
private final DataSeriesWriter paddingCodec;
private final DataSeriesWriter deletionLengthCodec;
private final DataSeriesWriter mappingQualityScoreCodec;
private final DataSeriesWriter mateBitFlagsCodec;
private final DataSeriesWriter nextFragmentReferenceSequenceIDCodec;
private final DataSeriesWriter nextFragmentAlignmentStart;
private final DataSeriesWriter templateSize;
private final DataSeriesWriter tagIdListCodec;
private final DataSeriesWriter refIdCodec;
private final DataSeriesWriter refSkipCodec;
private final Charset charset = Charset.forName("UTF8");
private final boolean captureReadNames;
private final ReferenceContext refContext;
private final SubstitutionMatrix substitutionMatrix;
private final boolean AP_delta;
private final Map encodingMap;
private final BitOutputStream coreBlockOutputStream;
private final Map externalBlockOutputMap;
/**
* Initializes a Cram Record Writer
*
* @param coreOutputStream Core data block bit stream, to be written by non-external Encodings
* @param externalOutputMap External data block byte stream map, to be written by external Encodings
* @param header the associated Cram Compression Header
* @param refContext the reference context to assign to these records
*/
public CramRecordWriter(final BitOutputStream coreOutputStream,
final Map externalOutputMap,
final CompressionHeader header,
final ReferenceContext refContext) {
this.captureReadNames = header.readNamesIncluded;
this.refContext = refContext;
this.substitutionMatrix = header.substitutionMatrix;
this.AP_delta = header.APDelta;
this.encodingMap = header.encodingMap;
this.coreBlockOutputStream = coreOutputStream;
this.externalBlockOutputMap = externalOutputMap;
bitFlagsC = createDataWriter(DataSeries.BF_BitFlags);
compBitFlagsC = createDataWriter(DataSeries.CF_CompressionBitFlags);
readLengthC = createDataWriter(DataSeries.RL_ReadLength);
alStartC = createDataWriter(DataSeries.AP_AlignmentPositionOffset);
readGroupC = createDataWriter(DataSeries.RG_ReadGroup);
readNameC = createDataWriter(DataSeries.RN_ReadName);
distanceC = createDataWriter(DataSeries.NF_RecordsToNextFragment);
numberOfReadFeaturesCodec = createDataWriter(DataSeries.FN_NumberOfReadFeatures);
featurePositionCodec = createDataWriter(DataSeries.FP_FeaturePosition);
featuresCodeCodec = createDataWriter(DataSeries.FC_FeatureCode);
baseCodec = createDataWriter(DataSeries.BA_Base);
qualityScoreCodec = createDataWriter(DataSeries.QS_QualityScore);
baseSubstitutionCodeCodec = createDataWriter(DataSeries.BS_BaseSubstitutionCode);
insertionCodec = createDataWriter(DataSeries.IN_Insertion);
softClipCodec = createDataWriter(DataSeries.SC_SoftClip);
hardClipCodec = createDataWriter(DataSeries.HC_HardClip);
paddingCodec = createDataWriter(DataSeries.PD_padding);
deletionLengthCodec = createDataWriter(DataSeries.DL_DeletionLength);
mappingQualityScoreCodec = createDataWriter(DataSeries.MQ_MappingQualityScore);
mateBitFlagsCodec = createDataWriter(DataSeries.MF_MateBitFlags);
nextFragmentReferenceSequenceIDCodec = createDataWriter(DataSeries.NS_NextFragmentReferenceSequenceID);
nextFragmentAlignmentStart = createDataWriter(DataSeries.NP_NextFragmentAlignmentStart);
templateSize = createDataWriter(DataSeries.TS_InsertSize);
tagIdListCodec = createDataWriter(DataSeries.TL_TagIdList);
refIdCodec = createDataWriter(DataSeries.RI_RefId);
refSkipCodec = createDataWriter(DataSeries.RS_RefSkip);
// special case: re-encodes QS as a byte array
qualityScoreArrayCodec = new DataSeriesWriter<>(DataSeriesType.BYTE_ARRAY, header.encodingMap.get(DataSeries.QS_QualityScore), coreOutputStream, externalOutputMap);
tagValueCodecs = header.tMap.entrySet()
.stream()
.collect(Collectors.toMap(
Map.Entry::getKey,
mapEntry -> new DataSeriesWriter<>(DataSeriesType.BYTE_ARRAY, mapEntry.getValue(), coreOutputStream, externalOutputMap)));
}
/**
* Look up a Data Series in the Cram Compression Header's Encoding Map. If found, create a Data Writer
*
* @param dataSeries Which Data Series to write
* @param The Java data type associated with the Data Series
* @return a Data Writer for the given Data Series, or null if it's not in the encoding map
*/
private DataSeriesWriter createDataWriter(final DataSeries dataSeries) {
if (encodingMap.containsKey(dataSeries)) {
return new DataSeriesWriter<>(dataSeries.getType(), encodingMap.get(dataSeries), coreBlockOutputStream, externalBlockOutputMap);
}
else {
return null;
}
}
/**
* Writes a series of Cram Compression Records, using this class's Encodings
*
* @param records the Cram Compression Records to write
* @param initialAlignmentStart the alignmentStart of the enclosing {@link Slice}, for delta calculation
*/
public void writeCramCompressionRecords(final List records, final int initialAlignmentStart) {
int prevAlignmentStart = initialAlignmentStart;
for (final CramCompressionRecord record : records) {
writeRecord(record, prevAlignmentStart);
prevAlignmentStart = record.alignmentStart;
}
}
/**
* Write a Cram Compression Record, using this class's Encodings
*
* @param r the Cram Compression Record to write
* @param prevAlignmentStart the alignmentStart of the previous record, for delta calculation
*/
private void writeRecord(final CramCompressionRecord r, final int prevAlignmentStart) {
bitFlagsC.writeData(r.flags);
compBitFlagsC.writeData(r.getCompressionFlags());
if (refContext.isMultiRef()) {
refIdCodec.writeData(r.sequenceId);
}
readLengthC.writeData(r.readLength);
if (AP_delta) {
final int alignmentDelta = r.alignmentStart - prevAlignmentStart;
alStartC.writeData(alignmentDelta);
} else {
alStartC.writeData(r.alignmentStart);
}
readGroupC.writeData(r.readGroupID);
if (captureReadNames) {
readNameC.writeData(r.readName.getBytes(charset));
}
// mate record:
if (r.isDetached()) {
mateBitFlagsCodec.writeData(r.getMateFlags());
if (!captureReadNames) {
readNameC.writeData(r.readName.getBytes(charset));
}
nextFragmentReferenceSequenceIDCodec.writeData(r.mateSequenceID);
nextFragmentAlignmentStart.writeData(r.mateAlignmentStart);
templateSize.writeData(r.templateSize);
} else if (r.isHasMateDownStream()) {
distanceC.writeData(r.recordsToNextFragment);
}
// tag records:
tagIdListCodec.writeData(r.tagIdsIndex.value);
if (r.tags != null) {
for (int i = 0; i < r.tags.length; i++) {
final DataSeriesWriter writer = tagValueCodecs.get(r.tags[i].keyType3BytesAsInt);
writer.writeData(r.tags[i].getValueAsByteArray());
}
}
if (!r.isSegmentUnmapped()) {
// writing read features:
numberOfReadFeaturesCodec.writeData(r.readFeatures.size());
int prevPos = 0;
for (final ReadFeature f : r.readFeatures) {
featuresCodeCodec.writeData(f.getOperator());
featurePositionCodec.writeData(f.getPosition() - prevPos);
prevPos = f.getPosition();
switch (f.getOperator()) {
case ReadBase.operator:
final ReadBase rb = (ReadBase) f;
baseCodec.writeData(rb.getBase());
qualityScoreCodec.writeData(rb.getQualityScore());
break;
case Substitution.operator:
final Substitution sv = (Substitution) f;
if (sv.getCode() < 0)
baseSubstitutionCodeCodec.writeData(substitutionMatrix.code(sv.getReferenceBase(), sv.getBase()));
else
baseSubstitutionCodeCodec.writeData(sv.getCode());
// baseSubstitutionCodec.writeData((byte) sv.getBaseChange().getChange());
break;
case Insertion.operator:
final Insertion iv = (Insertion) f;
insertionCodec.writeData(iv.getSequence());
break;
case SoftClip.operator:
final SoftClip fv = (SoftClip) f;
softClipCodec.writeData(fv.getSequence());
break;
case HardClip.operator:
final HardClip hv = (HardClip) f;
hardClipCodec.writeData(hv.getLength());
break;
case Padding.operator:
final Padding pv = (Padding) f;
paddingCodec.writeData(pv.getLength());
break;
case Deletion.operator:
final Deletion dv = (Deletion) f;
deletionLengthCodec.writeData(dv.getLength());
break;
case RefSkip.operator:
final RefSkip rsv = (RefSkip) f;
refSkipCodec.writeData(rsv.getLength());
break;
case InsertBase.operator:
final InsertBase ib = (InsertBase) f;
baseCodec.writeData(ib.getBase());
break;
case BaseQualityScore.operator:
final BaseQualityScore bqs = (BaseQualityScore) f;
qualityScoreCodec.writeData(bqs.getQualityScore());
break;
default:
throw new RuntimeException("Unknown read feature operator: " + (char) f.getOperator());
}
}
// mapping quality:
mappingQualityScoreCodec.writeData(r.mappingQuality);
if (r.isForcePreserveQualityScores()) {
qualityScoreArrayCodec.writeData(r.qualityScores);
}
} else {
if (!r.isUnknownBases()) {
for (final byte b : r.readBases) {
baseCodec.writeData(b);
}
}
if (r.isForcePreserveQualityScores()) {
qualityScoreArrayCodec.writeData(r.qualityScores);
}
}
}
}