// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SAMValidationError.Type;
import htsjdk.samtools.util.DateParser;
import htsjdk.samtools.util.LineReader;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.StringUtil;
import htsjdk.samtools.util.Log;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Parser for a SAM text header, and a generator of SAM text header.
 *
 * <p>Both directions keep their working state (current line, reader, source name, accumulated
 * sequences/read groups, output writer) in instance fields, so a single instance carries mutable
 * state across a {@link #decode} or {@link #encode} call.
 */
public class SAMTextHeaderCodec {
    private static final String HEADER_LINE_START = "@";

    // These attributes are populated when parsing or generating
    private SAMFileHeader mFileHeader;
    private final TextTagCodec mTagCodec = new TextTagCodec();

    // These attributes are populated when parsing text
    private String mCurrentLine;
    private LineReader mReader;
    private String mSource;
    private List<SAMSequenceRecord> sequences;
    private List<SAMReadGroupRecord> readGroups;

    // For error reporting when parsing
    private ValidationStringency validationStringency = ValidationStringency.SILENT;

    // These attributes are populated when generating text
    private Writer writer;

    private static final String TAG_KEY_VALUE_SEPARATOR = ":";
    private static final char TAG_KEY_VALUE_SEPARATOR_CHAR = ':';
    private static final String FIELD_SEPARATOR = "\t";
    private static final char FIELD_SEPARATOR_CHAR = '\t';
    private static final Pattern FIELD_SEPARATOR_RE = Pattern.compile(FIELD_SEPARATOR);

    /** Text that starts every comment line: "@CO" followed by a tab. */
    public static final String COMMENT_PREFIX = HEADER_LINE_START + HeaderRecordType.CO.name() + FIELD_SEPARATOR;

    private static final Log log = Log.getInstance(SAMTextHeaderCodec.class);

    /**
     * Sets the writer used by {@link #encodeSequenceRecord} and {@link #encodeHeaderLine}.
     * The writer is stored as given; it is not wrapped or flushed here.
     */
    void setWriter(final Writer writer) {
        this.writer = writer;
    }

    /**
     * Sets the header whose attributes are written by {@link #encodeHeaderLine}.
     */
    void setmFileHeader(final SAMFileHeader header) {
        this.mFileHeader = header;
    }

    /**
     * Reads text SAM header and converts to a SAMFileHeader object.
     *
     * <p>Lines are consumed for as long as the next character of the reader is '@'. A line whose
     * record type is not recognized is reported via {@link #reportErrorParsingLine} and then skipped.
     * After the last header line, accumulated validation errors are passed to
     * {@code SAMUtils.processValidationErrors} together with the current validation stringency.
     *
     * @param reader Where to get header text from.
     * @param source Name of the input file, for error messages. May be null.
     * @return complete header object.
     */
    public SAMFileHeader decode(final LineReader reader, final String source) {
        mFileHeader = new SAMFileHeader();
        mReader = reader;
        mSource = source;
        sequences = new ArrayList<>();
        readGroups = new ArrayList<>();

        while (advanceLine() != null) {
            final ParsedHeaderLine parsedHeaderLine = new ParsedHeaderLine(mCurrentLine);
            if (!parsedHeaderLine.isLineValid()) {
                // Already reported by the ParsedHeaderLine constructor.
                continue;
            }
            switch (parsedHeaderLine.getHeaderRecordType()) {
                case HD:
                    parseHDLine(parsedHeaderLine);
                    break;
                case PG:
                    parsePGLine(parsedHeaderLine);
                    break;
                case RG:
                    parseRGLine(parsedHeaderLine);
                    break;
                case SQ:
                    parseSQLine(parsedHeaderLine);
                    break;
                case CO:
                    // Comments are stored as the complete raw line, prefix included.
                    mFileHeader.addComment(mCurrentLine);
                    break;
                default:
                    throw new IllegalStateException("Unrecognized header record type: " +
                            parsedHeaderLine.getHeaderRecordType());
            }
        }

        // Sequences and read groups are collected while parsing and attached in one go at the end.
        mFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences));
        mFileHeader.setReadGroups(readGroups);

        SAMUtils.processValidationErrors(mFileHeader.getValidationErrors(), -1, validationStringency);
        return mFileHeader;
    }

    /**
     * Reads the next line into {@code mCurrentLine} if, and only if, the next character is '@'.
     *
     * @return the line just read, or null when the next character does not start a header line.
     */
    private String advanceLine() {
        final int nextChar = mReader.peek();
        this.mCurrentLine = (nextChar == '@') ? mReader.readLine() : null;
        return this.mCurrentLine;
    }

    /**
     * Transfer standard and non-standard tags from text representation to in-memory representation.
     * All values are now stored as Strings.
     *
     * @param record attributes get set into this object.
     * @param textAttributes Map of tag type to value. Every entry is copied onto {@code record}.
     */
    private void transferAttributes(final AbstractSAMHeaderRecord record, final Map<String, String> textAttributes) {
        // All header tags are now of type String, so no need to distinguish standard from non-standard.
        for (final Map.Entry<String, String> entry : textAttributes.entrySet()) {
            record.setAttribute(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Converts a parsed @PG line into a SAMProgramRecord and adds it to the header.
     * The line is dropped (after being reported) when the ID tag is missing.
     */
    private void parsePGLine(final ParsedHeaderLine parsedHeaderLine) {
        assert(HeaderRecordType.PG.equals(parsedHeaderLine.getHeaderRecordType()));
        if (!parsedHeaderLine.requireTag(SAMProgramRecord.PROGRAM_GROUP_ID_TAG)) {
            return;
        }
        // The ID is consumed by the constructor, so remove it before copying the remaining tags.
        final SAMProgramRecord programRecord =
                new SAMProgramRecord(parsedHeaderLine.removeValue(SAMProgramRecord.PROGRAM_GROUP_ID_TAG));

        transferAttributes(programRecord, parsedHeaderLine.mKeyValuePairs);
        mFileHeader.addProgramRecord(programRecord);
    }

    /**
     * Converts a parsed @RG line into a SAMReadGroupRecord and queues it for the header.
     * The line is dropped (after being reported) when the ID tag is missing. The predicted median
     * insert size and run date tags are checked for well-formedness; problems are reported through
     * {@link #reportErrorParsingLine}.
     */
    private void parseRGLine(final ParsedHeaderLine parsedHeaderLine) {
        assert(HeaderRecordType.RG.equals(parsedHeaderLine.getHeaderRecordType()));
        if (!parsedHeaderLine.requireTag(SAMReadGroupRecord.READ_GROUP_ID_TAG)) {
            return;
        }
        // Allow no SM tag if validation stringency is not strict.  This call has the side effect of reporting an error
        // or throwing an exception depending on validation stringency if this is missing.
        parsedHeaderLine.requireTag(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG);

        final SAMReadGroupRecord samReadGroupRecord =
                new SAMReadGroupRecord(parsedHeaderLine.removeValue(SAMReadGroupRecord.READ_GROUP_ID_TAG));
        transferAttributes(samReadGroupRecord, parsedHeaderLine.mKeyValuePairs);

        // Check that the insert size, if present, is an integer. The attribute stays a String.
        final String predictedMedianInsertSize =
                (String) samReadGroupRecord.getAttribute(SAMReadGroupRecord.PREDICTED_MEDIAN_INSERT_SIZE_TAG);
        if (predictedMedianInsertSize != null) {
            try {
                Integer.parseInt(predictedMedianInsertSize);
                samReadGroupRecord.setAttribute(SAMReadGroupRecord.PREDICTED_MEDIAN_INSERT_SIZE_TAG, predictedMedianInsertSize);
            } catch (NumberFormatException e) {
                reportErrorParsingLine(SAMReadGroupRecord.PREDICTED_MEDIAN_INSERT_SIZE_TAG +
                        " is not numeric: " + predictedMedianInsertSize, SAMValidationError.Type.INVALID_PREDICTED_MEDIAN_INSERT_SIZE,
                        e);
            }
        }

        final String dateRunProduced = (String) samReadGroupRecord.getAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG);
        if (dateRunProduced != null) {
            Object date;
            try {
                date = mTagCodec.decodeDate(dateRunProduced);
            } catch (DateParser.InvalidDateException e) {
                // Can't convert date string into Date object.  Treat it as a string if validation
                // stringency allows it.
                date = dateRunProduced;
                reportErrorParsingLine(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG + " tag value '" +
                        dateRunProduced + "' is not parseable as a date", SAMValidationError.Type.INVALID_DATE_STRING,
                        e);
            }
            // Stored as the string form of whatever was decoded (or the raw text on failure).
            samReadGroupRecord.setAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG, date.toString());
        }

        readGroups.add(samReadGroupRecord);
    }

    /**
     * Converts a parsed @SQ line into a SAMSequenceRecord and queues it for the sequence dictionary.
     * The line is dropped (after being reported) when the name or length tag is missing.
     */
    private void parseSQLine(final ParsedHeaderLine parsedHeaderLine) {
        assert(HeaderRecordType.SQ.equals(parsedHeaderLine.getHeaderRecordType()));
        if (!parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_NAME_TAG) ||
                !parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_LENGTH_TAG)) {
            return;
        }
        String sequenceName = parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_NAME_TAG);
        sequenceName = SAMSequenceRecord.truncateSequenceName(sequenceName);

        // NOTE(review): a non-numeric length makes Integer.parseInt throw NumberFormatException, which is
        // not routed through reportErrorParsingLine and therefore ignores the validation stringency.
        final SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord(sequenceName,
                Integer.parseInt(parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_LENGTH_TAG)));

        transferAttributes(samSequenceRecord, parsedHeaderLine.mKeyValuePairs);
        sequences.add(samSequenceRecord);
    }

    /**
     * Copies the tags of a parsed @HD line onto the file header.
     * The line is dropped (after being reported) when the version tag is missing. Sort-order and
     * group-order values that do not name a known enum constant are reported but still copied.
     */
    private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) {
        assert(HeaderRecordType.HD.equals(parsedHeaderLine.getHeaderRecordType()));
        if (!parsedHeaderLine.requireTag(SAMFileHeader.VERSION_TAG)) {
            return;
        }

        final String soString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG);
        try {
            if (soString != null) {
                SortOrder.valueOf(soString);
            }
        } catch (IllegalArgumentException e) {
            reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() +
                    " line has non-conforming SO tag value: " + soString + ".",
                    SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, e);
        }

        final String goString = parsedHeaderLine.getValue(SAMFileHeader.GROUP_ORDER_TAG);
        try {
            if (goString != null) {
                SAMFileHeader.GroupOrder.valueOf(goString);
            }
        } catch (IllegalArgumentException e) {
            reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() +
                    " line has non-conforming GO tag value: "+ goString + ".",
                    SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, e);
        }

        transferAttributes(mFileHeader, parsedHeaderLine.mKeyValuePairs);
    }

    /**
     * Reports a problem with the current header line.
     *
     * <p>With STRICT stringency a SAMFormatException is thrown; with any other stringency a
     * SAMValidationError is recorded on the header being built and parsing continues.
     *
     * @param reason description of the problem; the current line is appended to it.
     * @param type validation error type recorded in the non-strict case.
     * @param nestedException cause attached to the exception thrown in the strict case; may be null.
     */
    private void reportErrorParsingLine(String reason, final SAMValidationError.Type type, final Throwable nestedException) {
        reason = "Error parsing SAM header. " + reason + ". Line:\n" + mCurrentLine;
        if (validationStringency != ValidationStringency.STRICT) {
            final SAMValidationError error = new SAMValidationError(type, reason, null, mReader.getLineNumber());
            error.setSource(mSource);
            mFileHeader.addValidationError(error);
        } else {
            String fileMessage = "";
            if (mSource != null) {
                fileMessage = "File " + mSource;
            }
            throw new SAMFormatException(reason + "; " + fileMessage +
                    "; Line number " + mReader.getLineNumber(), nestedException);
        }
    }

    /** The header record types this codec reads and writes. */
    private enum HeaderRecordType {
        HD, SQ, RG, PG, CO
    }

    /**
     * Takes a header line as a String and converts it into a HeaderRecordType, and a map of key:value strings.
     * If the line does not contain a recognized HeaderRecordType, then the line is considered invalid, and will
     * not have any key:value pairs.
     */
    private class ParsedHeaderLine {
        private HeaderRecordType mHeaderRecordType;
        // Insertion-ordered so tags are transferred in the order they appeared on the line.
        private final Map<String, String> mKeyValuePairs = new LinkedHashMap<>();
        private boolean lineValid = false;

        /**
         * Parses one header line. Malformed, duplicated-with-different-value, or wrongly sized tags
         * are reported via reportErrorParsingLine and left out of the map.
         *
         * @param line a header line; expected to start with "@".
         */
        ParsedHeaderLine(final String line) {
            assert(line.startsWith(HEADER_LINE_START));

            // Tab-separate
            String[] fields = new String[1024];
            int numFields = StringUtil.split(line, fields, FIELD_SEPARATOR_CHAR);
            if (numFields == fields.length) {
                // Lots of fields, so fall back
                // (presumably the fixed buffer may have been too small; the regex split has no limit)
                fields = FIELD_SEPARATOR_RE.split(line);
                numFields = fields.length;
            }

            // Parse the HeaderRecordType
            try {
                mHeaderRecordType = HeaderRecordType.valueOf(fields[0].substring(1));
            } catch (IllegalArgumentException e) {
                reportErrorParsingLine("Unrecognized header record type", SAMValidationError.Type.UNRECOGNIZED_HEADER_TYPE, null);
                mHeaderRecordType = null;
                return;
            }

            // Do not parse key:value pairs for comment lines.
            if (mHeaderRecordType == HeaderRecordType.CO) {
                lineValid = true;
                return;
            }

            final String[] keyAndValue = new String[2];
            // Parse the key:value pairs
            for (int i = 1; i < numFields; ++i) {
                if (StringUtil.splitConcatenateExcessTokens(fields[i], keyAndValue, TAG_KEY_VALUE_SEPARATOR_CHAR) != 2) {
                    reportErrorParsingLine("Problem parsing " + HEADER_LINE_START + mHeaderRecordType +
                            " key:value pair", SAMValidationError.Type.POORLY_FORMATTED_HEADER_TAG, null);
                    continue;
                }
                // A repeated key is only an error when the values disagree.
                if (mKeyValuePairs.containsKey(keyAndValue[0]) &&
                        ! mKeyValuePairs.get(keyAndValue[0]).equals(keyAndValue[1])) {
                    reportErrorParsingLine("Problem parsing " + HEADER_LINE_START + mHeaderRecordType +
                            " key:value pair " + keyAndValue[0] + ":" + keyAndValue[1] +
                            " clashes with " + keyAndValue[0] + ":" + mKeyValuePairs.get(keyAndValue[0]),
                            SAMValidationError.Type.HEADER_TAG_MULTIPLY_DEFINED, null);
                    continue;
                }
                if (keyAndValue[0].length() != 2) {
                    reportErrorParsingLine("Problem parsing " + HEADER_LINE_START + mHeaderRecordType +
                            " key:value pair " + keyAndValue[0] + ": key is not two characters",
                            SAMValidationError.Type.HEADER_TAG_INVALID_KEY, null);
                    continue;
                }
                // May rewrite keyAndValue[1] in place before it is stored.
                validateSortOrderValue(keyAndValue);
                mKeyValuePairs.put(keyAndValue[0], keyAndValue[1]);
            }
            lineValid = true;
        }

        /**
         * Normalizes a non-conforming sort-order value.
         *
         * <p>Only acts when {@code value[0]} is the sort-order tag and {@code value[1]} does not name
         * a SortOrder constant. STRICT stringency throws; LENIENT logs a warning; in every
         * non-throwing case {@code value[1]} is overwritten with the name of SortOrder.unknown.
         *
         * @param value two-element array holding key and value; the value may be modified.
         * @throws SAMFormatException if the value is non-conforming and stringency is STRICT.
         */
        private void validateSortOrderValue(String[] value) {
            if (SAMFileHeader.SORT_ORDER_TAG.equals(value[0])) {
                try {
                    SortOrder.valueOf(value[1]);
                } catch (IllegalArgumentException e) {
                    if (validationStringency == ValidationStringency.STRICT) {
                        throw new SAMFormatException("Found non-conforming header SO tag: "
                                + value[1]
                                + ", exiting because VALIDATION_STRINGENCY=STRICT", e);
                    } else if (validationStringency == ValidationStringency.LENIENT) {
                        log.warn("Found non-conforming header SO tag: "
                                + value[1] + ". Treating as 'unknown'.");
                    }
                    value[1] = SortOrder.unknown.toString();
                }
            }
        }

        /**
         * True if the line is recognized as one of the valid HeaderRecordTypes.
         */
        public boolean isLineValid() {
            return lineValid;
        }

        /**
         * Handling depends on the validation stringency.  If the tag is not present, and stringency is strict,
         * an exception is thrown.  If stringency is not strict, false is returned.
         * @param tag Must be present for the line to be considered valid.
         * @return True if tag is present.
         */
        boolean requireTag(final String tag) {
            if (!mKeyValuePairs.containsKey(tag)) {
                reportErrorParsingLine(HEADER_LINE_START + mHeaderRecordType + " line missing " + tag + " tag",
                        SAMValidationError.Type.HEADER_RECORD_MISSING_REQUIRED_TAG, null);
                return false;
            }
            return true;
        }

        /**
         * @return null if line is invalid, otherwise the parsed HeaderRecordType
         */
        public HeaderRecordType getHeaderRecordType() {
            return mHeaderRecordType;
        }

        /** @return true if the line carried a (well-formed) tag with this key. */
        boolean containsKey(final String key) {
            return mKeyValuePairs.containsKey(key);
        }

        /** @return the value stored for the key, or null if there is none. */
        String getValue(final String key) {
            return mKeyValuePairs.get(key);
        }

        /**
         * Removes the key from the parsed tags.
         * @return the value that was stored for the key, or null if there was none.
         */
        String removeValue(final String key) {
            // Map.remove already returns the previous mapping, so no separate lookup is needed.
            return mKeyValuePairs.remove(key);
        }
    }

    /**
     * Convert SAMFileHeader from in-memory representation to text representation. Always writes
     * SAMFileHeader.CURRENT_VERSION as the version in the header.
     * @param writer where to write the header text.
     * @param header object to be converted to text.
     */
    public void encode(final Writer writer, final SAMFileHeader header) {
        encode(writer, header, false);
    }

    /**
     * Convert SAMFileHeader from in-memory representation to text representation.
     * Lines are written in the order HD, SQ, RG, PG, comments; the output is flushed but not closed.
     * @param writer where to write the header text.
     * @param header object to be converted to text.
     * @param keepExistingVersionNumber If true, writes whatever version # was in the header.  If false, writes
     *                                  SAMFileHeader.CURRENT_VERSION.
     * @throws RuntimeIOException if writing or flushing fails.
     */
    public void encode(final Writer writer, final SAMFileHeader header, final boolean keepExistingVersionNumber) {
        mFileHeader = header;
        this.writer = new BufferedWriter(writer);
        writeHDLine(keepExistingVersionNumber);
        for (final SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) {
            writeSQLine(sequenceRecord);
        }

        for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
            writeRGLine(readGroup);
        }
        for (final SAMProgramRecord programRecord : header.getProgramRecords()) {
            writePGLine(programRecord);
        }
        // Comments are written verbatim, as they were stored.
        for (final String comment : header.getComments()) {
            println(comment);
        }
        try {
            this.writer.flush();
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Encode {@link SAMSequenceRecord}.
     * Designed for using in {@link SAMSequenceDictionaryCodec}, allows to implement recording on the fly.
     * @throws IllegalStateException if writer is null.
     */
    void encodeSequenceRecord(final SAMSequenceRecord sequenceRecord) {
        if (writer == null) {
            throw new IllegalStateException("writer couldn't be null");
        }
        writeSQLine(sequenceRecord);
    }

    /**
     * Encode HD line.
     * Designed for using in {@link SAMSequenceDictionaryCodec}, allows to implement recording on the fly.
     * @throws IllegalStateException if writer is null.
     */
    void encodeHeaderLine(final boolean keepExistingVersionNumber) {
        if (writer == null) {
            throw new IllegalStateException("writer couldn't be null");
        }
        writeHDLine(keepExistingVersionNumber);
    }

    /**
     * Appends the string followed by a single "\n" to the writer.
     * @throws RuntimeIOException wrapping any IOException from the writer.
     */
    private void println(final String s) {
        try {
            writer.append(s);
            writer.append("\n");
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    private void writePGLine(final SAMProgramRecord programRecord) {
        println(getPGLine(programRecord));
    }

    /**
     * @return the @PG line for the record (no trailing newline): record type, ID, then all attributes,
     *         tab-separated.
     */
    protected String getPGLine(final SAMProgramRecord programRecord) {
        final String[] fields = new String[2 + programRecord.getAttributes().size()];
        fields[0] = HEADER_LINE_START + HeaderRecordType.PG;
        fields[1] = SAMProgramRecord.PROGRAM_GROUP_ID_TAG + TAG_KEY_VALUE_SEPARATOR + programRecord.getProgramGroupId();
        encodeTags(programRecord, fields, 2);
        return StringUtil.join(FIELD_SEPARATOR, fields);
    }

    private void writeRGLine(final SAMReadGroupRecord readGroup) {
        println(getRGLine(readGroup));
    }

    /**
     * @return the @RG line for the record (no trailing newline): record type, ID, then all attributes,
     *         tab-separated.
     */
    protected String getRGLine(final SAMReadGroupRecord readGroup) {
        final String[] fields = new String[2 + readGroup.getAttributes().size()];
        fields[0] = HEADER_LINE_START + HeaderRecordType.RG;
        fields[1] = SAMReadGroupRecord.READ_GROUP_ID_TAG + TAG_KEY_VALUE_SEPARATOR + readGroup.getReadGroupId();
        encodeTags(readGroup, fields, 2);
        return StringUtil.join(FIELD_SEPARATOR, fields);
    }

    /**
     * Writes the @HD line from the attributes of the current file header.
     * @param keepExistingVersionNumber if false, the header's own version attribute is not copied,
     *        so that the output gets CURRENT_VERSION instead.
     */
    private void writeHDLine(final boolean keepExistingVersionNumber) {
        final SAMFileHeader newHeader;
        if (keepExistingVersionNumber) {
            newHeader = mFileHeader;
        } else {
            // Make a copy of the header, excluding the version from the input header, so that
            // output get CURRENT_VERSION instead of whatever the version of the input header was.
            newHeader = new SAMFileHeader();

            for (final Map.Entry<String, String> entry : mFileHeader.getAttributes()) {
                if (!entry.getKey().equals(SAMFileHeader.VERSION_TAG)) {
                    newHeader.setAttribute(entry.getKey(), entry.getValue());
                }
            }
        }

        final String[] fields = new String[1 + newHeader.getAttributes().size()];
        fields[0] = HEADER_LINE_START + HeaderRecordType.HD;
        encodeTags(newHeader, fields, 1);
        println(StringUtil.join(FIELD_SEPARATOR, fields));
    }

    private void writeSQLine(final SAMSequenceRecord sequenceRecord) {
        println(getSQLine(sequenceRecord));
    }

    /**
     * @return the @SQ line for the record (no trailing newline): record type, name, length, then all
     *         attributes, tab-separated.
     */
    protected String getSQLine(final SAMSequenceRecord sequenceRecord) {
        final int numAttributes = sequenceRecord.getAttributes() != null ? sequenceRecord.getAttributes().size() : 0;
        final String[] fields = new String[3 + numAttributes];
        fields[0] = HEADER_LINE_START + HeaderRecordType.SQ;
        fields[1] = SAMSequenceRecord.SEQUENCE_NAME_TAG + TAG_KEY_VALUE_SEPARATOR + sequenceRecord.getSequenceName();
        fields[2] = SAMSequenceRecord.SEQUENCE_LENGTH_TAG + TAG_KEY_VALUE_SEPARATOR + Integer.toString(sequenceRecord.getSequenceLength());
        encodeTags(sequenceRecord, fields, 3);
        return StringUtil.join(FIELD_SEPARATOR, fields);
    }

    /**
     * Encode all the attributes in the given object as text
     * @param rec object containing attributes, and knowledge of which are standard tags
     * @param fields where to put the text representation of the tags.  Must be big enough to hold all tags.
     * @param offset where to start putting text tag representations.
     */
    private void encodeTags(final AbstractSAMHeaderRecord rec, final String[] fields, int offset) {
        for (final Map.Entry<String, String> entry : rec.getAttributes()) {
            fields[offset++] = mTagCodec.encodeUntypedTag(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Sets how parse problems are handled: STRICT makes them throw, other values record them.
     * @param validationStringency the stringency to use; must not be null.
     * @throws IllegalArgumentException if validationStringency is null.
     */
    public void setValidationStringency(final ValidationStringency validationStringency) {
        if (validationStringency == null) {
            throw new IllegalArgumentException("null validationStringency not allowed");
        }
        this.validationStringency = validationStringency;
    }
}