All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencb.biodata.tools.alignment.converters.AlignmentBiConverter Maven / Gradle / Ivy

/*
 * 
 *
 */

package org.opencb.biodata.tools.alignment.converters;

import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.TagValueAndUnsignedArrayFlag;
import htsjdk.samtools.TextTagCodec;
import htsjdk.samtools.util.StringUtil;
import org.opencb.biodata.tools.commons.BiConverter;

import java.util.Map;

/**
 * Created by pfurio on 25/10/16.
 */
public abstract class AlignmentBiConverter implements BiConverter {

    protected boolean adjustQuality;
    protected static final String FIELD_SEPARATOR = "\t";

    // From SAM specification
    private static final int QNAME_COL = 0;
    private static final int FLAG_COL = 1;
    private static final int RNAME_COL = 2;
    private static final int POS_COL = 3;
    private static final int MAPQ_COL = 4;
    private static final int CIGAR_COL = 5;
    private static final int MRNM_COL = 6;
    private static final int MPOS_COL = 7;
    private static final int ISIZE_COL = 8;
    private static final int SEQ_COL = 9;
    private static final int QUAL_COL = 10;
    private static final int NUM_REQUIRED_FIELDS = 11;


    protected SAMRecord from(String samLine) {
        final String[] fields = new String[1000];
        final int numFields = StringUtil.split(samLine, fields, '\t');
        if (numFields < NUM_REQUIRED_FIELDS) {
            throw new IllegalArgumentException("Not enough fields");
        }
        if (numFields == fields.length) {
            throw new IllegalArgumentException("Too many fields in SAM text record.");
        }
        for (int i = 0; i < numFields; ++i) {
            if (fields[i].isEmpty()) {
                throw new IllegalArgumentException("Empty field at position " + i + " (zero-based)");
            }
        }

        SAMRecord out = new SAMRecord(null);
        if (fields.length > 11) {
            out.setReadName(fields[QNAME_COL]);
            out.setFlags(Integer.valueOf(fields[FLAG_COL]));
            out.setReferenceName(fields[RNAME_COL]);
            out.setAlignmentStart(Integer.valueOf(fields[POS_COL]));
            out.setMappingQuality(Integer.valueOf(fields[MAPQ_COL]));
            out.setCigarString(fields[CIGAR_COL]);
            out.setMateReferenceName(fields[MRNM_COL].equals("=") ? out.getReferenceName() : fields[MRNM_COL]);
            out.setMateAlignmentStart(Integer.valueOf(fields[MPOS_COL]));
            out.setInferredInsertSize(Integer.valueOf(fields[ISIZE_COL]));

            if (!fields[SEQ_COL].equals("*")) {
                out.setReadString(fields[SEQ_COL]);
            } else {
                out.setReadBases(SAMRecord.NULL_SEQUENCE);
            }
            if (!fields[QUAL_COL].equals("*")) {
                out.setBaseQualityString(fields[QUAL_COL]);
            } else {
                out.setBaseQualities(SAMRecord.NULL_QUALS);
            }
        }

        TextTagCodec tagCodec = new TextTagCodec();
        for (int i = NUM_REQUIRED_FIELDS; i < numFields; ++i) {
            Map.Entry entry;
            try {
                entry = tagCodec.decode(fields[i]);
            } catch (SAMFormatException e) {
                throw new IllegalArgumentException("Unable to decode field \"" + fields[i] + "\"", e);
            }
            if (entry != null) {
                if (entry.getValue() instanceof TagValueAndUnsignedArrayFlag) {
                    final TagValueAndUnsignedArrayFlag valueAndFlag = (TagValueAndUnsignedArrayFlag) entry.getValue();
                    if (valueAndFlag.isUnsignedArray) {
                        out.setUnsignedArrayAttribute(entry.getKey(), valueAndFlag.value);
                    } else {
                        out.setAttribute(entry.getKey(), valueAndFlag.value);
                    }
                } else {
                    out.setAttribute(entry.getKey(), entry.getValue());
                }
            }
        }

        return out;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy