All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.ByteMapIntervalPileup Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils;

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import it.unimi.dsi.fastutil.ints.*;
import org.broadinstitute.hellbender.utils.read.CigarUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.reference.ReferenceBases;

import java.util.*;
import java.util.stream.Collectors;

/**
 * An interval map where bases and qualities are explicitly stored in matrices
 * for fast look-up.
 */
class ByteMapIntervalPileup implements IntervalPileup {

    private final ReferenceBases referenceBases;
    private final List reads;
    private final List elements;
    private final Int2ObjectMap elementByIndex;
    private final Map elementByRead;
    private final int width;
    private final int height;
    private byte[][] bases;
    private byte[][] quals;
    private final List insertsBuffer = new ArrayList<>(10);
    private final IntList insertsBufferOffsets = new IntArrayList(10);


    ByteMapIntervalPileup(final ReferenceBases referenceBases, final List reads) {
        this.referenceBases = referenceBases;
        this.width = referenceBases.getInterval().size();
        final SimpleInterval referenceInterval = referenceBases.getInterval();
        final int referenceStart = referenceInterval.getStart();
        final int referenceEnd = referenceInterval.getEnd();
        this.reads = Collections.unmodifiableList(reads.stream()
                .filter(read -> !read.isUnmapped() && read.getStart() <= referenceEnd && read.getEnd() >= referenceStart)
                .sorted(Comparator.comparingInt(GATKRead::getStart).thenComparing(GATKRead::getEnd).thenComparing(GATKRead::getName))
                .collect(Collectors.toList()));
        this.elements = new ArrayList<>(this.reads.size());
        this.elementByIndex = new Int2ObjectOpenHashMap<>(this.reads.size());
        this.elementByRead = new HashMap<>(this.reads.size());
        this.height = this.reads.size();
        bases = new byte[height][width];
        quals = new byte[height][width];
        for (int i = 0; i < height; i++) {
            final IntervalPileup.Element element = new Element(this, this.reads.get(i), i);
            elements.add(element);
            elementByIndex.put(i, element);
            elementByRead.put(this.reads.get(i), element);
        }
    }

    @Override
    public List reads() {
        return reads;
    }

    @Override
    public ReferenceBases reference() {
        return referenceBases;
    }

    @Override
    public int width() {
        return width;
    }

    @Override
    public int height() {
        return height;
    }

    @Override
    public byte baseAt(final int row, final int column) {
        return bases[row][column];
    }

    @Override
    public byte qualAt(int row, int column) {
        return quals[row][column];
    }

    @Override
    public IntervalPileup.Insert insertAt(final int row, final int column) {
        final IntervalPileup.Element element = elementByIndex.get(row);
        return element != null ? element.insertAt(column) : null;
    }

    @Override
    public GATKRead readAt(int row, int column) {
        final IntervalPileup.Element element = elements.get(row);
        if (element.minColumn() > column || element.maxColumn() < column) {
            return null;
        } else {
            return element.read();
        }
    }

    @Override
    public List readsAt(int row, int column) {
        final GATKRead read = readAt(row, column);
        return read != null ? Collections.singletonList(read) : Collections.emptyList();
    }

    @Override
    public IntervalPileup.Element element(final GATKRead read) {
        return elementByRead.get(read);
    }

    @Override
    public boolean hasInsertAt(final int row, final int column) {
        final IntervalPileup.Element element = elements.get(row);
        return element.hasInsertAt(column);
    }

    @Override
    public byte[] insertBasesAt(final int row, final int column) {
        final IntervalPileup.Element element = elements.get(row);
        return element.insertBasesAt(column);
    }

    @Override
    public byte[] insertQualsAt(final int row, final int column) {
        final IntervalPileup.Element element = elements.get(row);
        return element.insertQualsAt(column);
    }

    static class Insert implements  IntervalPileup.Insert {
        private final GATKRead enclosingRead;
        private final int offset;
        private final int length;
        private final int column;
        private transient int hashCode = 0;

        Insert(final GATKRead enclosingRead, final int offset, final int column, final int length) {
            this.enclosingRead = enclosingRead;
            this.offset = offset;
            this.length = length;
            this.column = column;
        }

        public int column() {
            return column;
        }

        @Override
        public int length() {
            return length;
        }

        @Override
        public byte[] bases() {
            final byte[] result = new byte[length];
            final int copied = enclosingRead.copyBases(offset, result, 0, length);
            if (copied < length) {
                Arrays.fill(result, copied, result.length, (byte) 'N');
            }
            return result;
        }

        @Override
        public byte[] quals() {
            final byte[] result = new byte[length];
            final int copied = enclosingRead.copyBaseQualities(offset, result, 0, length);
            if (copied < length) {
                Arrays.fill(result, copied, result.length, NO_BQ);
            }
            return result;
        }

        @Override
        public int copyBases(int offset, byte[] dest, int destOffset, int maxLength) {
            final int actualMaxLength = Math.min(length, maxLength);
            final int copied = enclosingRead.copyBases(this.offset + offset, dest, destOffset, actualMaxLength);
            if (copied < actualMaxLength) {
                Arrays.fill(dest, destOffset + copied, destOffset + actualMaxLength, (byte) 'N');
            }
            return actualMaxLength;
        }

        @Override
        public int copyQuals(int offset, byte[] dest, int destOffset, int maxLength) {
            final int actualMaxLength = Math.min(length, maxLength);
            final int copied = enclosingRead.copyBaseQualities(this.offset + offset, dest, destOffset, actualMaxLength);
            if (copied < actualMaxLength) {
                Arrays.fill(dest, destOffset + copied, destOffset + actualMaxLength, (byte) 'N');
            }
            return actualMaxLength;
        }

        @Override
        public int hashCode() {
            if (hashCode == 0) {
                final byte[] readBases = enclosingRead.getBasesNoCopy();
                if (readBases != null && readBases.length > offset) {
                    final int to = offset + length;
                    final int to2 = to <= readBases.length ? to : readBases.length;
                    int i;
                    hashCode = 1;
                    for (i = offset; i < to2; i++) {
                        hashCode = hashCode * 31 + readBases[i];
                    }
                    for (; i < to; i++) {
                        hashCode = hashCode * 31 + 'N';
                    }
                }
            }
            return hashCode;
        }

        @Override
        public boolean equals(final Object other) {
            return other == this || (other instanceof Insert && equals((IntervalPileup.Insert) other));
        }

        private boolean equals(final IntervalPileup.Insert other) {
            return length == other.length() && hashCode() == other.hashCode() &&
                    equalBases(other) && equalQualities(other);
        }

        private boolean equalBases(final IntervalPileup.Insert other) {
            if (other instanceof Insert) {
                equalBases((Insert) other);
            }
            final byte[] otherBases = other.bases();
            final byte[] readBases = enclosingRead.getBasesNoCopy();
            for (int i = 0; i < otherBases.length; i++) {
                if (otherBases[i] != readBases[offset + i]) {
                    return false;
                }
            }
            return true;
        }

        private boolean equalBases(final Insert other) {
            final byte[] otherBases = other.enclosingRead.getBasesNoCopy();
            final byte[] thisBases = enclosingRead.getBasesNoCopy();
            for (int i = 0; i < length; i++) {
                if (otherBases[other.offset + i] != thisBases[offset + i]) {
                    return false;
                }
            }
            return true;
        }

        private boolean equalQualities(final IntervalPileup.Insert other) {
            if (other instanceof Insert) {
                equalQualities((Insert) other);
            }
            final byte[] otherQuals = other.quals();
            final byte[] thisQuals = enclosingRead.getBaseQualitiesNoCopy();
            for (int i = 0; i < otherQuals.length; i++) {
                if (otherQuals[i] != thisQuals[offset + i]) {
                    return false;
                }
            }
            return true;
        }

        private boolean equalQualities(final Insert other) {
            final byte[] otherQuals = other.enclosingRead.getBaseQualitiesNoCopy();
            final byte[] thisQuals = enclosingRead.getBaseQualitiesNoCopy();
            for (int i = 0; i < length; i++) {
                if (otherQuals[other.offset + i] != thisQuals[offset + i]) {
                    return false;
                }
            }
            return true;
        }

        @Override
        public String toString() {
            return new String(bases()) + "/" + new String(quals());
        }
    }

    static class Element implements IntervalPileup.Element {

        private final GATKRead read;
        private final int row;
        private final int minColumn;
        private final int maxColumn;
        private final Int2ObjectMap inserts;
        private final ByteMapIntervalPileup pileup;

        private Element(final ByteMapIntervalPileup pileup, final GATKRead read, final int row) {
            this.pileup = pileup;
            this.row = row;
            this.read = read;
            final Cigar cigar = read.getCigar();
            final int referenceWidth = cigar.getReferenceLength();
            int referenceOffset = read.getStart() - pileup.referenceBases.getInterval().getStart();
            int readOffset = 0;
            minColumn = Math.max(0, referenceOffset);
            maxColumn = Math.min(pileup.width - 1, referenceOffset + referenceWidth - 1);
            Arrays.fill(pileup.bases[row], 0, minColumn, NO_BASE);
            Arrays.fill(pileup.bases[row], maxColumn + 1, pileup.width, NO_BASE);
            Arrays.fill(pileup.quals[row], 0, minColumn, NO_BQ);
            Arrays.fill(pileup.quals[row], maxColumn + 1, pileup.width, NO_BQ);
            final List cigarElements = cigar.getCigarElements();
            pileup.insertsBuffer.clear();
            pileup.insertsBufferOffsets.clear();
            int i;
            for (i = 0; referenceOffset <= maxColumn + 1 &&  i < cigarElements.size(); i++) {
                final CigarElement element = cigarElements.get(i);
                final CigarOperator op = element.getOperator();
                final int length = element.getLength();
                final int newReferenceOffset = referenceOffset + (op.consumesReferenceBases() ? length : 0);
                final int newReadOffset = readOffset + (op.consumesReadBases() ? length : 0);
                if (newReferenceOffset >= minColumn) {
                    if (op.isAlignment()) {
                        final int leftOverhang = referenceOffset < minColumn ? minColumn - referenceOffset : 0;
                        final int from = referenceOffset + leftOverhang;
                        final int len = newReferenceOffset > maxColumn ? maxColumn + 1 - from :
                                  length - leftOverhang;
                        final int readFrom = readOffset + leftOverhang;
                        final int copiedBases = read.copyBases(readFrom, pileup.bases[row], from, len);
                        final int copiedQuals = read.copyBaseQualities(readFrom, pileup.quals[row], from, len);
                        if (copiedBases < len) {
                            Arrays.fill(pileup.bases[row], from + copiedBases, from + len - copiedBases, (byte) 'N');
                        }
                        if (copiedQuals < len) {
                            Arrays.fill(pileup.quals[row], from + copiedQuals, from + len - copiedQuals, NO_BQ);
                        }
                    } else if (op == CigarOperator.I) {
                        pileup.insertsBuffer.add(new Insert(read, readOffset, referenceOffset - 1, length));
                        pileup.insertsBufferOffsets.add(referenceOffset - 1);
                    } else if (op == CigarOperator.D || op == CigarOperator.N) {
                        final int from = referenceOffset < minColumn ? minColumn : referenceOffset;
                        final int len = (newReferenceOffset > maxColumn ? maxColumn + 1 : newReferenceOffset) - from;
                        Arrays.fill(pileup.bases[row], from, from + len, GAP);
                        Arrays.fill(pileup.quals[row], from, from + len, NO_BQ);
                    }
                }
                readOffset = newReadOffset;
                referenceOffset = newReferenceOffset;
            }
            // merge adjacent inserts if any.
            mergeAdjacentInserts(read, pileup.insertsBuffer, pileup.insertsBufferOffsets);
            inserts = consolidateInserts(pileup.insertsBuffer, pileup.insertsBufferOffsets);
        }

        private static Int2ObjectMap consolidateInserts(final List inserts, final IntList offsets) {
            final int size = inserts.size();
            if (size == 0) {
                return Int2ObjectMaps.emptyMap();
            } else if (size == 1) {
                return Int2ObjectMaps.singleton(offsets.get(0) , inserts.get(0));
            } else {
                final Int2ObjectMap result = size < 5 ? new Int2ObjectArrayMap<>(size)
                        : new Int2ObjectOpenHashMap<>(size);
                for (int ins = 0; ins < size; ins++) {
                    result.put(offsets.get(ins) , inserts.get(ins));
                }
                return result;
            }
        }

        private static void mergeAdjacentInserts(final GATKRead read, final List inserts, final IntList offsets) {
            int i;
            for (i = inserts.size() - 1; i > 0; i--) {
                if (offsets.get(i) == offsets.get(i-1)) {
                    final Insert left = inserts.get(i-1);
                    final Insert right = inserts.get(i);
                    final Insert merged = new Insert(read, left.offset, left.column,left.length + right.length);
                    inserts.remove(i);
                    inserts.set(i - 1, merged);
                    offsets.remove(i);
                }
            }
        }

        @Override
        public GATKRead read() {
            return read;
        }

        @Override
        public int row() {
            return row;
        }

        @Override
        public int minColumn() {
            return minColumn;
        }

        @Override
        public int maxColumn() {
            return maxColumn;
        }


        @Override
        public boolean hasInsertAt(final int column) {
            return inserts.containsKey(column);
        }

        @Override
        public int insertSize(final int column) {
            final Insert insert = inserts.get(column);
            return insert != null ? insert.length() : 0;
        }

        @Override
        public int copyInsertQuals(final int column, final byte[] dest, final int offset, final int maxLength) {
            Utils.nonNull(dest);
            Utils.validIndex(offset, dest.length);
            Utils.validIndex(offset + maxLength, dest.length);
            final Insert insert = inserts.get(column);
            if (maxLength != 0 && insert != null) {
                return insert.copyQuals(0, dest, offset, maxLength);
            } else {
                return 0;
            }
        }

        @Override
        public int copyInsertBases(final int column, final byte[] dest, final int offset, final int maxLength) {
            Utils.nonNull(dest);
            Utils.validIndex(offset, dest.length);
            Utils.validIndex(offset + maxLength, dest.length);
            final Insert insert = inserts.get(column);
            if (maxLength != 0 && insert != null) {
                return insert.copyBases(0, dest, offset, maxLength);
            } else {
                return 0;
            }
        }

        @Override
        public byte[] insertBasesAt(final int column) {
            final Insert insert = inserts.get(column);
            return insert != null ? insert.bases() : null;
        }

        @Override
        public byte[] insertQualsAt(final int column) {
            final Insert insert = inserts.get(column);
            return insert != null ? insert.quals() : null;
        }

        @Override
        public Insert insertAt(final int column) {
            return inserts.get(column);
        }

        @Override
        public List inserts() {
            return new ArrayList<>(inserts.values());
        }

        @Override
        public byte baseAt(final int column) {
            return pileup.bases[row][column];
        }

        @Override
        public byte qualAt(final int column) {
            return pileup.quals[row][column];
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy