org.seqdoop.hadoop_bam.SplittingBAMIndex Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hadoop-bam Show documentation
Show all versions of hadoop-bam Show documentation
A Java library for the manipulation of files in common bioinformatics formats using the Hadoop MapReduce framework.
// Copyright (c) 2010 Aalto University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// File created: 2010-08-04 13:11:10
package org.seqdoop.hadoop_bam;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableSet;
import java.util.TreeSet;
/** An index into BAM files, for {@link BAMInputFormat}. Reads files that are
* created by {@link SplittingBAMIndexer}.
*
* Indexes the positions of individual BAM records in the file.
*/
public final class SplittingBAMIndex {
private final NavigableSet virtualOffsets = new TreeSet();
public SplittingBAMIndex() {}
public SplittingBAMIndex(final File path) throws IOException {
this(new BufferedInputStream(new FileInputStream(path)));
}
public SplittingBAMIndex(final InputStream in) throws IOException {
readIndex(in);
}
public void readIndex(final InputStream in) throws IOException {
virtualOffsets.clear();
final ByteBuffer bb = ByteBuffer.allocate(8);
for (long prev = -1; in.read(bb.array()) == 8;) {
final long cur = bb.getLong(0);
if (prev > cur)
throw new IOException(String.format(
"Invalid splitting BAM index; offsets not in order: %#x > %#x",
prev, cur));
virtualOffsets.add(prev = cur);
}
in.close();
if (virtualOffsets.size() < 1)
throw new IOException(
"Invalid splitting BAM index: "+
"should contain at least the file size");
}
public List getVirtualOffsets() {
return new ArrayList<>(virtualOffsets);
}
public Long prevAlignment(final long filePos) {
return virtualOffsets.floor(filePos << 16);
}
public Long nextAlignment(final long filePos) {
return virtualOffsets.higher(filePos << 16);
}
public int size() { return virtualOffsets.size(); }
private long first() { return virtualOffsets.first(); }
private long last() { return prevAlignment(bamSize() - 1); }
long bamSize() { return virtualOffsets.last() >>> 16; }
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
SplittingBAMIndex that = (SplittingBAMIndex) o;
return virtualOffsets != null ? virtualOffsets.equals(that.virtualOffsets) : that
.virtualOffsets == null;
}
@Override
public int hashCode() {
return virtualOffsets != null ? virtualOffsets.hashCode() : 0;
}
@Override
public String toString() {
return virtualOffsets.toString();
}
/** Writes some statistics about each splitting BAM index file given as an
* argument.
*/
public static void main(String[] args) {
if (args.length == 0) {
System.out.println(
"Usage: SplittingBAMIndex [splitting BAM indices...]\n\n"+
"Writes a few statistics about each splitting BAM index.");
return;
}
for (String arg : args) {
final File f = new File(arg);
if (f.isFile() && f.canRead()) {
try {
System.err.printf("%s:\n", f);
final SplittingBAMIndex bi = new SplittingBAMIndex(f);
if (bi.size() == 1) {
System.err.printf("\t0 alignments\n" +
"\tassociated BAM file size %d\n", bi.bamSize());
} else {
final long first = bi.first();
final long last = bi.last();
System.err.printf(
"\t%d alignments\n" +
"\tfirst is at %#06x in BGZF block at %#014x\n" +
"\tlast is at %#06x in BGZF block at %#014x\n" +
"\tassociated BAM file size %d\n",
bi.size(),
first & 0xffff, first >>> 16,
last & 0xffff, last >>> 16,
bi.bamSize());
}
} catch (IOException e) {
System.err.printf("Failed to read %s!\n", f);
e.printStackTrace();
}
} else
System.err.printf("%s does not look like a readable file!\n", f);
}
}
}