![JAR search and dependency download from the Maven repository](/logo.png)
com.google.cloud.genomics.dataflow.readers.bam.BAMIO Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of google-genomics-dataflow Show documentation
Show all versions of google-genomics-dataflow Show documentation
Google Genomics Dataflow pipelines.
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.genomics.dataflow.readers.bam;
import com.google.api.services.storage.Storage;
import htsjdk.samtools.DefaultSAMRecordFactory;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.SeekingBAMFileReader;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.seekablestream.SeekableStream;
import java.io.IOException;
import java.util.logging.Logger;
/**
* Utility methods for opening BAM files from GCS storage using HTSJDK.
* For sharding in pipelines we need access to the guts of BAM index,
* so openBAMAndExposeIndex provides a convenient way to get both SamReader and
* a stream for an index file.
*/
public class BAMIO {
public static final String BAM_INDEX_FILE_MIME_TYPE = "application/octet-stream";
public static class ReaderAndIndex {
public SamReader reader;
public SeekableStream index;
}
private static final Logger LOG = Logger.getLogger(BAMIO.class.getName());
public static ReaderAndIndex openBAMAndExposeIndex(Storage.Objects storageClient, String gcsStoragePath, ValidationStringency stringency) throws IOException {
ReaderAndIndex result = new ReaderAndIndex();
result.index = openIndexForPath(storageClient, gcsStoragePath);
result.reader = openBAMReader(
openBAMFile(storageClient, gcsStoragePath,result.index), stringency, false, 0);
return result;
}
public static SamReader openBAM(Storage.Objects storageClient, String gcsStoragePath,
ValidationStringency stringency, boolean includeFileSource) throws IOException {
return openBAMReader(openBAMFile(storageClient, gcsStoragePath,
openIndexForPath(storageClient, gcsStoragePath)), stringency, includeFileSource, 0);
}
public static SamReader openBAM(Storage.Objects storageClient, String gcsStoragePath,
ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException {
return openBAMReader(openBAMFile(storageClient, gcsStoragePath,
null), stringency, includeFileSource, offset);
}
public static SamReader openBAM(Storage.Objects storageClient, String gcsStoragePath, ValidationStringency stringency) throws IOException {
return openBAM(storageClient, gcsStoragePath, stringency, false);
}
private static SeekableStream openIndexForPath(Storage.Objects storageClient,String gcsStoragePath) {
final String indexPath = gcsStoragePath + ".bai";
try {
return new SeekableGCSStream(storageClient, indexPath);
} catch (IOException ex) {
LOG.info("No index for " + indexPath);
// Ignore if there is no bai file
}
return null;
}
private static SamInputResource openBAMFile(Storage.Objects storageClient, String gcsStoragePath, SeekableStream index) throws IOException {
SeekableGCSStream s = new SeekableGCSStream(storageClient, gcsStoragePath);
SamInputResource samInputResource =
SamInputResource.of(s);
if (index != null) {
samInputResource.index(index);
}
LOG.info("getReadsFromBAMFile - got input resources");
return samInputResource;
}
public static class SeekingReaderAdapter extends SamReader.PrimitiveSamReaderToSamReaderAdapter {
SeekingBAMFileReader underlyingReader;
public SeekingReaderAdapter(SeekingBAMFileReader reader, SamInputResource resource){
super(reader, resource);
underlyingReader = reader;
}
public SeekingBAMFileReader underlyingSeekingReader() {
return underlyingReader;
}
}
private static SamReader openBAMReader(SamInputResource resource, ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException {
SamReaderFactory samReaderFactory = SamReaderFactory
.makeDefault()
.validationStringency(stringency)
.enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES);
if (includeFileSource) {
samReaderFactory.enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS);
}
if (offset == 0) {
return samReaderFactory.open(resource);
}
LOG.info("Initializing seeking reader with the offset of " + offset);
SeekingBAMFileReader primitiveReader = new SeekingBAMFileReader(resource,
false,
stringency,
DefaultSAMRecordFactory.getInstance(),
offset);
final SeekingReaderAdapter reader =
new SeekingReaderAdapter(primitiveReader, resource);
samReaderFactory.reapplyOptions(reader);
return reader;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy