All downloads are free. The search and download functionality uses the official Maven repository.

org.seqdoop.hadoop_bam.cli.plugins.View Maven / Gradle / Ivy

Go to download

A Java library for the manipulation of files in common bioinformatics formats using the Hadoop MapReduce framework.

There is a newer version: 7.10.0
Show newest version
// Copyright (c) 2011 Aalto University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.

// File created: 2011-06-14 13:38:57

package org.seqdoop.hadoop_bam.cli.plugins;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;

import htsjdk.samtools.BamFileIoUtils;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.SAMFileWriterImpl;
import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTextWriter;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.seekablestream.SeekableStream;

import org.apache.hadoop.fs.Path;

import org.seqdoop.hadoop_bam.SAMFormat;
import org.seqdoop.hadoop_bam.cli.CLIPlugin;
import org.seqdoop.hadoop_bam.cli.Utils;
import org.seqdoop.hadoop_bam.custom.jargs.gnu.CmdLineParser;
import org.seqdoop.hadoop_bam.custom.jargs.gnu.CmdLineParser.Option.BooleanOption;
import org.seqdoop.hadoop_bam.custom.jargs.gnu.CmdLineParser.Option.StringOption;
import org.seqdoop.hadoop_bam.util.Pair;
import org.seqdoop.hadoop_bam.util.WrapSeekable;

public final class View extends CLIPlugin {
	private static final List> optionDescs
		= new ArrayList>();

	private static final CmdLineParser.Option
		headerOnlyOpt = new BooleanOption('H', "header-only"),
		formatOpt     = new StringOption ('F', "format=FMT"),
		stringencyOpt = new  StringOption("validation-stringency=S");

	public View() {
		super("view", "SAM and BAM viewing", "1.2", "PATH [regions...]",
			optionDescs,
			"Reads the BAM or SAM file in PATH and, by default, outputs it in "+
			"SAM format. If any number of regions is given, only the alignments "+
			"overlapping with those regions are output. Then an index is also "+
			"required, expected at PATH.bai by default."+
			"\n\n"+
			"Regions can be given as only reference sequence names or indices "+
			"like 'chr1', or with position ranges as well like 'chr1:100-200'. "+
			"These coordinates are 1-based, with 0 representing the start or "+
			"end of the sequence.");
	}
	static {
		optionDescs.add(new Pair(
			headerOnlyOpt, "print header only"));
		optionDescs.add(new Pair(
			formatOpt, "select the output format based on FMT: SAM or BAM"));
		optionDescs.add(new Pair(
			stringencyOpt, Utils.getStringencyOptHelp()));
	}

	// We suppress deprecation warnings for htsjdk.samtools.SAMFileReader since the
	// alternative, SamReader, does not support the isBinary method.
	@SuppressWarnings("deprecation")
	@Override protected int run(CmdLineParser parser) {

		final List args = parser.getRemainingArgs();
		if (args.isEmpty()) {
			System.err.println("view :: PATH not given.");
			return 3;
		}

        Utils.toStringency(parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()), "view");

		final String       path    = args.get(0);
		final List regions = args.subList(1, args.size());

		final boolean headerOnly = parser.getBoolean(headerOnlyOpt);

		final htsjdk.samtools.SAMFileReader reader;

		try {
			final Path p = new Path(path);

			SeekableStream idx;
			try {
				idx = WrapSeekable.openPath(getConf(), p.suffix(".bai"));
			} catch (Exception e) {
				idx = null;
			}

			final SeekableStream sam = WrapSeekable.openPath(getConf(), p);

			reader = idx == null ? new htsjdk.samtools.SAMFileReader(sam,      false)
			                     : new htsjdk.samtools.SAMFileReader(sam, idx, false);
		} catch (Exception e) {
			System.err.printf("view :: Could not open '%s': %s\n",
			                  path, e.getMessage());
			return 4;
		}

		reader.setValidationStringency(ValidationStringency.SILENT);

		final SAMFileHeader header;

		try {
			header = reader.getFileHeader();
		} catch (SAMFormatException e) {
			System.err.printf("view :: Could not parse '%s': %s\n",
			                  path, e.getMessage());
			return 4;
		}

		final String fmt = (String)parser.getOptionValue(formatOpt);

		final SAMFormat format =
			fmt == null ? SAMFormat.SAM
			            : SAMFormat.valueOf(fmt.toUpperCase(Locale.ENGLISH));

		final SAMFileWriterImpl writer;
		switch (format) {
			case BAM:
				// BAM output inside view no longer supported since Picard made the class private
                System.err.println("BAM output inside view no longer supported");
                return 1;
			case SAM:
				writer = new SAMTextWriter(System.out);
				break;
			default: writer = null; assert false;
		}

        writer.setSortOrder(header.getSortOrder(), true);
        writer.setHeader(header);

		if (regions.isEmpty() || headerOnly) {
			if (!headerOnly)
				if (!writeIterator(writer, reader.iterator(), path))
					return 4;

			writer.close();
			return 0;
		}

		if (!reader.isBinary()) {
			System.err.println("view :: Cannot output regions from SAM file");
			return 4;
		}

		if (!reader.hasIndex()) {
			System.err.println(
				"view :: Cannot output regions from BAM file lacking an index");
			return 4;
		}

		reader.enableIndexCaching(true);

		boolean errors = false;

		for (final String region : regions) {
			final StringTokenizer st = new StringTokenizer(region, ":-");
			final String refStr = st.nextToken();
			final int beg, end;

			if (st.hasMoreTokens()) {
				beg = parseCoordinate(st.nextToken());
				end = st.hasMoreTokens() ? parseCoordinate(st.nextToken()) : -1;

				if (beg < 0 || end < 0) {
					errors = true;
					continue;
				}
				if (end < beg) {
					System.err.printf(
						"view :: Invalid range, cannot end before start: '%d-%d'\n",
						beg, end);
					errors = true;
					continue;
				}
			} else
				beg = end = 0;

			SAMSequenceRecord ref = header.getSequence(refStr);
			if (ref == null) try {
				ref = header.getSequence(Integer.parseInt(refStr));
			} catch (NumberFormatException e) {}

			if (ref == null) {
				System.err.printf(
					"view :: Not a valid sequence name or index: '%s'\n", refStr);
				errors = true;
				continue;
			}

			final SAMRecordIterator it =
				reader.queryOverlapping(ref.getSequenceName(), beg, end);

			if (!writeIterator(writer, it, path))
				return 4;
		}
		writer.close();
		return errors ? 5 : 0;
	}

	private boolean writeIterator(
		SAMFileWriterImpl writer, SAMRecordIterator it, String path)
	{
		try {
			while (it.hasNext())
				writer.addAlignment(it.next());
			return true;
		} catch (SAMFormatException e) {
			writer.close();
			System.err.printf("view :: Could not parse '%s': %s\n",
			                  path, e.getMessage());
			return false;
		}
	}

	private int parseCoordinate(String s) {
		int c;
		try {
			c = Integer.parseInt(s);
		} catch (NumberFormatException e) {
			c = -1;
		}
		if (c < 0)
			System.err.printf("view :: Not a valid coordinate: '%s'\n", s);
		return c;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy