All Downloads are FREE. Search and download functionalities are using the official Maven repository.

picard.illumina.CheckIlluminaDirectory Maven / Gradle / Ivy

There is a newer version: 3.2.0
Show newest version
package picard.illumina;

import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProcessExecutor;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Illumina;
import picard.cmdline.StandardOptionDefinitions;
import picard.illumina.parser.IlluminaDataProviderFactory;
import picard.illumina.parser.IlluminaDataType;
import picard.illumina.parser.IlluminaFileUtil;
import picard.illumina.parser.OutputMapping;
import picard.illumina.parser.ParameterizedFileUtil;
import picard.illumina.parser.ReadStructure;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Program to check a lane of an Illumina output directory.  This program checks that files exist, are non-zero in length, for every tile/cycle and
 * specified data type.  If NO data type is specified then the default data types used by IlluminaBasecallsToSam are used.
 */
@CommandLineProgramProperties(
        usage = CheckIlluminaDirectory.USAGE_SUMMARY + CheckIlluminaDirectory.USAGE_DETAILS,
        usageShort = CheckIlluminaDirectory.USAGE_SUMMARY,
        programGroup = Illumina.class
)
public class CheckIlluminaDirectory extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Asserts the validity for specified Illumina basecalling data.  ";
    static final String USAGE_DETAILS = "

This tool will check that the basecall directory and the internal files are available, exist, " + "and are reasonably sized for every tile and cycle. Reasonably sized means non-zero sized for files that exist per tile and " + "equal size for binary files that exist per cycle or per tile. If DATA_TYPES {Position, BaseCalls, QualityScores, PF," + " or Barcodes} are not specified, then the default data types used by IlluminaBasecallsToSam are used. " + "CheckIlluminaDirectory DOES NOT check that the individual records in a file are well-formed.

" + "" + "

Usage example:

" + "
" +
            "java -jar picard.jar CheckIlluminaDirectory \\
" + " BASECALLS_DIR=/BaseCalls/ \\
" + " READ_STRUCTURE=25T8B25T \\
" + " LANES=1 \\
" + " DATA_TYPES=BaseCalls " + "
" + "
" ; private static final Log log = Log.getInstance(CheckIlluminaDirectory.class); // The following attributes define the command-line arguments @Option(doc = "The basecalls output directory. ", shortName = "B") public File BASECALLS_DIR; @Option(doc = "The data types that should be checked for each tile/cycle. If no values are provided then the data types checked are " + "those required by IlluminaBaseCallsToSam (which is a superset of those used in ExtractIlluminaBarcodes). These data types vary " + "slightly depending on whether or not the run is barcoded so READ_STRUCTURE should be the same as that which will be passed to " + "IlluminaBasecallsToSam. " + "If this option is left unspecified then both ExtractIlluminaBarcodes and IlluminaBaseCallsToSam should complete successfully " + "UNLESS the " + "individual records of the files themselves are spurious.", shortName = "DT", optional = true) public final Set DATA_TYPES = new TreeSet(); @Option(doc = ReadStructure.PARAMETER_DOC + " Note: If you want to check whether or not a future IlluminaBasecallsToSam or " + "ExtractIlluminaBarcodes run will fail then be sure to use the exact same READ_STRUCTURE that you would pass to these programs " + "for this run.", shortName = "RS") public String READ_STRUCTURE; @Option(doc = "The number of the lane(s) to check. ", shortName = StandardOptionDefinitions.LANE_SHORT_NAME, minElements = 1) public List LANES; @Option(doc = "The number(s) of the tile(s) to check. ", shortName = "T", optional = true) public List TILE_NUMBERS; @Option(doc = "A flag to determine whether or not to create fake versions of the missing files.", shortName = "F", optional = true) public Boolean FAKE_FILES = false; @Option(doc = "A flag to create symlinks to the loc file for the X Ten for each tile.", shortName = "X", optional = true) public Boolean LINK_LOCS = false; /** * Required main method implementation. */ public static void main(final String[] argv) { new CheckIlluminaDirectory().instanceMainWithExit(argv); } @Override protected int doWork() { final ReadStructure readStructure = new ReadStructure(READ_STRUCTURE); if (DATA_TYPES.isEmpty()) { DATA_TYPES.addAll(Arrays.asList(IlluminaBasecallsConverter.DATA_TYPES_NO_BARCODE)); } final List failingLanes = new ArrayList(); int totalFailures = 0; final int[] expectedCycles = new OutputMapping(readStructure).getOutputCycles(); log.info("Checking lanes(" + StringUtil.join(",", LANES) + " in basecalls directory (" + BASECALLS_DIR .getAbsolutePath() + ")\n"); log.info("Expected cycles: " + StringUtil.intValuesToString(expectedCycles)); for (final Integer lane : LANES) { IlluminaFileUtil fileUtil = new IlluminaFileUtil(BASECALLS_DIR, lane); final List expectedTiles = fileUtil.getExpectedTiles(); if (!TILE_NUMBERS.isEmpty()) { expectedTiles.retainAll(TILE_NUMBERS); } if (LINK_LOCS) { createLocFileSymlinks(fileUtil, lane); //we need to create a new file util because it stores a cache to the files it found on //construction and this doesn't inclue the recently created symlinks fileUtil = new IlluminaFileUtil(BASECALLS_DIR, lane); } log.info("Checking lane " + lane); log.info("Expected tiles: " + StringUtil.join(", ", expectedTiles)); final int numFailures = verifyLane(fileUtil, expectedTiles, expectedCycles, DATA_TYPES, FAKE_FILES); if (numFailures > 0) { log.info("Lane " + lane + " FAILED " + " Total Errors: " + numFailures); failingLanes.add(lane); totalFailures += numFailures; } else { log.info("Lane " + lane + " SUCCEEDED "); } } int status = 0; if (totalFailures == 0) { log.info("SUCCEEDED! All required files are present and non-empty."); } else { status = totalFailures; log.info("FAILED! There were " + totalFailures + " in the following lanes: " + StringUtil .join(", ", failingLanes)); } return status; } private void createLocFileSymlinks(final IlluminaFileUtil fileUtil, final int lane) { final File baseFile = new File(BASECALLS_DIR.getParentFile().getAbsolutePath() + File.separator + "s.locs"); final File newFileBase = new File(baseFile.getParent() + File.separator + IlluminaFileUtil .longLaneStr(lane) + File.separator); if (baseFile.exists()) { boolean success = true; if (!newFileBase.exists()) { success = newFileBase.mkdirs(); } if (success) { for (final Integer tile : fileUtil.getExpectedTiles()) { final String newName = newFileBase + File.separator + String.format("s_%d_%d.locs", lane, tile); final ProcessExecutor.ExitStatusAndOutput output = ProcessExecutor.executeAndReturnInterleavedOutput(new String[]{"ln", "-fs", baseFile.getAbsolutePath(), newName}); if (output.exitStatus != 0) { throw new PicardException("Could not create symlink: " + output.stdout); } } } else { throw new PicardException(String.format("Could not create lane directory: %s.", newFileBase.getAbsolutePath())); } } else { throw new PicardException(String.format("Locations file %s does not exist.", baseFile.getAbsolutePath())); } } /** * Use fileUtil to find the data types that would be used by IlluminaDataProvider. Verify that for the expected * tiles/cycles/data types that all the files needed to provide their data is present. This method logs every * error that is found (excluding file faking errors) and returns the number of errors found * * @param fileUtil A file util paramterized with the directory/lane to check * @param expectedTiles The tiles we expect to be available/well-formed * @param cycles The cycles we expect to be available/well-formed * @param dataTypes The data types we expect to be available/well-formed * @return The number of errors found/logged for this directory/lane */ private static final int verifyLane(final IlluminaFileUtil fileUtil, final List expectedTiles, final int[] cycles, final Set dataTypes, final boolean fakeFiles) { if (expectedTiles.isEmpty()) { throw new PicardException( "0 input tiles were specified! Check to make sure this lane is in the InterOp file!"); } if (cycles.length == 0) { throw new PicardException("0 output cycles were specified!"); } int numFailures = 0; //find what request IlluminaDataTypes we have files for and select the most preferred file format available for that type final Map> formatToDataTypes = IlluminaDataProviderFactory.determineFormats(dataTypes, fileUtil); //find if we have any IlluminaDataType with NO available file formats and, if any exist, increase the error count final Set unmatchedDataTypes = IlluminaDataProviderFactory.findUnmatchedTypes(dataTypes, formatToDataTypes); if (!unmatchedDataTypes.isEmpty()) { if (fakeFiles) { for (final IlluminaDataType dataType : unmatchedDataTypes) { final IlluminaFileUtil.SupportedIlluminaFormat format = IlluminaDataProviderFactory.findPreferredFormat(dataType, fileUtil); fileUtil.getUtil(format).fakeFiles(expectedTiles, cycles, format); } } log.info("Could not find a format with available files for the following data types: " + StringUtil .join(", ", new ArrayList(unmatchedDataTypes))); numFailures += unmatchedDataTypes.size(); } for (final IlluminaFileUtil.SupportedIlluminaFormat format : formatToDataTypes.keySet()) { final ParameterizedFileUtil util = fileUtil.getUtil(format); final List failures = util.verify(expectedTiles, cycles); //if we have failures and we want to fake files then fake them now. if (!failures.isEmpty() && fakeFiles) { //fake files util.fakeFiles(expectedTiles, cycles, format); } numFailures += failures.size(); for (final String failure : failures) { log.info(failure); } } return numFailures; } @Override protected String[] customCommandLineValidation() { IOUtil.assertDirectoryIsReadable(BASECALLS_DIR); final List errors = new ArrayList(); for (final Integer lane : LANES) { if (lane < 1) { errors.add( "LANES must be greater than or equal to 1. LANES passed in " + StringUtil.join(", ", LANES)); break; } } if (errors.isEmpty()) { return null; } else { return errors.toArray(new String[errors.size()]); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy