All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.campagnelab.dl.somatic.tools.QuickConcat Maven / Gradle / Ivy

package org.campagnelab.dl.somatic.tools;

import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.logging.ProgressLogger;
import org.apache.commons.compress.utils.IOUtils;
import org.campagnelab.dl.framework.tools.Predict;
import org.campagnelab.dl.framework.tools.arguments.AbstractTool;
import org.campagnelab.dl.somatic.intermediaries.QuickConcatArguments;
import org.campagnelab.goby.baseinfo.SequenceBaseInformationReader;
import org.campagnelab.goby.baseinfo.SequenceBaseInformationWriter;
import org.campagnelab.goby.compression.MessageChunksWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.channels.FileChannel;
import java.util.Properties;

/**
 * A utility to quickly concatenate a list of .sbi files. Concatenation does not decompress each file: it simply
 * concatenates the bytes and adds up the number of records.
 */
public class QuickConcat extends AbstractTool<QuickConcatArguments> {
    private static final Logger LOG = LoggerFactory.getLogger(QuickConcat.class);

    /**
     * Creates the argument holder that the command line is parsed into.
     *
     * @return a new, empty {@link QuickConcatArguments}.
     */
    @Override
    public QuickConcatArguments createArguments() {
        return new QuickConcatArguments();
    }

    /**
     * Concatenates the input files given in the parsed arguments into the requested output.
     */
    @Override
    public void execute() {
        performQuickConcat(arguments.inputFiles.toArray(new String[0]), arguments.outputFile);
    }

    /**
     * Command line entry point: parses the arguments and runs the concatenation.
     *
     * @param args command line arguments.
     * @throws IOException kept in the signature for backward compatibility with existing callers.
     */
    public static void main(String[] args) throws IOException {
        QuickConcat r = new QuickConcat();
        r.parseArguments(args, "QuickConcat", r.createArguments());
        r.execute();
    }

    /**
     * This version does a quick concat. It does NO filtering. It gathers no stats,
     * but will quickly concat multiple files together using NIO.
     * It should be noted that this method is >MUCH< faster.
     * The trailing terminator bytes of the first n-1 input files are dropped; the last
     * input file is copied in its entirety.
     * <p>
     * If a file already exists at {@code outputBasename}, a message is printed to standard error
     * and nothing is written.
     *
     * @param inputFilenames names of the files to concatenate, in output order.
     * @param outputBasename basename of the destination; data is written to {@code outputBasename + ".sbi"}.
     * @throws IllegalArgumentException if the configured copy buffer size is not positive.
     * @throws RuntimeException         if the destination cannot be created, an input cannot be read,
     *                                  or the copy fails. The underlying exception is the cause.
     */
    private void performQuickConcat(String[] inputFilenames, String outputBasename) {
        System.out.println("quick concatenating files");
        File outputFile = new File(outputBasename);
        if (outputFile.exists()) {
            System.err.println("The output file already exists. Please delete it before running concat.");
            return;
        }
        final long bufferSize = arguments.copyBufferSize;
        if (bufferSize <= 0) {
            // A zero-sized buffer would make the copy loop spin forever without transferring anything.
            throw new IllegalArgumentException("copyBufferSize must be positive, but was " + bufferSize);
        }
        // NOTE(review): the existence check and this file creation use outputBasename as is, while the
        // concatenated data is written to outputBasename + ".sbi" below. Left unchanged to preserve
        // behavior; confirm whether the guard should target the ".sbi" file instead.
        try {
            outputFile.createNewFile();
        } catch (IOException e) {
            throw new RuntimeException("Unable to create destination file", e);
        }
        //set up logger
        ProgressLogger progressLogger = new ProgressLogger(LOG);
        progressLogger.itemsName = "files";
        progressLogger.expectedUpdates = inputFilenames.length;
        progressLogger.displayFreeMemory = true;
        progressLogger.start();

        ObjectList<Properties> properties = collectProperties(inputFilenames);
        try {
            SequenceBaseInformationWriter.writeProperties(outputBasename, properties);
        } catch (FileNotFoundException e) {
            throw new RuntimeException("Unable to write properties", e);
        }

        // try-with-resources closes the output channel (and its underlying stream) on every path and,
        // unlike a quiet close, reports a failure to close the freshly written output.
        try (FileChannel output = new FileOutputStream(outputFile + ".sbi").getChannel()) {
            final int lastFileNumToCopy = inputFilenames.length - 1;
            for (int curFileNum = 0; curFileNum < inputFilenames.length; curFileNum++) {
                final String inputFilename = inputFilenames[curFileNum];
                System.out.printf("Reading from %s%n", inputFilename);
                // Every file but the last one loses its trailing terminator.
                appendFile(inputFilename, output, curFileNum < lastFileNumToCopy, bufferSize);
                progressLogger.update();
            }
            System.out.printf("Concatenated %d files.%n", lastFileNumToCopy + 1);
            progressLogger.stop();
        } catch (Exception e) {
            throw new RuntimeException("Unable to concatenate", e);
        }
    }

    /**
     * Reads the properties of each input file, closing each reader even when reading fails.
     *
     * @param inputFilenames names of the files whose properties are collected.
     * @return one properties object per input file, in input order.
     * @throws RuntimeException if an input file cannot be opened or read.
     */
    private static ObjectList<Properties> collectProperties(String[] inputFilenames) {
        ObjectList<Properties> properties = new ObjectArrayList<>();
        for (final String inputFilename : inputFilenames) {
            try {
                SequenceBaseInformationReader reader = new SequenceBaseInformationReader(inputFilename);
                try {
                    properties.add(reader.getProperties());
                } finally {
                    reader.close();
                }
            } catch (IOException e) {
                throw new RuntimeException("Unable to open " + inputFilename, e);
            }
        }
        return properties;
    }

    /**
     * Appends the bytes of one input file to the output channel.
     *
     * @param inputFilename   name of the file to copy.
     * @param output          channel the bytes are appended to.
     * @param stripTerminator when true, the trailing
     *                        {@code DELIMITER_LENGTH + 1 + SIZE_OF_MESSAGE_LENGTH} bytes are not copied.
     * @param bufferSize      maximum number of bytes requested per transfer; must be positive.
     * @throws IOException if the file cannot be read, the output cannot be written,
     *                     or a transfer makes no progress.
     */
    private static void appendFile(String inputFilename, FileChannel output, boolean stripTerminator,
                                   long bufferSize) throws IOException {
        try (FileChannel input = new FileInputStream(inputFilename).getChannel()) {
            long bytesToCopy = input.size();
            if (stripTerminator) {
                // Files end with a delimiter followed by a message length field. Strip
                // these on all but the last file so that the result has a single terminator.
                bytesToCopy -= (MessageChunksWriter.DELIMITER_LENGTH + 1 + MessageChunksWriter.SIZE_OF_MESSAGE_LENGTH);
            }

            // Copy the file one buffer at a time. It would probably be marginally faster
            // to just tell NIO to copy the ENTIRE file in one go, but with very large files
            // Java will freeze until the entire chunk is copied, so this makes for a more
            // responsive program should you want to ^C in the middle of the copy. Also, with
            // a single transferTo() you might not see any file size changes in the output
            // file until the entire copy is complete.
            long position = 0;
            while (position < bytesToCopy) {
                long bytesToCopyThisTime = Math.min(bufferSize, bytesToCopy - position);
                long transferred = input.transferTo(position, bytesToCopyThisTime, output);
                if (transferred <= 0) {
                    // No progress (e.g. the input shrank while being copied): fail instead of looping forever.
                    throw new IOException("No bytes could be transferred from " + inputFilename
                            + " at position " + position + " (" + bytesToCopy + " bytes expected)");
                }
                position += transferred;
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy