All downloads are free. The search and download features use the official Maven repository.

de.unkrig.commons.file.fileprocessing.FileProcessings Maven / Gradle / Ivy

Go to download

A versatile Java(TM) library that implements many useful container and utility classes.

There is a newer version: 1.1.12
Show newest version

/*
 * de.unkrig.commons - A general-purpose Java class library
 *
 * Copyright (c) 2011, Arno Unkrig
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 * following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
 *       following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
 *       following disclaimer in the documentation and/or other materials provided with the distribution.
 *    3. The name of the author may not be used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package de.unkrig.commons.file.fileprocessing;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.compressors.CompressorInputStream;

import de.unkrig.commons.file.CompressUtil;
import de.unkrig.commons.file.CompressUtil.ArchiveHandler;
import de.unkrig.commons.file.CompressUtil.CompressorHandler;
import de.unkrig.commons.file.CompressUtil.NormalContentsHandler;
import de.unkrig.commons.file.ExceptionHandler;
import de.unkrig.commons.file.contentsprocessing.ContentsProcessings;
import de.unkrig.commons.file.contentsprocessing.ContentsProcessings.ArchiveCombiner;
import de.unkrig.commons.file.contentsprocessing.ContentsProcessor;
import de.unkrig.commons.file.org.apache.commons.compress.archivers.ArchiveFormat;
import de.unkrig.commons.file.org.apache.commons.compress.compressors.CompressionFormat;
import de.unkrig.commons.file.org.apache.commons.compress.compressors.CompressionFormatFactory;
import de.unkrig.commons.lang.AssertionUtil;
import de.unkrig.commons.lang.ExceptionUtil;
import de.unkrig.commons.lang.protocol.HardReference;
import de.unkrig.commons.lang.protocol.Predicate;
import de.unkrig.commons.lang.protocol.ProducerWhichThrows;
import de.unkrig.commons.nullanalysis.Nullable;
import de.unkrig.commons.util.concurrent.ConcurrentUtil;
import de.unkrig.commons.util.concurrent.SquadExecutor;

/**
 * Various file processing utility methods.
 */
public final
class FileProcessings {

    static { AssertionUtil.enableAssertionsForThisClass(); }

    private static final Logger LOGGER = Logger.getLogger(FileProcessings.class.getName());

    private
    FileProcessings() {}

    /**
     * @return A {@link FileProcessor} that does nothing and returns {@code null}.
     */
    @SuppressWarnings("unchecked") public static  FileProcessor
    nop() { return (FileProcessor) FileProcessings.NOP_FILE_PROCESSOR; }

    private static final FileProcessor NOP_FILE_PROCESSOR = new FileProcessor() {
        @Override @Nullable public Object process(String path, File in) { return null; }
        @Override public String           toString()                    { return "NOP"; }
    };

    /**
     * @see #process(List, FileProcessor, ExceptionHandler)
     */
    public static  void
    process(List files, FileProcessor fileProcessor) throws IOException, InterruptedException {
        FileProcessings.process(files, fileProcessor, ExceptionHandler.defaultHandler());
    }

    /**
     * Invokes the {@code fileProcessor} for each of the {@code files}.
     *
     * @param exceptionHandler Called if the processing of one of the {@code inputFiles} throws an {@link IOException}
     *                         or a {@link RuntimeException}
     */
    public static  void
    process(List files, FileProcessor fileProcessor, ExceptionHandler exceptionHandler)
    throws IOException, InterruptedException {
        for (File file : files) {

            String path = file.getPath();

            try {
                fileProcessor.process(path, file);
            } catch (IOException ioe) {
                exceptionHandler.handle(path, ioe);
            } catch (RuntimeException re) {
                exceptionHandler.handle(path, re);
            }
        }
    }

    /**
     * Returns a {@link FileProcessor} which processes directories and regular files exactly like the {@link
     * FileProcessor} returned by {@link #directoryProcessor(Predicate, FileProcessor, Comparator, FileProcessor,
     * DirectoryCombiner, SquadExecutor, ExceptionHandler)}, except that it processes directory members
     * recursively.
     * 

* Notice that the list passed to the directoryCombiner can contain {@code null} values iff the * regularFileProcessor returns {@code null} values. *

* * @param directoryMemberNameComparator The comparator used to sort a directory's members; a {@code null} value * means to NOT sort the members, i.e. leave them in their 'natural' order as * {@link File#list()} returns them * @see #directoryProcessor(Predicate, FileProcessor, Comparator, FileProcessor, * DirectoryCombiner, SquadExecutor, ExceptionHandler) */ public static FileProcessor directoryTreeProcessor( Predicate pathPredicate, FileProcessor regularFileProcessor, @Nullable Comparator directoryMemberNameComparator, DirectoryCombiner directoryCombiner, SquadExecutor squadExecutor, ExceptionHandler exceptionHandler ) { final HardReference> loopback = new HardReference>(); FileProcessor directoryMemberProcessor = new FileProcessor() { @Override @Nullable public T process(String path, File file) throws IOException, InterruptedException { FileProcessor fp = loopback.get(); assert fp != null; return fp.process(path, file); } }; FileProcessor result = FileProcessings.directoryProcessor( pathPredicate, regularFileProcessor, directoryMemberNameComparator, directoryMemberProcessor, directoryCombiner, squadExecutor, exceptionHandler ); loopback.set(result); return result; } /** * Returns a {@link FileProcessor} which processes directories and regular files. *

* Its behavior is as follows: *

    *
  • * Iff the subject file is not a directory, the {@code regularFileProcessor} is invoked and its result is * returned. *
  • *
  • * Otherwise the subject file is a directory. *
      *
    • * Iff, according to the {@code pathPredicate}, the directory can impossibly contain relevant documents, * {@code null} is returned. *
    • *
    • Otherwise, the directory members are listed.
    • *
    • * Iff the {@code directoryMemberNameComparator} is not {@code null}, the members are sorted according to * the {@code directoryMemberNameComparator} *
    • *
    • * The {@code directoryMemberProcessor} is invoked for each member, and the return values are stored in a * list. *
    • *
    • * The {@code directoryCombiner} is invoked with that list. (Notice that the list may contain {@code null} * values iff the directoryMemberProcessor returns {@code null} values.) *
    • *
    • * The value produced by the {@code directoryCombiner} is returned. *
    • *
    *
  • *
* Notice that the {@code pathPredicate} is only used to avoid unnecessary directory scans; apart from that the * {@code regularFileProcessor} and the {@code directoryMemberProcessor} are called for any file without * further ado. *

* If you use {@link Void} for {@code }, then {@link #nopDirectoryCombiner()} is the right {@code * directoryCombiner}. * * @param The return type of all {@link FileProcessor#process(String, File)} methods * @param squadExecutor Is used to process independent subtrees - could be {@link * ConcurrentUtil#SEQUENTIAL_EXECUTOR_SERVICE} * @param directoryMemberNameComparator The comparator used to sort a directory's members; a {@code null} value * means to NOT sort the members, i.e. leave them in their 'natural' order as * {@link File#list()} returns them */ public static FileProcessor directoryProcessor( final Predicate pathPredicate, final FileProcessor regularFileProcessor, @Nullable final Comparator directoryMemberNameComparator, final FileProcessor directoryMemberProcessor, final DirectoryCombiner directoryCombiner, final SquadExecutor squadExecutor, final ExceptionHandler exceptionHandler ) { final FileProcessor dp = new FileProcessor() { /** * @throws IOException {@code directory} is not a listable directory * @see #DirectoryProcessor(Comparator, FileProcessor, DirectoryCombiner, ExceptionHandler) */ @Override @Nullable public T process(final String directoryPath, final File directory) throws IOException, InterruptedException { FileProcessings.LOGGER.log( Level.FINER, "Processing directory \"{0}\" (path is \"{1}\")", new Object[] { directory, directoryPath } ); String[] memberNames = directory.list(); if (memberNames == null) throw new IOException("'" + directory + "' is not a listable directory"); if (directoryMemberNameComparator != null) Arrays.sort(memberNames, directoryMemberNameComparator); // Submit callables that do the actual work for each member. 
List> futures = new ArrayList>(memberNames.length); for (final String memberName : memberNames) { futures.add(squadExecutor.submit(new Callable() { @Override @Nullable public T call() throws IOException, InterruptedException { String memberPath = directoryPath + File.separatorChar + memberName; try { return directoryMemberProcessor.process(memberPath, new File(directory, memberName)); } catch (IOException ioe) { exceptionHandler.handle(memberPath, ioe); } catch (RuntimeException re) { exceptionHandler.handle(memberPath, re); } return null; } })); } // Now wait until the callables complete and pick their results. final List combinables = new ArrayList(memberNames.length); for (Future future : futures) { try { combinables.add(future.get()); } catch (ExecutionException ee) { Throwable cause = ee.getCause(); if (cause instanceof IOException) { throw (IOException) cause; // SUPPRESS CHECKSTYLE AvoidHidingCause } throw new IllegalStateException(ee); } } // Now call the "directory combiner" with the directory members' results. return directoryCombiner.combine(directoryPath, directory, combinables); } }; return new FileProcessor() { @Override @Nullable public T process(String path, File file) throws IOException, InterruptedException { if (file.isDirectory()) { return pathPredicate.evaluate(path + '/') ? dp.process(path, file) : null; } else { return pathPredicate.evaluate(path) ? regularFileProcessor.process(path, file) : null; } } }; } /** * Transforms the return values of the invocations of {@code directoryMemberProcessor} for each member. * * @param The return type of {@link FileProcessor#process(String, File)} */ public interface DirectoryCombiner { /** @see DirectoryCombiner */ @Nullable T combine(String directoryPath, File directory, List combinables); } /** * A {@link DirectoryCombiner} which ignores the combinables and returns {@code null}. 
*/ @SuppressWarnings("unchecked") public static DirectoryCombiner nopDirectoryCombiner() { return (DirectoryCombiner) FileProcessings.NOP_DIRECTORY_COMBINER; } private static final DirectoryCombiner NOP_DIRECTORY_COMBINER = new DirectoryCombiner() { @Override @Nullable public Object combine(String directoryPath, File directory, List combinables) { return null; } }; /** * Returns a {@link FileProcessor} which processes files by feeding their contents either to the {@code * archiveContentsProcessor}, the {@code compressedContentsProcessor} or the normalContentsProcessor}. *

* Archive files and compressed files are introspected iff {@code lookIntoFormat} evaluates {@code true} for * "format:path". * * @param lookIntoFormat See {@link CompressUtil#processFile(String, File, Predicate, ArchiveHandler, * CompressorHandler, NormalContentsHandler)} */ public static FileProcessor archiveCompressedAndNormalFileProcessor( final Predicate lookIntoFormat, final ContentsProcessor archiveContentsProcessor, final ArchiveCombiner archiveEntryCombiner, final ContentsProcessor compressedContentsProcessor, final ContentsProcessor normalContentsProcessor, final ExceptionHandler exceptionHandler ) { return new FileProcessor() { @Override @Nullable public T process(final String path, final File file) throws FileNotFoundException, IOException { return CompressUtil.processFile( path, file, // file lookIntoFormat, // lookIntoFormat FileProcessings.archiveHandler( // archiveHandler path, archiveContentsProcessor, archiveEntryCombiner, file, exceptionHandler ), FileProcessings.compressorHandler( // compressorHandler path, compressedContentsProcessor, file ), FileProcessings.normalContentsHandler( // normalContentsHandler path, normalContentsProcessor, file ) ); } @Override public String toString() { return "compressedAndArchiveFileProcessor"; } }; } /** * Returns a {@link FileProcessor} which processes files by feeding their contents through the {@code delegate} * (just like the {@link FileContentsProcessor}), but automagically detects various archive and compression formats * (also nested) and processes the entries of the archive and the uncompressed contents instead of * the "raw" contents. *

* Archive files/entries and compressed files/entries are introspected iff {@code lookIntoFormat} evaluates {@code * true} for "format:path". */ public static FileProcessor recursiveCompressedAndArchiveFileProcessor( final Predicate lookIntoFormat, ArchiveCombiner archiveEntryCombiner, final ContentsProcessor delegate, final ExceptionHandler exceptionHandler ) { ContentsProcessor tmp = ContentsProcessings.recursiveCompressedAndArchiveContentsProcessor( lookIntoFormat, archiveEntryCombiner, delegate, exceptionHandler ); return FileProcessings.archiveCompressedAndNormalFileProcessor( lookIntoFormat, // lookIntoFormat tmp, // archiveContentsProcessor archiveEntryCombiner, // archiveEntryCombiner tmp, // compressedContentsProcessor delegate, // normalContentsProcessor exceptionHandler // exceptionHandler ); } private static ArchiveHandler archiveHandler( final String path, final ContentsProcessor contentsProcessor, final ArchiveCombiner archiveEntryCombiner, final File archiveFile, final ExceptionHandler exceptionHandler ) { return new ArchiveHandler() { @Nullable @Override public T handleArchive( final ArchiveInputStream archiveInputStream, final ArchiveFormat archiveFormat ) throws IOException { return ContentsProcessings.processArchive( path, // archivePath archiveInputStream, // archiveInputStream contentsProcessor, // contentsProcessor archiveEntryCombiner, // archiveEntryCombiner new ProducerWhichThrows() { // archiveOpener @Override @Nullable public ArchiveInputStream produce() throws IOException { try { return archiveFormat.open(archiveFile); } catch (ArchiveException ae) { throw ExceptionUtil.wrap(null, ae, IOException.class); } } }, exceptionHandler // exceptionHandler ); } }; } private static CompressorHandler compressorHandler( final String path, final ContentsProcessor contentsProcessor, final File compressedFile ) { return new CompressorHandler() { @Nullable @Override public T handleCompressor(CompressorInputStream compressorInputStream, final 
CompressionFormat compressionFormat) throws IOException { @SuppressWarnings("deprecation") long uncompressedSize = CompressionFormatFactory.getUncompressedSize(compressorInputStream); return contentsProcessor.process( path + '!', // path compressorInputStream, // compressorInputStream uncompressedSize, // size -1L, // crc32 new ProducerWhichThrows() { // opener @Override @Nullable public InputStream produce() throws IOException { return compressionFormat.open(compressedFile); } } ); } }; } private static NormalContentsHandler normalContentsHandler(final String path, final ContentsProcessor contentsProcessor, final File normalFile) { return new NormalContentsHandler() { @Override @Nullable public T handleNormalContents(InputStream inputStream) throws IOException { return contentsProcessor.process( path, // path inputStream, // inputStream -1L, // size -1L, // crc32 new ProducerWhichThrows() { // opener @Override @Nullable public InputStream produce() throws IOException { return new FileInputStream(normalFile); } } ); } }; } }