All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.gov.nationalarchives.droid.command.ResultPrinter Maven / Gradle / Ivy

There is a newer version: 6.8.0
Show newest version
/**
 * Copyright (c) 2016, The National Archives 
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following
 * conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 *  * Neither the name of the The National Archives nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package uk.gov.nationalarchives.droid.command;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import uk.gov.nationalarchives.droid.command.action.CommandExecutionException;
import uk.gov.nationalarchives.droid.command.archive.ArchiveConfiguration;
import uk.gov.nationalarchives.droid.command.archive.Bzip2ArchiveContentIdentifier;
import uk.gov.nationalarchives.droid.command.archive.GZipArchiveContentIdentifier;
import uk.gov.nationalarchives.droid.command.archive.IsoArchiveContainerIdentifier;
import uk.gov.nationalarchives.droid.command.archive.RarArchiveContainerIdentifier;
import uk.gov.nationalarchives.droid.command.archive.SevenZipArchiveContainerIdentifier;
import uk.gov.nationalarchives.droid.command.archive.ZipArchiveContentIdentifier;
import uk.gov.nationalarchives.droid.command.archive.TarArchiveContentIdentifier;
import uk.gov.nationalarchives.droid.command.archive.ArcArchiveContentIdentifier;
import uk.gov.nationalarchives.droid.command.archive.WarcArchiveContentIdentifier;
import uk.gov.nationalarchives.droid.command.container.Ole2ContainerContentIdentifier;
import uk.gov.nationalarchives.droid.command.container.ZipContainerContentIdentifier;
import uk.gov.nationalarchives.droid.container.ContainerFileIdentificationRequestFactory;
import uk.gov.nationalarchives.droid.container.ContainerSignatureDefinitions;
import uk.gov.nationalarchives.droid.container.TriggerPuid;
import uk.gov.nationalarchives.droid.container.ole2.Ole2IdentifierEngine;
import uk.gov.nationalarchives.droid.container.zip.ZipIdentifierEngine;
import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier;
import uk.gov.nationalarchives.droid.core.interfaces.IdentificationRequest;
import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResult;
import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultCollection;
import uk.gov.nationalarchives.droid.core.interfaces.archive.IdentificationRequestFactory;

/**
 * File identification results printer.
 *
 * NB: This class is called recursively when archive files are opened 
 * 
 * @author rbrennan
 */
public class ResultPrinter {

    private static final String FORWARD_SLASH = "/";
    // A single backslash character (escaped in the Java literal).
    private static final String BACKSLASH = "\\";
    private static final String L_BRACKET = "(";
    private static final String R_BRACKET = ")";
    private static final String SPACE = " ";

    private static final String OLE2_CONTAINER = "OLE2";
    private static final String ZIP = "ZIP";
    private static final String ZIP_ARCHIVE = "x-fmt/263";
    private static final String RAR_ARCHIVE = "x-fmt/264";
    private static final String RAR_ARCHIVE_OTHER = "fmt/411";
    private static final String TAR_ARCHIVE = "x-fmt/265";
    private static final String GZIP_ARCHIVE = "x-fmt/266";
    private static final String ARC_ARCHIVE = "x-fmt/219";
    private static final String ARC_ARCHIVE_OTHER = "fmt/410";
    private static final String WARC_ARCHIVE = "fmt/289";
    private static final String WARC_ARCHIVE_1_1 = "fmt/1281";
    private static final String WARC_ARCHIVE_1_0 = "fmt/1355";
    private static final String ISO_9660 = "fmt/468";
    private static final String SEVEN_ZIP = "fmt/484";
    private static final String BZIP2_ARCHIVE = "x-fmt/267";
    private static final String BZIP2_ARCHIVE_OTHER = "x-fmt/268";


    private final BinarySignatureIdentifier binarySignatureIdentifier;
    private final ContainerSignatureDefinitions containerSignatureDefinitions;
    // Remains null when no container signature definitions were supplied.
    private final List<TriggerPuid> triggerPuids;
    private IdentificationRequestFactory requestFactory;
    private final String path;
    private final String slash;
    private final String slash1;
    // The path delimiter that is NOT the local one; it is replaced by 'slash' when printing.
    private final String wrongSlash;
    private ArchiveConfiguration archiveConfiguration;

    /**
     * Store signature files.
     *
     * @param binarySignatureIdentifier     binary signature identifier
     * @param containerSignatureDefinitions container signatures; may be null, in which case
     *                                      no container identification is attempted
     * @param path                          current file/container path
     * @param slash                         local path element delimiter
     * @param slash1                        local first container prefix delimiter
     * @param archiveConfiguration          configuration to expand archives and web archives
     */
    public ResultPrinter(final BinarySignatureIdentifier binarySignatureIdentifier,
                         final ContainerSignatureDefinitions containerSignatureDefinitions,
                         final String path, final String slash, final String slash1, ArchiveConfiguration archiveConfiguration) {

        this.binarySignatureIdentifier = binarySignatureIdentifier;
        this.containerSignatureDefinitions = containerSignatureDefinitions;
        this.path = path;
        this.slash = slash;
        this.slash1 = slash1;
        this.wrongSlash = this.slash.equals(FORWARD_SLASH) ? BACKSLASH : FORWARD_SLASH;
        this.archiveConfiguration = archiveConfiguration;
        this.triggerPuids = containerSignatureDefinitions != null
                ? containerSignatureDefinitions.getTiggerPuids()
                : null;
    }

    /**
     * Output identification for this file.
     *
     * Prints one "fileName,puid" line to standard output per identification result
     * (or "fileName,Unknown" when there are none). Container results, when present,
     * take precedence over the binary signature results. When the file was not
     * identified via a container signature and its PUID is a known archive or web
     * archive format, the archive contents are identified as well, subject to the
     * archive configuration.
     *
     * @param results                       identification Results
     * @param request                       identification Request
     *
     * @throws CommandExecutionException    if unexpected container type encountered
     */
    //CHECKSTYLE:OFF
    public void print(final IdentificationResultCollection results,
            final IdentificationRequest request) throws CommandExecutionException {

        final String fileName = (path + request.getFileName()).replace(wrongSlash, slash);
        final IdentificationResultCollection containerResults =
                getContainerResults(results, request, fileName);

        IdentificationResultCollection finalResults = new IdentificationResultCollection(request);
        boolean container = false;
        if (!containerResults.getResults().isEmpty()) {
            container = true;
            finalResults = containerResults;
        } else if (!results.getResults().isEmpty()) {
            finalResults = results;
        }
        if (!finalResults.getResults().isEmpty()) {
            binarySignatureIdentifier.removeLowerPriorityHits(finalResults);
        }

        // Re-check: the collection is queried again after the priority filtering above.
        if (finalResults.getResults().isEmpty()) {
            System.out.println(fileName + ",Unknown");
            return;
        }

        for (IdentificationResult identResult : finalResults.getResults()) {
            final String puid = identResult.getPuid();
            System.out.println(fileName + "," + puid);
            // A switch on a null String throws NullPointerException, so skip
            // archive expansion for results that carry no PUID.
            if (!container && puid != null) {
                expandArchive(puid, results, request);
            }
        }
    }

    /**
     * Identifies the contents of the file if its PUID denotes a supported archive
     * or web archive format and the archive configuration enables that format.
     *
     * @param puid    non-null PUID of the identified file
     * @param results identification results of the file (supplies the URI)
     * @param request identification request of the file
     *
     * @throws CommandExecutionException propagated from the archive identifiers
     */
    private void expandArchive(final String puid, final IdentificationResultCollection results,
            final IdentificationRequest request) throws CommandExecutionException {
        switch (puid) {
            case GZIP_ARCHIVE:
                if (shouldExpandArchive("GZIP")) {
                    GZipArchiveContentIdentifier gzipArchiveIdentifier =
                            new GZipArchiveContentIdentifier(binarySignatureIdentifier,
                                    containerSignatureDefinitions, path, slash, slash1, archiveConfiguration);
                    gzipArchiveIdentifier.identify(results.getUri(), request);
                }
                break;
            case TAR_ARCHIVE:
                if (shouldExpandArchive("TAR")) {
                    TarArchiveContentIdentifier tarArchiveIdentifier =
                            new TarArchiveContentIdentifier(binarySignatureIdentifier,
                                    containerSignatureDefinitions, path, slash, slash1, archiveConfiguration);
                    tarArchiveIdentifier.identify(results.getUri(), request);
                }
                break;
            case ZIP_ARCHIVE:
                if (shouldExpandArchive(ZIP)) {
                    ZipArchiveContentIdentifier zipArchiveIdentifier =
                            new ZipArchiveContentIdentifier(binarySignatureIdentifier,
                                    containerSignatureDefinitions, path, slash, slash1, archiveConfiguration);
                    zipArchiveIdentifier.identify(results.getUri(), request);
                }
                break;
            case ISO_9660:
                if (shouldExpandArchive("ISO")) {
                    IsoArchiveContainerIdentifier isoArchiveContainerIdentifier =
                            new IsoArchiveContainerIdentifier(binarySignatureIdentifier,
                                    containerSignatureDefinitions, path, slash, slash1, archiveConfiguration);
                    isoArchiveContainerIdentifier.identify(results.getUri(), request);
                }
                break;
            case SEVEN_ZIP:
                if (shouldExpandArchive("7ZIP")) {
                    SevenZipArchiveContainerIdentifier sevenZipArchiveContainerIdentifier =
                            new SevenZipArchiveContainerIdentifier(binarySignatureIdentifier,
                                    containerSignatureDefinitions, path, slash, slash1, archiveConfiguration);
                    sevenZipArchiveContainerIdentifier.identify(results.getUri(), request);
                }
                break;
            case BZIP2_ARCHIVE:
            case BZIP2_ARCHIVE_OTHER:
                if (shouldExpandArchive("BZIP2")) {
                    Bzip2ArchiveContentIdentifier bzip2ArchiveContentIdentifier =
                            new Bzip2ArchiveContentIdentifier(binarySignatureIdentifier,
                                    containerSignatureDefinitions, path, slash, slash1, archiveConfiguration);
                    bzip2ArchiveContentIdentifier.identify(results.getUri(), request);
                }
                break;
            case RAR_ARCHIVE:
            case RAR_ARCHIVE_OTHER:
                if (shouldExpandArchive("RAR")) {
                    RarArchiveContainerIdentifier rarArchiveContentIdentifier =
                            new RarArchiveContainerIdentifier(binarySignatureIdentifier,
                                    containerSignatureDefinitions, path, slash, slash1, archiveConfiguration);
                    rarArchiveContentIdentifier.identify(results.getUri(), request);
                }
                break;
            case ARC_ARCHIVE:
            case ARC_ARCHIVE_OTHER:
                if (shouldExpandWebArchive("ARC")) {
                    ArcArchiveContentIdentifier arcArchiveIdentifier =
                            new ArcArchiveContentIdentifier(binarySignatureIdentifier,
                                    containerSignatureDefinitions, path, slash, slash1, archiveConfiguration);
                    arcArchiveIdentifier.identify(results.getUri(), request);
                }
                break;
            case WARC_ARCHIVE:
            case WARC_ARCHIVE_1_0:
            case WARC_ARCHIVE_1_1:
                if (shouldExpandWebArchive("WARC")) {
                    WarcArchiveContentIdentifier warcArchiveIdentifier =
                            new WarcArchiveContentIdentifier(binarySignatureIdentifier,
                                    containerSignatureDefinitions, path, slash, slash1, archiveConfiguration);
                    warcArchiveIdentifier.identify(results.getUri(), request);
                }
                break;
            default:
                // Not an archive format handled here; nothing to expand.
                break;
        }
    }

    /**
     * @param archiveType archive type name, e.g. "ZIP"
     * @return true if all archives are to be expanded, or the given type is among
     *         the configured archive types (compared case-insensitively)
     */
    private boolean shouldExpandArchive(final String archiveType) {
        return archiveConfiguration.getExpandAllArchives()
                || containsCaseInsensitive(archiveType, archiveConfiguration.getExpandArchiveTypes());
    }

    /**
     * @param webArchiveType web archive type name, e.g. "WARC"
     * @return true if all web archives are to be expanded, or the given type is among
     *         the configured web archive types (compared case-insensitively)
     */
    private boolean shouldExpandWebArchive(final String webArchiveType) {
        return archiveConfiguration.getExpandAllWebArchives()
                || containsCaseInsensitive(webArchiveType, archiveConfiguration.getExpandWebArchiveTypes());
    }

    /**
     * Tests whether an array contains a string, ignoring case.
     *
     * @param needle   the string to look for
     * @param haystack the strings to search; may be null and may contain null elements
     * @return true if any non-null element of haystack equals needle ignoring case;
     *         false otherwise, including when haystack is null
     */
    public boolean containsCaseInsensitive(String needle, String[] haystack) {
        if (haystack == null) {
            return false;
        }
        // Null elements are skipped rather than dereferenced.
        return Arrays.stream(haystack).anyMatch(x -> x != null && x.equalsIgnoreCase(needle));
    }

    //CHECKSTYLE:ON
    /**
     * Runs container signature identification for every result whose PUID is a
     * container trigger PUID.
     *
     * @param results  binary signature identification results for the file
     * @param request  identification request for the file
     * @param fileName printable file name, used only in I/O error messages
     * @return the container identification results; empty if there were no input
     *         results, no container signature definitions, or no trigger PUID matched
     *
     * @throws CommandExecutionException if a trigger PUID has a container type other
     *                                   than OLE2 or ZIP
     */
    private IdentificationResultCollection getContainerResults(
        final IdentificationResultCollection results,
        final IdentificationRequest request, final String fileName) throws CommandExecutionException {

        IdentificationResultCollection containerResults =
                new IdentificationResultCollection(request);

        if (results.getResults().isEmpty() || containerSignatureDefinitions == null) {
            return containerResults;
        }

        for (IdentificationResult identResult : results.getResults()) {
            final String filePuid = identResult.getPuid();
            if (filePuid == null) {
                continue;
            }
            final TriggerPuid containerPuid = getTriggerPuidByPuid(filePuid);
            if (containerPuid == null) {
                continue;
            }

            requestFactory = new ContainerFileIdentificationRequestFactory();
            final String containerType = containerPuid.getContainerType();

            if (OLE2_CONTAINER.equals(containerType)) {
                try {
                    Ole2ContainerContentIdentifier ole2Identifier =
                            new Ole2ContainerContentIdentifier();
                    ole2Identifier.init(containerSignatureDefinitions, containerType);
                    Ole2IdentifierEngine ole2IdentifierEngine = new Ole2IdentifierEngine();
                    ole2IdentifierEngine.setRequestFactory(requestFactory);
                    ole2Identifier.setIdentifierEngine(ole2IdentifierEngine);
                    containerResults = ole2Identifier.process(
                        request.getSourceInputStream(), containerResults);
                } catch (IOException e) {   // carry on after container i/o problems
                    System.err.println(e + SPACE + L_BRACKET + fileName + R_BRACKET);
                }
            } else if (ZIP.equals(containerType)) {
                try {
                    ZipContainerContentIdentifier zipIdentifier =
                                new ZipContainerContentIdentifier();
                    zipIdentifier.init(containerSignatureDefinitions, containerType);
                    ZipIdentifierEngine zipIdentifierEngine = new ZipIdentifierEngine();
                    zipIdentifierEngine.setRequestFactory(requestFactory);
                    zipIdentifier.setIdentifierEngine(zipIdentifierEngine);
                    containerResults = zipIdentifier.process(
                        request.getSourceInputStream(), containerResults);
                } catch (IOException e) {   // carry on after container i/o problems
                    System.err.println(e + SPACE + L_BRACKET + fileName + R_BRACKET);
                }
            } else {
                throw new CommandExecutionException("Unknown container type: " + containerPuid);
            }
        }
        return containerResults;
    }

    /**
     * Looks up the container trigger entry for a PUID.
     *
     * @param puid non-null PUID to look up
     * @return the first trigger entry whose PUID equals the given one, or null if
     *         there is none (or no trigger PUIDs are available)
     */
    private TriggerPuid getTriggerPuidByPuid(final String puid) {
        if (triggerPuids == null) {
            return null;
        }
        for (final TriggerPuid tp : triggerPuids) {
            // Compare from the known non-null side so an entry without a PUID cannot throw.
            if (puid.equals(tp.getPuid())) {
                return tp;
            }
        }
        return null;
    }

    /**
     *
     * @return configuration to expand web archives and archives
     */
    public ArchiveConfiguration getArchiveConfiguration() {
        return archiveConfiguration;
    }

    /**
     *
     * @param archiveConfiguration configuration to expand web archives and archives
     */
    public void setArchiveConfiguration(ArchiveConfiguration archiveConfiguration) {
        this.archiveConfiguration = archiveConfiguration;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy