All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.loc.repository.bagit.verify.PayloadVerifier Maven / Gradle / Ivy

Go to download

The BAGIT LIBRARY is a software library intended to support the creation, manipulation, and validation of bags. Its current version is 0.97. It is version aware with the earliest supported version being 0.93.

There is a newer version: 5.2.0
Show newest version
package gov.loc.repository.bagit.verify;

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashSet;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.helpers.MessageFormatter;

import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Manifest;
import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.exceptions.FileNotInPayloadDirectoryException;
import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException;
import gov.loc.repository.bagit.exceptions.MaliciousPathException;
import gov.loc.repository.bagit.exceptions.UnsupportedAlgorithmException;
import gov.loc.repository.bagit.hash.BagitAlgorithmNameToSupportedAlgorithmMapping;
import gov.loc.repository.bagit.hash.StandardBagitAlgorithmNameToSupportedAlgorithmMapping;
import gov.loc.repository.bagit.reader.ManifestReader;
import gov.loc.repository.bagit.util.PathUtils;

/**
 * Verifies the payload of a bag against its manifests: every file listed in a manifest must exist,
 * and every file in the payload directory must be listed (in at least one manifest for bags older
 * than version 1.0, in all payload manifests otherwise).
 * <p>
 * The existence checks are dispatched to an {@link ExecutorService}. Calling {@link #close()} shuts
 * that executor down, including one that was supplied by the caller.
 */
public class PayloadVerifier implements AutoCloseable{
  private static final Logger logger = LoggerFactory.getLogger(PayloadVerifier.class);
  private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

  private final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping;
  private final ExecutorService executor;
  
  /**
   * Create a PayloadVerifier using a cached thread pool and the 
   * {@link StandardBagitAlgorithmNameToSupportedAlgorithmMapping} mapping
   */
  public PayloadVerifier(){
    this(new StandardBagitAlgorithmNameToSupportedAlgorithmMapping(), Executors.newCachedThreadPool());
  }

  /**
   * Create a PayloadVerifier using a cached thread pool and a custom mapping
   * 
   * @param nameMapping the mapping between BagIt algorithm name and the java supported algorithm
   */
  public PayloadVerifier(final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping) {
    this(nameMapping, Executors.newCachedThreadPool());
  }
  
  /**
   * Create a PayloadVerifier using a custom thread pool and the 
   * {@link StandardBagitAlgorithmNameToSupportedAlgorithmMapping} mapping
   * 
   * @param executor the thread pool to use when doing work
   */
  public PayloadVerifier(final ExecutorService executor) {
    this(new StandardBagitAlgorithmNameToSupportedAlgorithmMapping(), executor);
  }
  
  /**
   * Create a PayloadVerifier using a custom thread pool and a custom mapping
   * 
   * @param nameMapping the mapping between BagIt algorithm name and the java supported algorithm
   * @param executor the thread pool to use when doing work
   */
  public PayloadVerifier(final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping, final ExecutorService executor) {
    this.nameMapping = nameMapping;
    this.executor = executor;
  }
  
  /**
   * Shuts down the thread pool used by this verifier so the resource isn't leaked.
   * Note that this also shuts down an executor that was passed in through a constructor.
   * 
   * @throws SecurityException if the executor refuses the shutdown request
   */
  @Override
  public void close() throws SecurityException{
    executor.shutdown();
  }

  /**
   * Verify that all the files in the payload directory are listed in the manifest and 
   * all files listed in the manifests exist.
   * <p>
   * For bags older than version 1.0 a payload file only has to be listed in at least one manifest;
   * for version 1.0 and newer it has to be listed in all payload manifests.
   * 
   * @param bag the bag to check
   * @param ignoreHiddenFiles to ignore hidden files unless they are specifically listed in a manifest
   * @throws IOException if there is a problem reading a file
   * @throws MaliciousPathException the path in the manifest was specifically crafted to cause harm
   * @throws UnsupportedAlgorithmException if the algorithm used for the manifest is unsupported
   * @throws InvalidBagitFileFormatException if any of the manifests don't conform to the bagit specification
   * @throws FileNotInPayloadDirectoryException if a file is listed in a manifest but doesn't exist in the payload directory
   * @throws InterruptedException if a thread is interrupted while doing work
   */
  public void verifyPayload(final Bag bag, final boolean ignoreHiddenFiles)
      throws IOException, MaliciousPathException, UnsupportedAlgorithmException, 
      InvalidBagitFileFormatException, FileNotInPayloadDirectoryException, InterruptedException {
    
    final Set<Path> allFilesListedInManifests = getAllFilesListedInManifests(bag);
    checkAllFilesListedInManifestExist(allFilesListedInManifests);

    if (bag.getVersion().isOlder(new Version(1, 0))) {
      checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest(allFilesListedInManifests, PathUtils.getDataDir(bag), ignoreHiddenFiles);
    } else {
      checkAllFilesInPayloadDirAreListedInAllManifests(bag.getPayLoadManifests(), PathUtils.getDataDir(bag), ignoreHiddenFiles);
    }
  }

  /*
   * Get all the files listed in all the manifests. Both payload manifests ("manifest-*") and
   * tag manifests ("tagmanifest-*") found in the bagit directory are read.
   */
  private Set<Path> getAllFilesListedInManifests(final Bag bag)
      throws IOException, MaliciousPathException, UnsupportedAlgorithmException, InvalidBagitFileFormatException {
    logger.debug(messages.getString("all_files_in_manifests"));
    final Set<Path> filesListedInManifests = new HashSet<>();

    // try-with-resources so the directory handle is released even if a manifest fails to parse
    try(DirectoryStream<Path> directoryStream = 
        Files.newDirectoryStream(PathUtils.getBagitDir(bag.getVersion(), bag.getRootDir()))){
      for (final Path path : directoryStream) {
        final String filename = PathUtils.getFilename(path);
        if (filename.startsWith("tagmanifest-") || filename.startsWith("manifest-")) {
          logger.debug(messages.getString("get_listing_in_manifest"), path);
          final Manifest manifest = ManifestReader.readManifest(nameMapping, path, bag.getRootDir(),
              bag.getFileEncoding());
          filesListedInManifests.addAll(manifest.getFileToChecksumMap().keySet());
        }
      }
    }

    return filesListedInManifests;
  }

  /*
   * Make sure all the listed files actually exist. One task per file is handed to the executor;
   * the latch is sized to the number of files and awaited before the result is inspected.
   */
  @SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
  private void checkAllFilesListedInManifestExist(final Set<Path> files) throws FileNotInPayloadDirectoryException, InterruptedException {
    final CountDownLatch latch = new CountDownLatch(files.size());
    // concurrent set because the tasks report missing files from executor threads
    final Set<Path> missingFiles = new ConcurrentSkipListSet<>();

    logger.info(messages.getString("check_all_files_in_manifests_exist"));
    for (final Path file : files) {
      executor.execute(new CheckIfFileExistsTask(file, missingFiles, latch));
    }

    latch.await();

    if (!missingFiles.isEmpty()) {
      final String formattedMessage = messages.getString("missing_payload_files_error");
      throw new FileNotInPayloadDirectoryException(MessageFormatter.format(formattedMessage, missingFiles).getMessage());
    }
  }

  /*
   * Make sure all files in the directory are in at least 1 manifest.
   * Nothing is checked when the payload directory does not exist.
   */
  private static void checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest(final Set<Path> filesListedInManifests,
      final Path payloadDir, final boolean ignoreHiddenFiles) throws IOException {
    logger.debug(messages.getString("checking_file_in_at_least_one_manifest"), payloadDir);
    if (Files.exists(payloadDir)) {
      Files.walkFileTree(payloadDir,
          new PayloadFileExistsInAtLeastOneManifestVistor(filesListedInManifests, ignoreHiddenFiles));
    }
  }

  /*
   * As per the bagit-spec 1.0+ all files have to be listed in all manifests.
   * Nothing is checked when the payload directory does not exist.
   */
  private static void checkAllFilesInPayloadDirAreListedInAllManifests(final Set<Manifest> payLoadManifests,
      final Path payloadDir, final boolean ignoreHiddenFiles) throws IOException {
    logger.debug(messages.getString("checking_file_in_all_manifests"), payloadDir);
    if (Files.exists(payloadDir)) {
      Files.walkFileTree(payloadDir, new PayloadFileExistsInAllManifestsVistor(payLoadManifests, ignoreHiddenFiles));
    }
  }

  /**
   * @return the mapping between BagIt algorithm name and the java supported algorithm used when reading manifests
   */
  public BagitAlgorithmNameToSupportedAlgorithmMapping getNameMapping() {
    return nameMapping;
  }

  /**
   * @return the thread pool this verifier submits its work to
   */
  public ExecutorService getExecutor() {
    return executor;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy