All downloads are free. Search and download functionality uses the official Maven repository.

com.github.jscancella.domain.Bag Maven / Gradle / Ivy

Go to download

This is a software library intended to support the creation, manipulation, and validation of "bags" from the BagIt specification. It currently supports versions 0.93 through 1.0.

There is a newer version: 5.2
Show newest version
package com.github.jscancella.domain;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.ResourceBundle;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.github.jscancella.domain.Manifest.ManifestBuilder;
import com.github.jscancella.domain.Metadata.MetadataBuilder;
import com.github.jscancella.domain.internal.EclipseGenerated;
import com.github.jscancella.exceptions.CorruptChecksumException;
import com.github.jscancella.exceptions.FileNotInPayloadDirectoryException;
import com.github.jscancella.exceptions.InvalidBagitFileFormatException;
import com.github.jscancella.exceptions.MaliciousPathException;
import com.github.jscancella.exceptions.MissingBagitFileException;
import com.github.jscancella.exceptions.MissingPayloadDirectoryException;
import com.github.jscancella.exceptions.MissingPayloadManifestException;
import com.github.jscancella.hash.BagitChecksumNameMapping;
import com.github.jscancella.hash.Hasher;
import com.github.jscancella.internal.ManifestFilter;
import com.github.jscancella.internal.PathUtils;
import com.github.jscancella.reader.internal.BagitTextFileReader;
import com.github.jscancella.reader.internal.FetchReader;
import com.github.jscancella.reader.internal.ManifestReader;
import com.github.jscancella.reader.internal.MetadataReader;
import com.github.jscancella.verify.internal.BagitTextFileVerifier;
import com.github.jscancella.verify.internal.MandatoryVerifier;
import com.github.jscancella.verify.internal.ManifestVerifier;
import com.github.jscancella.writer.internal.BagitFileWriter;
import com.github.jscancella.writer.internal.FetchWriter;
import com.github.jscancella.writer.internal.ManifestWriter;
import com.github.jscancella.writer.internal.MetadataWriter;

/**
 * The main representation of the bagit spec. This is an immutable object.
 */
@SuppressWarnings({ "PMD.TooManyMethods", "PMD.GodClass" })
public final class Bag {  
  private static final Logger logger = LoggerFactory.getLogger(Bag.class);
  private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");
  
  //The original version of the bag
  private final Version version;
  
  //from the bagit.txt or UTF-8 for new bags
  private final Charset fileEncoding;// = StandardCharsets.UTF_8;
  
  //equivalent to the manifest-.txt files
  private final Set payLoadManifests;
  
  //equivalent to the tagmanifest-.txt  files
  private final Set tagManifests;
  
  //equivalent to the fetch.txt
  private final List itemsToFetch;
  
  //equivalent to the bag-info.txt 
  private final Metadata metadata;
  
  //the current location of the bag on the filesystem
  private final Path rootDir;
  
  /**
   * An immutable Bag object. Typically used by {@link BagBuilder}.
   * 
   * @param version the version of the bagit specification this adheres to
   * @param fileEncoding the encoding of the tag files
   * @param payloadManifests the manifest(s) that define the payload files
   * @param tagManifests the manifest(s) that define the tag files
   * @param itemsToFetch items to fetch to make this bag complete (see {@link #isComplete(boolean)}
   * @param metadata the human readable information to keep with the bag
   * @param rootDir the root path of the bag on the filesystem
   */
  public Bag(final Version version, final Charset fileEncoding, final Set payloadManifests, 
      final Set tagManifests, final List itemsToFetch, final Metadata metadata, final Path rootDir){
    this.version = version;
    this.fileEncoding = fileEncoding;
    this.payLoadManifests = new HashSet<>(payloadManifests);
    this.tagManifests = new HashSet<>(tagManifests);
    this.itemsToFetch = new ArrayList<>(itemsToFetch);
    this.metadata = metadata;
    this.rootDir = rootDir;
  }
  
  
  /**
   * @return the directory that contains the payload files
   */
  public Path getDataDir(){
    return rootDir.resolve("data");
  }
  
  /**
   * @return the directory that contains the tag files
   */
  public Path getTagFileDir(){
    return rootDir; //whenever .bagit comes around this will be very helpful
  }
  
  /**
   * @return the bagit specification version this bag adheres to
   */
  public Version getVersion(){
    return version;
  }

  /**
   * @return the manifests that make up the payload files
   */
  public Set getPayLoadManifests() {
    return Collections.unmodifiableSet(payLoadManifests);
  }

  /**
   * @return the manifests that make up the tag files
   */
  public Set getTagManifests() {
    return Collections.unmodifiableSet(tagManifests);
  }

  /**
   * @return files to fetch
   */
  public List getItemsToFetch() {
    return Collections.unmodifiableList(itemsToFetch);
  }

  /**
   * @return the human readable information kept with a bag
   */
  public Metadata getMetadata() {
    return metadata;
  }

  /**
   * @return the file encoding of the tag files. You SHOULD be using {@link StandardCharsets#UTF_8}
   */
  public Charset getFileEncoding() {
    return fileEncoding;
  }
  
  /**
   * @return the root directory of a bag, usually the folder name
   */
  public Path getRootDir() {
    return rootDir;
  }

  @EclipseGenerated
  @Override
  public String toString() {
    final StringBuilder builder = new StringBuilder(95);
    builder.append("Bag [version=").append(version)
    .append(", fileEncoding=").append(fileEncoding)
    .append(", payLoadManifests=[");
    for(final Manifest payloadManifest : payLoadManifests){
      builder.append(payloadManifest).append(' ');
    }
    builder.append("], tagManifests=[");
    for(final Manifest tagManifest : tagManifests){
      builder.append(tagManifest).append(' ');
    }
    builder.append("], itemsToFetch=").append(itemsToFetch)
    .append(", metadata=").append(metadata).append(']');
    
    return builder.toString();
  }

  @EclipseGenerated
  @Override
  public int hashCode() {
    return Objects.hash(version, fileEncoding, payLoadManifests, tagManifests, itemsToFetch, metadata);
  }

  @EclipseGenerated
  @Override
  public boolean equals(final Object obj) {
    boolean isEqual = false;
    
    if (obj instanceof Bag){
      final Bag other = (Bag) obj;
      isEqual = Objects.equals(this.version, other.getVersion()) && 
          Objects.equals(this.fileEncoding, other.getFileEncoding()) &&
          Objects.equals(this.payLoadManifests, other.getPayLoadManifests()) && 
          Objects.equals(this.tagManifests, other.getTagManifests()) &&
          Objects.equals(this.itemsToFetch, other.getItemsToFetch()) &&
          Objects.equals(this.metadata, other.getMetadata());
    }
    
    return isEqual;
  }
  
  /**
   * See https://tools.ietf.org/html/draft-kunze-bagit#section-3
* A bag is valid if the bag is complete and every checksum has been * verified against the contents of its corresponding file. * * @param ignoreHiddenFiles to include hidden files when checking * * @return true if the bag is valid or throws an exception * * @throws InvalidBagitFileFormatException if the file(s) are not formatted correctly * @throws IOException if there is a problem reading a file * @throws CorruptChecksumException the checksum doesn't match what was listed in the manifest * @throws FileNotInPayloadDirectoryException file listed in manifest but doesn't exist * @throws MissingBagitFileException the bagit.txt file is missing * @throws MissingPayloadDirectoryException if a bag is missing a payload directory * @throws MissingPayloadManifestException if there is no payload manifest */ public boolean isValid(final boolean ignoreHiddenFiles) throws IOException { BagitTextFileVerifier.checkBagitTextFile(this); return isComplete(ignoreHiddenFiles) && justValidate(); } /** * See https://tools.ietf.org/html/draft-kunze-bagit#section-3
* Just checks every checksum has been * verified against the contents of its corresponding file. * * @return true if the bag is valid or throws an exception * * @throws InvalidBagitFileFormatException if the file(s) are not formatted correctly * @throws IOException if there is a problem reading a file * @throws CorruptChecksumException the checksum doesn't match what was listed in the manifest * @throws FileNotInPayloadDirectoryException file listed in manifest but doesn't exist * @throws MissingBagitFileException the bagit.txt file is missing * @throws MissingPayloadDirectoryException if a bag is missing a payload directory * @throws MissingPayloadManifestException if there is no payload manifest */ public boolean justValidate() throws IOException{ boolean isValid = true; BagitTextFileVerifier.checkBagitTextFile(this); for(final Manifest payloadManifest : payLoadManifests){ isValid = checkHashes(payloadManifest) && isValid; } for(final Manifest tagManifest : tagManifests){ isValid = checkHashes(tagManifest) && isValid; } return isValid; } private boolean checkHashes(final Manifest manifest) throws IOException{ final Hasher hasher = BagitChecksumNameMapping.get(manifest.getBagitAlgorithmName()); for(final ManifestEntry entry : manifest.getEntries()) { if(Files.exists(entry.getPhysicalLocation())) { final String hash = hasher.hash(entry.getPhysicalLocation()); //https://www.rfc-editor.org/rfc/rfc8493#section-2.1.3 //* The hex-encoded checksum MAY use uppercase and/or lowercase letters. if (!hash.equalsIgnoreCase(entry.getChecksum())){ throw new CorruptChecksumException("File [{}] is suppose to have a [{}] hash of [{}] but was computed [{}].", entry.getPhysicalLocation(), //entry.getRelativeLocation(), manifest.getBagitAlgorithmName(), entry.getChecksum(), hash); } } } return true; } /** * See https://tools.ietf.org/html/draft-kunze-bagit#section-3
* A bag is complete if
*
    *
  • every element is present *
  • every file in the payload manifest(s) are present *
  • every file in the tag manifest(s) are present. Tag files not listed in a * tag manifest may be present. *
  • every file in the data directory must be listed in at least one payload * manifest *
  • each element must comply with the bagit spec *
* * @param ignoreHiddenFiles when checking to ignore hidden files * * @return true or throws an exception * * @throws InvalidBagitFileFormatException if the file(s) are not formatted correctly * @throws IOException if there is a problem reading a file * @throws CorruptChecksumException the checksum doesn't match what was listed in the manifest * @throws FileNotInPayloadDirectoryException file listed in manifest but doesn't exist * @throws MissingBagitFileException the bagit.txt file is missing * @throws MissingPayloadDirectoryException if a bag is missing a payload directory * @throws MissingPayloadManifestException if there is no payload manifest * @throws MaliciousPathException if the path is specifying a path outside the bag */ public boolean isComplete(final boolean ignoreHiddenFiles) throws IOException { MandatoryVerifier.checkFetchItemsExist(itemsToFetch, rootDir); MandatoryVerifier.checkBagitFileExists(this); MandatoryVerifier.checkPayloadDirectoryExists(this); MandatoryVerifier.checkIfAtLeastOnePayloadManifestsExist(this); ManifestVerifier.verifyManifests(this, ignoreHiddenFiles); return true; } /** * Write a bag to a physical location (on disk). 
* * @param writeTo the root location of the bag * @return a new immutable bag * @throws IOException if there is a problem writing the files */ public Bag write(final Path writeTo) throws IOException { if(Files.exists(rootDir) && writeTo.equals(rootDir)) { logger.warn(messages.getString("skipping_write_to_same_location"), writeTo); } logger.info(messages.getString("writing_bag_to_path"), rootDir); Files.createDirectories(writeTo); final Path bagitFile = BagitFileWriter.writeBagitFile(version, fileEncoding, writeTo); Optional metadataFile = Optional.empty(); if(!metadata.isEmpty()) { metadataFile = Optional.of(MetadataWriter.writeBagMetadata(metadata, version, writeTo, fileEncoding)); } Optional fetchFile = Optional.empty(); if(!itemsToFetch.isEmpty()){ fetchFile = Optional.of(FetchWriter.writeFetchFile(itemsToFetch, writeTo, version, fileEncoding)); } final Set newPayloadManifests = writeManifests(writeTo, payLoadManifests); final Set newPayloadManifestFiles = ManifestWriter.writePayloadManifests(newPayloadManifests, writeTo, version, fileEncoding); final Set updatedTagManifests = updateTagManifests(bagitFile, newPayloadManifestFiles, metadataFile, fetchFile); final Set newTagManifests = writeManifests(writeTo, updatedTagManifests); ManifestWriter.writeTagManifests(newTagManifests, writeTo, version, fileEncoding); return new Bag(version, fileEncoding, newPayloadManifests, newTagManifests, itemsToFetch, metadata, writeTo); } /* * because certain files like the payload manifest just got created, * we need to now add them to the tag manifest(s) */ private Set updateTagManifests(final Path bagitFile, final Set newPayloadManifestFiles, final Optional metadataFile, final Optional fetchFile) throws IOException{ final Set updatedTagManifests = new HashSet<>(tagManifests.size()); for(final Manifest manifest : tagManifests) { //clone the existing manifest as it may contain other tag files final ManifestBuilder builder = new ManifestBuilder(manifest); 
builder.addFile(bagitFile, Paths.get("")); if(metadataFile.isPresent()) { builder.addFile(metadataFile.get(), Paths.get("")); } if(fetchFile.isPresent()) { builder.addFile(fetchFile.get(), Paths.get("")); } for(final Path payloadManifestFile : newPayloadManifestFiles) { builder.addFile(payloadManifestFile, Paths.get("")); } updatedTagManifests.add(builder.build()); } return updatedTagManifests; } @SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops") private Set writeManifests(final Path writeTo, final Set manifests) throws IOException{ final Set newTagManifests = new HashSet<>(); for(final Manifest manifest : manifests) { final ManifestBuilder manifestBuilder = new ManifestBuilder(manifest.getBagitAlgorithmName()); for(final ManifestEntry entry : manifest.getEntries()) { updateEntry(writeTo, manifestBuilder, entry); } newTagManifests.add(manifestBuilder.build()); } return newTagManifests; } private void updateEntry(final Path writeTo, final ManifestBuilder manifestBuilder, final ManifestEntry entry) throws IOException { //update physical location in new manifest entry final ManifestEntry newEntry = new ManifestEntry(writeTo.resolve(entry.getRelativeLocation()), entry.getRelativeLocation(), entry.getChecksum()); manifestBuilder.addEntry(newEntry); //copy to new location createDirectoriesIfNeeded(newEntry); if(!Files.exists(newEntry.getPhysicalLocation())) { Files.copy(entry.getPhysicalLocation(), newEntry.getPhysicalLocation(), StandardCopyOption.REPLACE_EXISTING); } } private void createDirectoriesIfNeeded(final ManifestEntry entry) throws IOException { final Path newParentLocation = entry.getPhysicalLocation().getParent(); if(newParentLocation != null && !Files.exists(newParentLocation)) { Files.createDirectories(newParentLocation); } } /** * Convenience method for getting a new builder * @return a new builder instance */ public static BagBuilder getBuilder() { return new BagBuilder(); } /** * Reads a bag from a physical location (on disk). 
* * @param rootDir the base directory of a bag * @return a immutable bag * @throws IOException if there is a problem reading a file */ public static Bag read(final Path rootDir) throws IOException{ final Path bagitFile = rootDir.resolve("bagit.txt"); final SimpleImmutableEntry bagitInfo = BagitTextFileReader.readBagitTextFile(bagitFile); final Version version = bagitInfo.getKey(); final Charset encoding = bagitInfo.getValue(); final List> metadataLines = MetadataReader.readBagMetadata(rootDir, encoding); final MetadataBuilder metadataBuilder = new MetadataBuilder(); metadataBuilder.addAll(metadataLines); final Path fetchFile = rootDir.resolve("fetch.txt"); final List itemsToFetch = new ArrayList<>(); if(Files.exists(fetchFile)){ itemsToFetch.addAll(FetchReader.readFetch(fetchFile, encoding, rootDir, version)); } final Set payloadManifests = new HashSet<>(); final Set tagManifests = new HashSet<>(); try(DirectoryStream manifests = Files.newDirectoryStream(rootDir, new ManifestFilter())){ for (final Path path : manifests){ final String filename = PathUtils.getFilename(path); if(filename.startsWith("tagmanifest-")){ final Manifest tagManifest = ManifestReader.readManifest(path, rootDir, version, encoding); tagManifests.add(tagManifest); } else if(filename.startsWith("manifest-")){ final Manifest payloadManifest = ManifestReader.readManifest(path, rootDir, version, encoding); payloadManifests.add(payloadManifest); } } } return new Bag(version, encoding, payloadManifests, tagManifests, itemsToFetch, metadataBuilder.build(), rootDir); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy