All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.jscancella.conformance.BagLinter Maven / Gradle / Ivy

Go to download

This is a software library intended to support the creation, manipulation, and validation of "bags" from the bagit specification. It currently supports version 0.93 through 1.0.

There is a newer version: 5.2
Show newest version
package com.github.jscancella.conformance;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.ResourceBundle;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.github.jscancella.conformance.exceptions.BagitVersionIsNotAcceptableException;
import com.github.jscancella.conformance.exceptions.FetchFileNotAllowedException;
import com.github.jscancella.conformance.exceptions.MetatdataValueIsNotAcceptableException;
import com.github.jscancella.conformance.exceptions.MetatdataValueIsNotRepeatableException;
import com.github.jscancella.conformance.exceptions.RequiredManifestNotPresentException;
import com.github.jscancella.conformance.exceptions.RequiredMetadataFieldNotPresentException;
import com.github.jscancella.conformance.exceptions.RequiredTagFileNotPresentException;
import com.github.jscancella.conformance.internal.BagProfileChecker;
import com.github.jscancella.conformance.internal.EncodingChecker;
import com.github.jscancella.conformance.internal.LargeBagChecker;
import com.github.jscancella.conformance.internal.ManifestChecker;
import com.github.jscancella.conformance.internal.MetadataChecker;
import com.github.jscancella.conformance.internal.VersionChecker;
import com.github.jscancella.conformance.profile.BagitProfile;
import com.github.jscancella.domain.Bag;
import com.github.jscancella.domain.Version;
import com.github.jscancella.exceptions.DataDirectoryMustBeEmptyException;
import com.github.jscancella.exceptions.FetchFileDoesNotExistException;
import com.github.jscancella.exceptions.InvalidBagitFileFormatException;
import com.github.jscancella.exceptions.MaliciousPathException;
import com.github.jscancella.exceptions.UnparsableVersionException;
import com.github.jscancella.reader.internal.BagitTextFileReader;
import com.github.jscancella.reader.internal.KeyValueReader;

/**
 * The main class for checking if a bag conforms to the specified profile or has other problems
 */
public enum BagLinter {
  ; // using enum to ensure singleton
  private static final Logger logger = LoggerFactory.getLogger(BagLinter.class);
  private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

  /**
   * Check a bag against a bagit-profile as described by https://github.com/ruebot/bagit-profiles
   * 
* Note: This implementation does not check the Serialization part of the * profile! * * * @param jsonProfile the conformance profile to check the bag against * @param bag the bag to check against the conformance profile * * @return true if the bag meets the conformance profile * * @throws JsonParseException if there is an error parsing the conformance profile * @throws JsonMappingException if there is an error mapping the parsed profile to a {@link BagitProfile} * @throws IOException If there is an error while reading the bag * * @throws FetchFileNotAllowedException If the bag contains a fetch file but the profile forbids it * @throws RequiredMetadataFieldNotPresentException If the bag is missing a required metadata field * @throws MetatdataValueIsNotAcceptableException if the metadata value present in the bag is not in the list of acceptable values from the profile * @throws RequiredManifestNotPresentException if the manifest present doesn't use the required checksum algorithm from the profile * @throws BagitVersionIsNotAcceptableException if the bag is too old * @throws RequiredTagFileNotPresentException if a tag file is missing * @throws MetatdataValueIsNotRepeatableException if there is a repeat of metadata in the bag * @throws FetchFileDoesNotExistException if there is no fetch file in the bag when the profile requires it * @throws DataDirectoryMustBeEmptyException if the data directory is not empty when the profile prohibits it */ public static boolean checkAgainstProfile(final InputStream jsonProfile, final Bag bag) throws JsonParseException, JsonMappingException, IOException, FetchFileNotAllowedException, RequiredMetadataFieldNotPresentException, MetatdataValueIsNotAcceptableException, RequiredManifestNotPresentException, BagitVersionIsNotAcceptableException, RequiredTagFileNotPresentException, MetatdataValueIsNotRepeatableException, FetchFileDoesNotExistException, DataDirectoryMustBeEmptyException{ BagProfileChecker.bagConformsToProfile(jsonProfile, bag); return true; } /** * The BagIt specification is very flexible in what it allows which leads to * situations where something may be technically allowed, but should be * discouraged. This method checks a bag for potential problems, or other items * that are allowed but discouraged. This does not validate a * bag. * * @param rootDir the directory that contains a bag * * @return a set of warnings that were found in the bag * * @throws IOException if there was a problem reading a bag file * @throws UnparsableVersionException if there was a problem parsing the version of the bag * @throws InvalidBagitFileFormatException if a file is not formatted correctly * @throws MaliciousPathException if the bag is trying to be malicious */ public static Set lintBag(final Path rootDir) throws IOException{ return lintBag(rootDir, Collections.emptyList()); } /** * The BagIt specification is very flexible in what it allows which leads to * situations where something may be technically allowed, but should be * discouraged. This method checks a bag for potential problems, or other items * that are allowed but discouraged. This does not validate a * bag. See {@link Bag#isComplete(boolean)} instead. * * @param bagitDir the firectory that contains a bag * @param warningsToIgnore a collection of warnings you would like the linter to ignore * * @return a set of warnings that were found in the bag * * @throws IOException if there was a problem reading a bag file * @throws UnparsableVersionException if there was a problem parsing the version of the bag * @throws InvalidBagitFileFormatException if a file is not formatted correctly * @throws MaliciousPathException if the bag is trying to be maliciou */ public static Set lintBag(final Path bagitDir, final Collection warningsToIgnore) throws IOException{ final Set warnings = new HashSet<>(); final Path bagitFile = bagitDir.resolve("bagit.txt"); checkForExtraLines(bagitFile, warnings, warningsToIgnore); final SimpleImmutableEntry bagitInfo = BagitTextFileReader.readBagitTextFile(bagitFile); logger.info(messages.getString("checking_encoding_problems")); EncodingChecker.checkEncoding(bagitInfo.getValue(), warnings, warningsToIgnore); logger.info(messages.getString("checking_latest_version")); VersionChecker.checkVersion(bagitInfo.getKey(), warnings, warningsToIgnore); logger.info(messages.getString("checking_size")); LargeBagChecker.checkForLargeBag(bagitDir, warnings, warningsToIgnore); logger.info(messages.getString("checking_manifest_problems")); ManifestChecker.checkManifests(bagitDir, bagitInfo.getValue(), warnings, warningsToIgnore); logger.info(messages.getString("checking_metadata_problems")); MetadataChecker.checkBagMetadata(bagitDir, bagitInfo.getValue(), warnings, warningsToIgnore); return warnings; } /* * After version 1.0 the specification read that the bagit.txt MUST contain EXACTLY 2 lines */ private static void checkForExtraLines(final Path bagitFile, final Collection warnings, final Collection warningsToIgnore) throws IOException{ if(warningsToIgnore.contains(BagitWarning.EXTRA_LINES_IN_BAGIT_FILES)){ logger.debug(messages.getString("skipping_check_extra_lines")); return; } logger.debug(messages.getString("checking_extra_lines")); final List> pairs = KeyValueReader.readKeyValuesFromFile(bagitFile, ":", StandardCharsets.UTF_8); for(final SimpleImmutableEntry pair : pairs){ if("BagIt-Version".equals(pair.getKey())){ final Version version = BagitTextFileReader.parseVersion(pair.getValue()); //versions after 1.0 specified it must be exactly 2 lines if(pairs.size() > 2 && version.isSameOrNewer(Version.VERSION_1_0())){ logger.warn(messages.getString("extra_lines_warning"), pairs.size()); warnings.add(BagitWarning.EXTRA_LINES_IN_BAGIT_FILES); } } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy