All downloads are free. The search and download functionality uses the official Maven repository.

com.github.jscancella.reader.internal.FetchReader Maven / Gradle / Ivy

Go to download

This is a software library intended to support the creation, manipulation, and validation of "bags" from the bagit specification. It currently supports version 0.93 through 1.0.

There is a newer version: 5.2
Show newest version
package com.github.jscancella.reader.internal;

import java.io.BufferedReader;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.ResourceBundle;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.github.jscancella.domain.FetchItem;
import com.github.jscancella.domain.Version;
import com.github.jscancella.exceptions.InvalidBagitFileFormatException;
import com.github.jscancella.exceptions.MaliciousPathException;

/**
 * Reads and parses a bag's fetch.txt file from the filesystem.
 * <p>
 * Every line of the file must consist of exactly three whitespace separated fields:
 * {@code url length path}, where {@code length} is either a decimal number of octets or
 * {@code -} when the length is unknown.
 */
public enum FetchReader {;//using enum to enforce singleton
  private static final Logger logger = LoggerFactory.getLogger(FetchReader.class);
  private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

  /**
   * Matches one complete fetch line and captures its three fields:
   * group 1 = url, group 2 = length (decimal digits or "-"), group 3 = path.
   * The path is everything after the second run of whitespace, so it may itself contain spaces,
   * but it must not be empty. DOTALL lets the path contain characters such as U+2028 that
   * {@link BufferedReader#readLine()} does not treat as line terminators.
   */
  private static final Pattern FETCH_LINE_PATTERN = Pattern.compile("(\\S+)\\s+(\\d+|-)\\s+(\\S.*)", Pattern.DOTALL);
  /** The length field value used in fetch.txt when the size of the file is not known. */
  private static final String UNKNOWN_LENGTH_MARKER = "-";
  /** The length reported on a fetch item whose length field was {@value #UNKNOWN_LENGTH_MARKER}. */
  private static final long UNKNOWN_LENGTH = -1;

  /**
   * Reads a fetch.txt file
   * 
   * @param fetchFile the specific fetch file
   * @param encoding the encoding to read the file with
   * @param bagRootDir the root directory of the bag
   * @param version the version of the bag
   * @return a list of items to fetch, in the order they appear in the file. 
   * An item whose length field is "-" has a length of -1
   * 
   * @throws IOException if there is a problem reading a file
   * @throws MaliciousPathException if the path was crafted to point outside the bag directory
   * @throws InvalidBagitFileFormatException if the fetch format does not follow the bagit specification, 
   * which includes blank lines, lines with fewer than three fields and lengths that are not a decimal number or "-"
   * @throws IllegalArgumentException if the url field of a line is not a valid URI
   */
  @SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
  public static List<FetchItem> readFetch(final Path fetchFile, final Charset encoding, final Path bagRootDir, final Version version) throws IOException{
    logger.info(messages.getString("reading_fetch_file"), fetchFile);
    final List<FetchItem> itemsToFetch = new ArrayList<>();
    
    try(BufferedReader reader = Files.newBufferedReader(fetchFile, encoding)){
      String line = reader.readLine();
      while(line != null){
        final Matcher fields = FETCH_LINE_PATTERN.matcher(line);
        if(!fields.matches()){
          throw invalidLine(line);
        }
        
        final String rawUrl = fields.group(1);
        final String rawLength = fields.group(2);
        final String rawPath = fields.group(3);
        
        //the path is resolved first so that a malicious path is reported before any other problem with the line
        final Path path = TagFileReader.createFileFromManifest(bagRootDir, rawPath, version, encoding);
        final long length = parseLength(rawLength, line);
        final URI url = URI.create(rawUrl);
        
        logger.debug(messages.getString("read_fetch_file_line"), url, length, rawPath, fetchFile);
        final FetchItem itemToFetch = new FetchItem(url, length, path);
        itemsToFetch.add(itemToFetch);
        
        line = reader.readLine();
      }
    }

    return itemsToFetch;
  }

  /**
   * Converts the length field of a fetch line into a number of octets.
   * 
   * @param lengthField the second field of the line, already known to be decimal digits or "-"
   * @param line the complete line, used only for the error message
   * @return the length in octets, or -1 when the field is "-"
   * 
   * @throws InvalidBagitFileFormatException if the digits do not fit in a long
   */
  private static long parseLength(final String lengthField, final String line){
    if(UNKNOWN_LENGTH_MARKER.equals(lengthField)){
      return UNKNOWN_LENGTH;
    }
    
    try{
      //parseLong is strictly decimal; Long.decode would read a leading 0 as octal and 0x as hexadecimal
      return Long.parseLong(lengthField);
    }
    catch(NumberFormatException overflow){
      //the field is all digits, so this only happens when the value is too large for a long.
      //The offending line is already part of the message, so the cause adds no further information.
      throw invalidLine(line);
    }
  }

  /**
   * Builds the exception that reports a line which does not follow the fetch.txt format.
   * 
   * @param line the offending line
   * @return the exception to throw
   */
  private static InvalidBagitFileFormatException invalidLine(final String line){
    return new InvalidBagitFileFormatException(messages.getString("invalid_fetch_file_line_error").replace("{}", line));
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy