// org.fcrepo.http.api.ExternalContentPathValidator Maven / Gradle / Ivy
/*
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree.
*/
package org.fcrepo.http.api;
import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.slf4j.LoggerFactory.getLogger;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.fcrepo.kernel.api.exception.ExternalMessageBodyException;
import org.fcrepo.kernel.api.utils.AutoReloadingConfiguration;
import org.slf4j.Logger;
/**
 * Validates external content paths to ensure that they are within a configured allowed list of paths.
 * <p>
 * The allowed list is read line by line from the file at {@code configPath} (not declared in this class;
 * presumably inherited from {@link AutoReloadingConfiguration}). Each line is trimmed, normalized and kept
 * only if it is a well formed {@code file}, {@code http} or {@code https} location. A candidate path is
 * accepted when its normalized form starts with one of the retained entries.
 *
 * @author bbpennel
 */
public class ExternalContentPathValidator extends AutoReloadingConfiguration {

    private static final Logger LOGGER = getLogger(ExternalContentPathValidator.class);

    private static final Set<String> ALLOWED_SCHEMES = new HashSet<>(Arrays.asList("file", "http", "https"));

    private static final Pattern SCHEME_PATTERN =
            Pattern.compile("^(http|https|file):/.*", Pattern.CASE_INSENSITIVE);

    // Pattern to check that an http uri contains a / after the domain if a domain is present
    private static final Pattern HTTP_DOMAIN_PATTERN = Pattern.compile("^(http|https)://([^/]+/.*|$)");

    // Matches any path that contains a ".." segment
    private static final Pattern RELATIVE_MOD_PATTERN = Pattern.compile(".*(^|/)\\.\\.($|/).*");

    // Matches a file uri prefix written with two or three slashes
    private static final Pattern NORMALIZE_FILE_URI = Pattern.compile("^file:/{2,3}");

    // Normalized allowed location prefixes; null until loadConfiguration has run
    private List<String> allowedList;

    /**
     * Validates that an external path is valid. The path must be an HTTP or file URI within the allow list of paths,
     * be absolute, and contain no relative modifier. File URIs must additionally point at an existing file.
     *
     * @param extPath external binary path to validate
     * @throws ExternalMessageBodyException thrown if the path is invalid.
     */
    public void validate(final String extPath) throws ExternalMessageBodyException {
        // Read the field once so the emptiness check and the prefix match below use the same list,
        // in case loadConfiguration replaces it in between.
        final List<String> allowed = allowedList;
        if (allowed == null || allowed.isEmpty()) {
            throw new ExternalMessageBodyException("External content is disallowed by the server");
        }
        if (isEmpty(extPath)) {
            throw new ExternalMessageBodyException("External content path was empty");
        }

        final String path = normalizeUri(extPath);

        final URI uri;
        try {
            // Ensure that the path is a valid URL
            uri = new URI(path);
            uri.toURL();
        } catch (final Exception e) {
            // Keep the cause visible in the logs since the thrown exception only carries a message
            LOGGER.debug("External content path {} is not a valid URI", extPath, e);
            throw new ExternalMessageBodyException("Path was not a valid URI: " + extPath);
        }

        // Decode the uri and ensure that it does not contain modifiers
        final String decodedPath = uri.getPath();
        // Opaque uris such as "file:foo" have no path component at all, so they cannot be absolute paths
        if (decodedPath == null) {
            throw new ExternalMessageBodyException("Path was not absolute: " + extPath);
        }
        if (RELATIVE_MOD_PATTERN.matcher(decodedPath).matches()) {
            throw new ExternalMessageBodyException("Path was not absolute: " + extPath);
        }

        // Require that the path is absolute
        if (!uri.isAbsolute()) {
            throw new ExternalMessageBodyException("Path was not absolute: " + extPath);
        }

        // Ensure that an accept scheme was provided
        final String scheme = uri.getScheme();
        if (!ALLOWED_SCHEMES.contains(scheme)) {
            throw new ExternalMessageBodyException("Path did not provide an allowed scheme: " + extPath);
        }

        // If a file, verify that it exists
        if (scheme.equals("file") && !fileExists(uri)) {
            throw new ExternalMessageBodyException("Path did not match any allowed external content paths: " +
                    extPath);
        }

        // Check that the uri is within an allowed path
        if (allowed.stream().anyMatch(path::startsWith)) {
            return;
        }
        throw new ExternalMessageBodyException("Path did not match any allowed external content paths: " + extPath);
    }

    /**
     * Checks whether a file uri refers to an existing file system entry.
     *
     * @param fileUri uri with the file scheme
     * @return true if the uri converts to a path that exists, false if it does not exist or cannot be converted
     */
    private static boolean fileExists(final URI fileUri) {
        try {
            return Paths.get(fileUri).toFile().exists();
        } catch (final IllegalArgumentException e) {
            // Paths.get rejects file uris it cannot map to a path (e.g. with a query or fragment component)
            LOGGER.debug("File uri {} cannot be converted to a path", fileUri, e);
            return false;
        }
    }

    /**
     * Normalizes a uri string so that it can be compared by prefix: the scheme is lowercased and a file uri
     * written with two or three slashes is rewritten to the single slash form.
     *
     * @param path uri string to normalize
     * @return the normalized string, or the input unchanged if it contains no ':'
     */
    private static String normalizeUri(final String path) {
        final String[] parts = path.split(":", 2);
        if (parts.length != 2) {
            return path;
        }
        // lowercase the scheme since it is case insensitive; Locale.ROOT keeps the result independent of the
        // default locale (e.g. Turkish casing rules would turn "FILE" into a non-ASCII string)
        final String normalized = parts[0].toLowerCase(Locale.ROOT) + ":" + parts[1];

        // file uris can have between 1 and 3 slashes depending on if the authority is present
        if (normalized.startsWith("file://")) {
            return NORMALIZE_FILE_URI.matcher(normalized).replaceFirst("file:/");
        }
        return normalized;
    }

    /**
     * Loads the allowed list. Lines which do not describe a valid allowed location are logged and skipped.
     *
     * @throws IOException thrown if the allowed list configuration file cannot be read.
     */
    @Override
    protected synchronized void loadConfiguration() throws IOException {
        LOGGER.info("Loading list of allowed external content locations from {}", configPath);
        try (final Stream<String> stream = Files.lines(Paths.get(configPath))) {
            allowedList = stream.map(line -> normalizeUri(line.trim()))
                    .filter(this::isAllowanceValid)
                    .collect(Collectors.toList());
        }
    }

    /**
     * Checks a single normalized entry of the allowed list configuration.
     *
     * @param allowance normalized configuration line
     * @return true if the entry may be used as an allowed prefix, false (after logging an error) otherwise
     */
    private boolean isAllowanceValid(final String allowance) {
        final Matcher schemeMatcher = SCHEME_PATTERN.matcher(allowance);
        final boolean schemeMatches = schemeMatcher.matches();
        if (!schemeMatches || RELATIVE_MOD_PATTERN.matcher(allowance).matches()) {
            LOGGER.error("Invalid path {} specified in external path configuration {}",
                    allowance, configPath);
            return false;
        }

        final String protocol = schemeMatcher.group(1).toLowerCase(Locale.ROOT);
        if ("file".equals(protocol)) {
            final File allowing;
            try {
                allowing = new File(URI.create(allowance).getPath());
            } catch (final IllegalArgumentException e) {
                // A malformed uri on one line must not abort loading of the remaining entries
                LOGGER.error("Invalid path {} specified in external path configuration {}",
                        allowance, configPath);
                return false;
            }
            // If a file uri ends with / it must be a directory, otherwise it must be a file.
            final boolean declaredDirectory = allowance.endsWith("/");
            if ((declaredDirectory && !allowing.isDirectory()) || (!declaredDirectory && !allowing.isFile())) {
                LOGGER.error("Invalid path {} in configuration {}, directories must end with a '/'",
                        allowance, configPath);
                return false;
            }
        } else if ("http".equals(protocol) || "https".equals(protocol)) {
            if (!HTTP_DOMAIN_PATTERN.matcher(allowance).matches()) {
                LOGGER.error("Invalid path {} in configuration {}, domain must end with a '/'",
                        allowance, configPath);
                return false;
            }
        }
        return true;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy