All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yourmediashelf.fedora.akubra.PrefixingHashPathIdMapper Maven / Gradle / Ivy

/**
 * Copyright (C) 2012 MediaShelf 
 *
 * This file is part of uuid-datepath-idmapper.
 *
 * uuid-datepath-idmapper is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * uuid-datepath-idmapper is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with uuid-datepath-idmapper.  If not, see <http://www.gnu.org/licenses/>.
 */
package com.yourmediashelf.fedora.akubra;

import java.io.UnsupportedEncodingException;

import java.net.URI;
import java.net.URLDecoder;
import java.net.URLEncoder;

import org.akubraproject.map.IdMapper;

import com.twmacinta.util.MD5;


/**
 * Implementation of org.fcrepo.server.storage.lowlevel.akubra.HashPathIdMapper 
 * that takes an IdMapperPrefixer as a constructor argument.
 * 
 * Provides a hash-based file: mapping for any URI.
 * 

* The path component of each internal URI is derived from an MD5 hash of * the external URI. The filename component is a reversible encoding of the * external URI that is safe to use as a filename on modern filesystems. *

*

Hash Path Patterns

* The pattern given at construction time determines how the path component * of each internal URI will be composed. Within the pattern, the # character * is a stand-in for a hexadecimal [0-f] digit from the MD5 hash of the * external id. *

* Patterns: *

    *
  • must consist only of # and / characters.
  • *
  • must contain between 1 and 32 # characters.
  • *
  • must not begin or end with the / character.
  • *
  • must not contain consecutive / characters.
  • *
*

* Example patterns: *

    *
  • Good: #
  • *
  • Good: ##/#
  • *
  • Good: ##/##/##
  • *
  • Bad: a
  • *
  • Bad: ##/
  • *
  • Bad: ##//##
  • *
*

*

Filesystem-Safe Encoding

* The last part of the internal URI is a "filesystem-safe" encoding of the * external URI. All characters will be UTF-8 percent-encoded ("URI escaped") * except for the following: a-z A-Z 0-9 = ( ) [ ] - * In addition, . (period) will be escaped as %2E when * it occurs as the last character of the URI. *

*

Example Mappings

* With pattern #/#: *
    *
  • urn:example1 becomes file:0/8/urn%3Aexample1
  • *
  • http://tinyurl.com/cxzzf becomes file:6/2/http%3A%2F%2Ftinyurl.com%2Fcxzzf
  • *
* With pattern ##/##: *
    *
  • urn:example1 becomes file:08/86/urn%3Aexample1
  • *
  • http://tinyurl.com/cxzzf becomes file:62/ca/http%3A%2F%2Ftinyurl.com%2Fcxzzf
  • *
* * @author Chris Wilper * @author Edwin Shin */ public class PrefixingHashPathIdMapper implements IdMapper { private static final String internalScheme = "file"; private final String pattern; private final IdMapperPrefixer prefixer; static { MD5.initNativeLibrary(true); // don't attempt to use the native libs, ever. } /** * Creates an instance that will use the given pattern. * * @param pathPattern the pattern to use, possibly null or "". * @param prefixer The IdMapperPrefixer to use, or null. * * @throws IllegalArgumentException if the pattern is invalid. */ public PrefixingHashPathIdMapper(String pattern, IdMapperPrefixer prefixer) { this.pattern = validatePattern(pattern); this.prefixer = prefixer; } public URI getExternalId(URI internalId) throws NullPointerException { String fullPath = internalId.toString().substring( internalScheme.length() + 1); int i = fullPath.lastIndexOf('/'); String encodedURI; if (i == -1) encodedURI = fullPath; else encodedURI = fullPath.substring(i + 1); return URI.create(decode(encodedURI)); } public URI getInternalId(URI externalId) throws NullPointerException { if (externalId == null) { throw new NullPointerException(); } String uri = externalId.toString(); return URI.create(internalScheme + ":" + getPath(uri) + encode(uri)); } public String getInternalPrefix(String externalPrefix) throws NullPointerException { if (externalPrefix == null) { throw new NullPointerException(); } // we can only do this if pattern is "" if (pattern.length() == 0) { return internalScheme + ":" + encode(externalPrefix); } else { return null; } } // gets the path based on the hash of the uri, or "" if the pattern is empty private String getPath(String uri) { String nsPrefix = ""; if (prefixer != null) { nsPrefix = prefixer.getPrefix(uri); if (!nsPrefix.isEmpty()) { nsPrefix = nsPrefix + '/'; } } if (pattern.length() == 0) { return nsPrefix; } StringBuilder builder = new StringBuilder(nsPrefix); String hash = getHash(uri); int hashPos = 0; for (int i = 0; 
i < pattern.length(); i++) { char c = pattern.charAt(i); if (c == '#') { builder.append(hash.charAt(hashPos++)); } else { builder.append(c); } } builder.append('/'); return builder.toString(); } // computes the md5 and returns a 32-char lowercase hex string private static String getHash(String uri) { return MD5.asHex(new MD5(uri).Final()); } private static String encode(String uri) { // encode char-by-char because we only want to borrow // URLEncoder.encode's behavior for some characters StringBuilder out = new StringBuilder(); for (int i = 0; i < uri.length(); i++) { char c = uri.charAt(i); if (c >= 'a' && c <= 'z') { out.append(c); } else if (c >= '0' && c <= '9') { out.append(c); } else if (c >= 'A' && c <= 'Z') { out.append(c); } else if (c == '-' || c == '=' || c == '(' || c == ')' || c == '[' || c == ']' || c == ';') { out.append(c); } else if (c == ':') { out.append("%3A"); } else if (c == ' ') { out.append("%20"); } else if (c == '+') { out.append("%2B"); } else if (c == '_') { out.append("%5F"); } else if (c == '*') { out.append("%2A"); } else if (c == '.') { if (i == uri.length() - 1) { out.append("%2E"); } else { out.append("."); } } else { try { out.append(URLEncoder.encode("" + c, "UTF-8")); } catch (UnsupportedEncodingException wontHappen) { throw new RuntimeException(wontHappen); } } } return out.toString(); } private static String decode(String encodedURI) { if (encodedURI.endsWith("%2E")) { encodedURI = encodedURI.substring(0, encodedURI.length() - 3) + "."; } try { return URLDecoder.decode(encodedURI, "UTF-8"); } catch (UnsupportedEncodingException wontHappen) { throw new RuntimeException(wontHappen); } } private static String validatePattern(String pattern) { if (pattern == null) { return ""; } int count = 0; boolean prevWasSlash = false; for (int i = 0; i < pattern.length(); i++) { char c = pattern.charAt(i); if (c == '#') { count++; prevWasSlash = false; } else if (c == '/') { if (i == 0 || i == pattern.length() - 1) { throw new 
IllegalArgumentException("Pattern must not begin" + " or end with '/'"); } else if (prevWasSlash) { throw new IllegalArgumentException("Pattern must not" + " contain consecutive '/' characters"); } else { prevWasSlash = true; } } else { throw new IllegalArgumentException("Illegal character in" + " pattern: " + c); } } if (count > 32) { throw new IllegalArgumentException("Pattern must not contain more" + " than 32 '#' characters"); } return pattern; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy