All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dspace.core.Utils Maven / Gradle / Ivy

There is a newer version: 8.0
Show newest version
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.core;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.net.Inet4Address;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.rmi.dgc.VMID;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Random;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.coverity.security.Escape;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringSubstitutor;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;

/**
 * Utility functions for DSpace.
 *
 * @author Peter Breton
 */
public final class Utils {
    /**
     * log4j logger
     */
    private static final Logger log = LogManager.getLogger(Utils.class);

    private static final Pattern DURATION_PATTERN = Pattern
        .compile("(\\d+)([smhdwy])");

    private static final long MS_IN_SECOND = 1000L;

    private static final long MS_IN_MINUTE = 60000L;

    private static final long MS_IN_HOUR = 3600000L;

    private static final long MS_IN_DAY = 86400000L;

    private static final long MS_IN_WEEK = 604800000L;

    private static final long MS_IN_YEAR = 31536000000L;

    private static int counter = 0;

    private static final Random random = new Random();

    private static final VMID vmid = new VMID();

    // for parseISO8601Date
    private static final SimpleDateFormat parseFmt[]  = {
        // first try at parsing, has milliseconds (note General time zone)
        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSz"),

        // second try at parsing, no milliseconds (note General time zone)
        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ssz"),

        // finally, try without any timezone (defaults to current TZ)
        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSS"),

        new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss")
    };

    // for formatISO8601Date
    // output canonical format (note RFC22 time zone, easier to hack)
    private static final SimpleDateFormat outFmtSecond
            = new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ssZ");

    // output format with millsecond precision
    private static final SimpleDateFormat outFmtMillisec
            = new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSZ");

    private static final Calendar outCal = GregorianCalendar.getInstance();

    /**
     * Private constructor
     */
    private Utils() { }

    /**
     * Return an MD5 checksum for data in hex format.
     *
     * @param data The data to checksum.
     * @return MD5 checksum for the data in hex format.
     */
    public static String getMD5(String data) {
        return getMD5(data.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Return an MD5 checksum for data in hex format.
     *
     * @param data The data to checksum.
     * @return MD5 checksum for the data in hex format.
     */
    public static String getMD5(byte[] data) {
        return toHex(getMD5Bytes(data));
    }

    /**
     * Return an MD5 checksum for data as a byte array.
     *
     * @param data The data to checksum.
     * @return MD5 checksum for the data as a byte array.
     */
    public static byte[] getMD5Bytes(byte[] data) {
        try {
            MessageDigest digest = MessageDigest.getInstance("MD5");

            return digest.digest(data);
        } catch (NoSuchAlgorithmException nsae) {
            // ignore
        }

        // Should never happen
        return null;
    }

    /**
     * Return a hex representation of the byte array
     *
     * @param data The data to transform.
     * @return A hex representation of the data.
     */
    public static String toHex(byte[] data) {
        if ((data == null) || (data.length == 0)) {
            return null;
        }

        StringBuilder result = new StringBuilder();

        // This is far from the most efficient way to do things...
        for (int i = 0; i < data.length; i++) {
            int low = (int) (data[i] & 0x0F);
            int high = (int) (data[i] & 0xF0);

            result.append(Integer.toHexString(high).substring(0, 1));
            result.append(Integer.toHexString(low));
        }

        return result.toString();
    }

    /**
     * Generate a unique key. The key is a long (length 38 to 40) sequence of
     * digits.
     *
     * @return A unique key as a long sequence of base-10 digits.
     */
    public static String generateKey() {
        return new BigInteger(generateBytesKey()).abs().toString();
    }

    /**
     * Generate a unique key. The key is a 32-character long sequence of hex
     * digits.
     *
     * @return A unique key as a long sequence of hex digits.
     */
    public static String generateHexKey() {
        return toHex(generateBytesKey());
    }

    /**
     * Generate a unique key as a byte array.
     *
     * @return A unique key as a byte array.
     */
    public static synchronized byte[] generateBytesKey() {
        byte[] junk = new byte[16];

        random.nextBytes(junk);

        String input = new StringBuilder()
                .append(vmid)
                .append(new java.util.Date())
                .append(Arrays.toString(junk))
                .append(counter++)
                .toString();

        return getMD5Bytes(input.getBytes(StandardCharsets.UTF_8));
    }

    // The following two methods are taken from the Jakarta IOUtil class.

    /**
     * Copy stream-data from source to destination. This method does not buffer,
     * flush or close the streams, as to do so would require making non-portable
     * assumptions about the streams' origin and further use. If you wish to
     * perform a buffered copy, use {@link #bufferedCopy}.
     *
     * @param input  The InputStream to obtain data from.
     * @param output The OutputStream to copy data to.
     * @throws IOException if IO error
     */
    public static void copy(final InputStream input, final OutputStream output)
        throws IOException {
        final int BUFFER_SIZE = 1024 * 4;
        final byte[] buffer = new byte[BUFFER_SIZE];

        while (true) {
            final int count = input.read(buffer, 0, BUFFER_SIZE);

            if (-1 == count) {
                break;
            }

            // write out those same bytes
            output.write(buffer, 0, count);
        }

        // needed to flush cache
        // output.flush();
    }

    /**
     * Copy stream-data from source to destination, with buffering. This is
     * equivalent to passing {@link #copy}a
     * java.io.BufferedInputStream and
     * java.io.BufferedOutputStream to {@link #copy}, and
     * flushing the output stream afterwards. The streams are not closed after
     * the copy.
     *
     * @param source      The InputStream to obtain data from.
     * @param destination The OutputStream to copy data to.
     * @throws IOException if IO error
     */
    public static void bufferedCopy(final InputStream source,
                                    final OutputStream destination) throws IOException {
        final BufferedInputStream input = new BufferedInputStream(source);
        final BufferedOutputStream output = new BufferedOutputStream(
            destination);
        copy(input, output);
        output.flush();
    }

    /**
     * Replace characters that could be interpreted as HTML codes with symbolic
     * references (entities). This function should be called before displaying
     * any metadata fields that could contain the characters {@code "<", ">", "&", "'"},
     * and double quotation marks. This will effectively disable HTML links
     * in metadata.
     *
     * @param value the metadata value to be scrubbed for display
     * @return the passed-in string, with html special characters replaced with
     * entities.
     */
    public static String addEntities(String value) {
        return Escape.html(value);
    }

    /**
     * Utility method to parse durations defined as {@code \d+[smhdwy]} (seconds,
     * minutes, hours, days, weeks, years)
     *
     * @param duration specified duration
     * @return number of milliseconds equivalent to duration.
     * @throws ParseException if the duration is of incorrect format
     */
    public static long parseDuration(String duration) throws ParseException {
        Matcher m = DURATION_PATTERN.matcher(duration.trim());
        if (!m.matches()) {
            throw new ParseException("'" + duration
                                         + "' is not a valid duration definition", 0);
        }

        String units = m.group(2);
        long multiplier = MS_IN_SECOND;

        if ("s".equals(units)) {
            multiplier = MS_IN_SECOND;
        } else if ("m".equals(units)) {
            multiplier = MS_IN_MINUTE;
        } else if ("h".equals(units)) {
            multiplier = MS_IN_HOUR;
        } else if ("d".equals(units)) {
            multiplier = MS_IN_DAY;
        } else if ("w".equals(units)) {
            multiplier = MS_IN_WEEK;
        } else if ("y".equals(units)) {
            multiplier = MS_IN_YEAR;
        } else {
            throw new ParseException(units
                                         + " is not a valid time unit (must be 'y', "
                                         + "'w', 'd', 'h', 'm' or 's')", duration.indexOf(units));
        }

        long qint = Long.parseLong(m.group(1));

        return qint * multiplier;
    }

    /**
     * Translates timestamp from an ISO 8601-standard format, which
     * is commonly used in XML and RDF documents.
     * This method is synchronized because it depends on a non-reentrant
     * static DateFormat (more efficient than creating a new one each call).
     *
     * @param s the input string
     * @return Date object, or null if there is a problem translating.
     */
    public static synchronized Date parseISO8601Date(String s) {
        // attempt to normalize the timezone to something we can parse;
        // SimpleDateFormat can't handle "Z"
        char tzSign = s.charAt(s.length() - 6);
        if (s.endsWith("Z")) {
            s = s.substring(0, s.length() - 1) + "GMT+00:00";
        } else if (tzSign == '-' || tzSign == '+') {
            // check for trailing timezone
            s = s.substring(0, s.length() - 6) + "GMT" + s.substring(s.length() - 6);
        }

        // try to parse without milliseconds
        ParseException lastError = null;
        for (int i = 0; i < parseFmt.length; ++i) {
            try {
                return parseFmt[i].parse(s);
            } catch (ParseException e) {
                lastError = e;
            }
        }
        if (lastError != null) {
            log.error("Error parsing date:", lastError);
        }
        return null;
    }

    /**
     * Convert a Date to String in the ISO 8601 standard format.
     * The RFC822 timezone is almost right, still need to insert ":".
     * This method is synchronized because it depends on a non-reentrant
     * static DateFormat (more efficient than creating a new one each call).
     *
     * @param d the input Date
     * @return String containing formatted date.
     */
    public static synchronized String formatISO8601Date(Date d) {
        String result;
        outCal.setTime(d);
        if (outCal.get(Calendar.MILLISECOND) == 0) {
            result = outFmtSecond.format(d);
        } else {
            result = outFmtMillisec.format(d);
        }
        int rl = result.length();
        return result.substring(0, rl - 2) + ":" + result.substring(rl - 2);
    }

    public static  java.util.Collection emptyIfNull(java.util.Collection collection) {
        return collection == null ? Collections.emptyList() : collection;
    }

    /**
     * Utility method to extract schema, element, qualifier from the metadata field key
     * Keep in mind that this method try to auto discover the common separator used in DSpace ("_" or ".")
     *
     * Return an array of token with size 3 which contains:
     * schema = tokens[0];
     * element = tokens[1];
     * qualifier = tokens[2]; //it can be empty string
     *
     * @param metadata (the field in the form dc.title or dc_title)
     * @return array of tokens
     */
    public static String[] tokenize(String metadata) {
        String separator = metadata.contains("_") ? "_" : ".";
        StringTokenizer dcf = new StringTokenizer(metadata, separator);

        String[] tokens = {"", "", ""};
        int i = 0;
        while (dcf.hasMoreTokens()) {
            tokens[i] = dcf.nextToken().trim();
            i++;
        }
        // Tokens contains:
        // schema = tokens[0];
        // element = tokens[1];
        // qualifier = tokens[2];
        return tokens;

    }

    /**
     * Make the metadata field key using the separator.
     *
     * @param schema
     * @param element
     * @param qualifier
     * @param separator (DSpace common separator are "_" or ".")
     * @return metadata field key
     */
    public static String standardize(String schema, String element, String qualifier, String separator) {
        if (StringUtils.isBlank(qualifier)) {
            return schema + separator + element;
        } else {
            return schema + separator + element + separator + qualifier;
        }
    }

    /**
     * Retrieve the baseurl from a given URL string
     * @param urlString URL string
     * @return baseurl (without any context path) or null (if URL was invalid)
     */
    public static String getBaseUrl(String urlString) {
        try {
            URL url = new URL(urlString);
            String baseUrl = url.getProtocol() + "://" + url.getHost();
            if (url.getPort() != -1) {
                baseUrl += (":" + url.getPort());
            }
            return baseUrl;
        } catch (MalformedURLException e) {
            return null;
        }
    }

    /**
     * Retrieve the hostname from a given URI string
     * @param uriString URI string
     * @return hostname (without any www.) or null (if URI was invalid)
     */
    public static String getHostName(String uriString) {
        try {
            URI uri = new URI(uriString);
            String hostname = uri.getHost();
            // remove the "www." from hostname, if it exists
            if (hostname != null) {
                return hostname.startsWith("www.") ? hostname.substring(4) : hostname;
            }
            return hostname;
        } catch (URISyntaxException e) {
            return null;
        }
    }

    /**
     * Retrieve the IP address(es) of a given URI string.
     * 

* At this time, DSpace only supports IPv4, so this method will only return IPv4 addresses. * @param uriString URI string * @return IP address(es) in a String array (or null if not found) */ public static String[] getIPAddresses(String uriString) { String[] ipAddresses = null; // First, get the hostname String hostname = getHostName(uriString); if (StringUtils.isNotEmpty(hostname)) { try { // Then, get the list of all IPs for that hostname InetAddress[] inetAddresses = InetAddress.getAllByName(hostname); // Convert array of InetAddress objects to array of IP address Strings ipAddresses = Arrays.stream(inetAddresses) // Filter our array to ONLY include IPv4 addresses .filter((address) -> address instanceof Inet4Address) // Call getHostAddress() on each to get the IPv4 address as a string .map((address) -> ((Inet4Address) address).getHostAddress()) .toArray(String[]::new); } catch (UnknownHostException ex) { return null; } } return ipAddresses; } /** * Replaces configuration placeholders within a String with the corresponding value * from DSpace's Configuration Service. *

* For example, given a String like "My DSpace is installed at ${dspace.dir}", this * method will replace "${dspace.dir}" with the configured value of that property. * @param string source string * @return string with any placeholders replaced with configured values. */ public static String interpolateConfigsInString(String string) { ConfigurationService config = DSpaceServicesFactory.getInstance().getConfigurationService(); return StringSubstitutor.replace(string, config.getProperties()); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy