/*
 * gov.nasa.arc.pds.tools.util.StrUtils Maven / Gradle / Ivy
 *
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of pds4-jparser Show documentation
 * This is the parser library for the PDS4 planetary data standard.
 * There is a newer version: 2.8.4
 */
package gov.nasa.arc.pds.tools.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * This class provides utilities for string formatting.
 * 
 * @author jagander
 */
@SuppressWarnings("nls")
public class StrUtils {

	// default length to truncate to
	public static final int DEFAULT_TRUNCATE_LENGTH = 100;

	// javascript newline
	public static final String JS_NEWLINE = "\n";

	// xhtml newline
	public static final String HTML_NEWLINE = "
";

	// default stack trace lines in exception toString
	public static final int DEFAULT_STACK_LINES = 4;

	// regex to collapse whitespace: matches any run of one or more whitespace
	// characters
	public static final Pattern NORMALIZE_WHITE_SPACE_REGEX = Pattern.compile(
			"\\s+", Pattern.MULTILINE);

	// regex to get base part of url, assumes valid url
	// group 1: "http://" or "https://" plus everything up to the first "/"
	// group 3: the longest run of non-"?" characters that ends in "/"
	public static final Pattern URL_BASE_REGEX = Pattern
			.compile("^(https?://[^/]+)(([^\\?]*/).*)?");

	// all valid ascii characters including printable control characters:
	// carriage return, line feed, tab and the range from space to tilde.
	// Written as the inside of a regex character class.
	public static final String ASCII_CHAR_RANGE = "\\r\\n\\t -~";

	// matches input that consists only of characters in ASCII_CHAR_RANGE
	public static final Pattern ASCII_CHARS_REGEX = Pattern.compile("^["
			+ ASCII_CHAR_RANGE + "]*$", Pattern.MULTILINE);

	// matches each run of one or more characters in ASCII_CHAR_RANGE
	public static final Pattern STRIP_ASCII_REGEX = Pattern.compile("["
			+ ASCII_CHAR_RANGE + "]+", Pattern.MULTILINE);

	// optional sign, then 1-3 digits followed by N collections of a separator
	// and 3 digits followed by an optional decimal separator and N digits.
	// Either "," groups and "." is the decimal separator, or the other way
	// around: "10,000,000.5" and "10.000.000,5" both match.
	public static final Pattern NUMERIC_REGEX = Pattern
			.compile("^[+-]?\\d{1,3}(((,\\d{3})*(\\.\\d+)?)|((\\.\\d{3})*(,\\d+)?))?$");

	// optional sign, then ungrouped digits with an optional "," or "." decimal
	// part, or a leading "," or "." followed by digits
	public static final Pattern SIMPLE_NUMERIC_REGEX = Pattern
			.compile("^[+-]?((\\d+((,|\\.)\\d*)?)|((,|\\.)\\d+))$");

	// one or more non-";" characters starting at a line start, followed by ";"
	// and a newline. The pattern contains no ".", so DOTALL has no effect.
	// TODO: figure out how to support something like ...enum(';','a');
	public static final Pattern STATEMENT_REGEX = Pattern.compile("^[^;]+;\n",
			Pattern.MULTILINE | Pattern.DOTALL);

	// group 1 captures the content of a line without the spaces, tabs and
	// vertical tabs (\x0B) that surround it; the content never spans a line
	// feed, carriage return or form feed
	public static final Pattern STRIP_PADDING = Pattern
			.compile(
					"[ \\t\\x0B]*([^ \\t\\x0B\\n\\r\\f]([^\\n\\r\\f]*[^ \\t\\x0B\\n\\r\\f])?)[ \\t\\x0B]*",
					Pattern.MULTILINE);

	/**
	 * Special characters within a regular expression. Listed here so that
	 * variable strings that may be used in a regular expression match can be
	 * escaped.
	 * <p>
	 * The backslash has to stay the first entry:
	 * {@link #escapeRegex(String)} walks the entries in order, so escaping the
	 * backslash later would double the backslashes added for earlier entries.
	 * <p>
	 * The anchors "^" and "$" are included so that an escaped string cannot
	 * anchor the expression it is inserted into.
	 * 
	 * @see #escapeRegex(String)
	 */
	public final static String[] REGEX_SPECIAL_CHARS = new String[] { "\\",
			"/", ".", "*", "+", "?", "|", "(", ")", "[", "]", "{", "}", "^",
			"$" };

	/**
	 * Escape a string so that it can be placed inside a JavaScript string
	 * literal. Every double quote, single quote and backslash gets a backslash
	 * put in front of it; all other characters are copied unchanged.
	 * <p>
	 * Characters that were already escaped in the input are escaped again.
	 * 
	 * @param string
	 *            - the string to be cleaned.
	 * @return The cleaned string.
	 */
	// TODO: fix this to not escape things already escaped?
	public static String safeJS(final String string) {
		final StringBuilder escaped = new StringBuilder(string.length() + 8);
		for (int i = 0; i < string.length(); i++) {
			final char current = string.charAt(i);
			final boolean needsEscape = current == '"' || current == '\''
					|| current == '\\';
			if (needsEscape) {
				escaped.append('\\');
			}
			escaped.append(current);
		}
		return escaped.toString();
	}

	/**
	 * Strip one pair of double quotes that encloses a string. Handy when
	 * values read from a file are each wrapped in quotes. A string that does
	 * not both start and end with a double quote is returned unchanged.
	 * 
	 * @param string
	 *            - the string to remove surrounding quotes from
	 * @return original string minus surrounding quotes
	 */
	public static String dequote(final String string) {
		final Pattern surroundingQuotes = Pattern.compile("^\"(.*)\"$");
		final Matcher quoted = surroundingQuotes.matcher(string);
		// group 1 is everything between the first and the last quote
		return quoted.replaceFirst("$1");
	}

	/**
	 * A generic string representation of a list. It just calls the toString
	 * method on each item and inserts a xhtml break or newline between the
	 * items. What type of newline to use is dependent on the flag for whether
	 * the output is for javascript.
	 * 
	 * @param list
	 *            - a list of objects with a usable toString() on each object
	 * @param isJS
	 *            - flag for whether the output is for javascript or xhtml
	 *            markup
	 * @return a string representation of the list
	 */
	public static String toString(final List list, final Boolean isJS) {
		String lineBreak = null;
		String returnString = "";

		if (isJS == null || !isJS) {
			lineBreak = "
";
		} else {
			lineBreak = "\n";
		}
		Iterator it = list.iterator();
		while (it.hasNext()) {
			Object element = it.next();
			returnString += element.toString();
			if (it.hasNext()) {
				returnString += lineBreak;
			}
		}
		return returnString;
	}

	/**
	 * Get a reasonable string representation of an exception. "Reasonable" is
	 * either the exception message or, when there is no message, the
	 * exception's own toString, plus some number of lines of the stacktrace.
	 * If the exception has a cause, a "Caused by" line and the complete stack
	 * trace of the cause are appended as well.
	 * 
	 * @param e
	 *            - exception to convert to string
	 * @param isJS
	 *            - flag to indicate if you want javascript newlines or html
	 *            newlines; null is treated as html
	 * @param lines
	 *            - number of stacktrace lines to add to representation; null
	 *            means the whole stack trace, and a value larger than the
	 *            stack trace is capped at its length
	 * @return a string representation of an exception
	 * 
	 */
	public static String toString(final Exception e, Boolean isJS, Integer lines) {
		// default to HTML newlines if not set
		final String newline = isJS == null || !isJS ? HTML_NEWLINE : JS_NEWLINE;
		final StackTraceElement[] stackTrace = e.getStackTrace();

		// get message, falling back to the toString of the exception
		String description = e.getMessage();
		if (description == null) {
			description = e.toString();
		}
		final StringBuilder message = new StringBuilder().append(description);

		// default to full stack if not set; never read past the end of the
		// trace, which may hold fewer frames than were asked for
		final int requested = lines == null ? stackTrace.length : lines;
		appendStackFrames(message, stackTrace,
				Math.min(requested, stackTrace.length), newline);

		final Throwable cause = e.getCause();
		if (cause != null) {
			final StackTraceElement[] causeTrace = cause.getStackTrace();
			message.append(newline).append("Caused by");
			// the trace of the cause is always added in full
			appendStackFrames(message, causeTrace, causeTrace.length, newline);
		}

		return message.toString();
	}

	// append the first "count" non-empty frames, each preceded by a newline
	private static void appendStackFrames(final StringBuilder target,
			final StackTraceElement[] frames, final int count,
			final String newline) {
		for (int i = 0; i < count; i++) {
			final String frame = frames[i].toString();
			if (!frame.equals("")) {
				target.append(newline).append(frame);
			}
		}
	}

	/**
	 * Get a string representation of an exception that includes the default
	 * number of stack trace lines.
	 * 
	 * @param e
	 *            - exception to convert to string
	 * @param isJS
	 *            - flag to indicate if you want javascript newlines or html
	 *            newlines
	 * @return a string representation of an exception
	 * 
	 * @see #DEFAULT_STACK_LINES
	 * @see #toString(Exception, Boolean, Integer)
	 */
	public static String toString(final Exception e, final Boolean isJS) {
		final Integer stackLines = DEFAULT_STACK_LINES;
		return toString(e, isJS, stackLines);
	}

	/**
	 * Shortcut for {@link #toString(List, Boolean)} that passes no javascript
	 * flag, which produces the xhtml form of the output.
	 * 
	 * @param list
	 *            - a list of objects with a usable toString() on each object
	 * @return a string representation of the list
	 */
	public static String toString(final List list) {
		// a null flag selects the xhtml output
		final Boolean noFlag = null;
		return toString(list, noFlag);
	}

	/**
	 * String representation of an array, delegating to
	 * {@link #toString(List, Boolean)} with the javascript flag set to false.
	 * 
	 * @param list
	 *            - an array of objects with a usable toString() on each object
	 * @return a string representation of the array
	 */
	public static String toString(final Object[] list) {
		final List<Object> items = Arrays.asList(list);
		return toString(items, Boolean.FALSE);
	}

	/**
	 * String representation of an array, delegating to
	 * {@link #toString(List, Boolean)}.
	 * 
	 * @param list
	 *            - an array of objects with a usable toString() on each object
	 * @param isJS
	 *            - flag for whether the output is for javascript or xhtml
	 *            markup
	 * @return a string representation of the array
	 */
	public static String toString(final Object[] list, final Boolean isJS) {
		final List<Object> items = Arrays.asList(list);
		return toString(items, isJS);
	}

	/**
	 * String representation of a string array, delegating to
	 * {@link #toString(List, Boolean)}.
	 * 
	 * @param list
	 *            - the strings to join
	 * @param isJS
	 *            - flag for whether the output is for javascript or xhtml
	 *            markup
	 * @return a string representation of the array
	 */
	public static String toString(final String[] list, final Boolean isJS) {
		final List<String> items = Arrays.asList(list);
		return toString(items, isJS);
	}

	/**
	 * Read a stream line by line into a string and close the stream.
	 * <p>
	 * NOTE: this is not necessarily an accurate representation of the file:
	 * every line terminator is replaced by "\n", and a trailing "\n" is added
	 * even if the input did not end with one.
	 * <p>
	 * NOTE: bytes are decoded by an InputStreamReader that is created without
	 * an explicit charset.
	 * 
	 * @param is
	 *            - stream to read; it is closed before this method returns,
	 *            also when reading fails
	 * @return the contents of the stream
	 * @throws IOException
	 *             if reading from or closing the stream fails
	 */
	// TODO: do we want to close stream here or in outer
	public static String toString(final InputStream is) throws IOException {
		final BufferedReader reader = new BufferedReader(
				new InputStreamReader(is));
		final StringBuilder contents = new StringBuilder();

		try {
			String line;
			while ((line = reader.readLine()) != null) {
				contents.append(line).append('\n');
			}
		} finally {
			// release the stream even when readLine throws
			is.close();
		}

		return contents.toString();
	}

	/**
	 * Null-protected toString().
	 * 
	 * @param object
	 *            - object to convert, may be null
	 * @return null if the object is null, otherwise its toString()
	 */
	public static String toString(final Object object) {
		if (object == null) {
			return null;
		}
		return object.toString();
	}

	/**
	 * Escape a string for use in a regular expression, so that a block of
	 * text can be matched literally. Each occurrence of an entry of
	 * {@link #REGEX_SPECIAL_CHARS} gets a backslash put in front of it.
	 * 
	 * @param regexSource
	 *            - source string to escape
	 * @return an escaped version of the string for use in a regular expression
	 * 
	 * @see #REGEX_SPECIAL_CHARS
	 */
	public static String escapeRegex(final String regexSource) {
		String escaped = regexSource;
		// entries are handled in array order
		for (int i = 0; i < REGEX_SPECIAL_CHARS.length; i++) {
			final String special = REGEX_SPECIAL_CHARS[i];
			// literal (non-regex) replacement of the character by "\" + itself
			escaped = escaped.replace(special, "\\" + special);
		}
		return escaped;
	}

	/**
	 * Truncate a given string to the default length.
	 * 
	 * @param string
	 *            - string to truncate if necessary
	 * @return truncated string if longer than the default length + an
	 *         ellipsis, else the original string
	 * 
	 * @see #DEFAULT_TRUNCATE_LENGTH
	 * @see #truncate(String, Integer)
	 */
	public static String truncate(final String string) {
		// a null length makes the two-argument variant use the default
		final Integer defaultLength = null;
		return truncate(string, defaultLength);
	}

	/**
	 * Truncate a given string to a provided length.
	 * <p>
	 * Note that the returned string may be longer than the specified length
	 * due to the addition of an ellipsis.
	 * <p>
	 * Note that the truncate length is defaulted if null. A negative length is
	 * not guarded against.
	 * 
	 * @param string
	 *            - string to truncate if necessary, may be null
	 * @param length
	 *            - length to truncate to
	 * @return truncated string if longer than specified length + an ellipsis,
	 *         else the original string (null stays null)
	 * 
	 * @see #DEFAULT_TRUNCATE_LENGTH
	 */
	public static String truncate(final String string, final Integer length) {
		final int limit = length != null ? length : DEFAULT_TRUNCATE_LENGTH;
		if (string != null && string.length() > limit) {
			return string.substring(0, limit) + "...";
		}
		return string;
	}

	/**
	 * Normalize a string: trim it, then replace every run of whitespace that
	 * is left with a single space.
	 * 
	 * @param string
	 *            - string to normalize
	 * @return a trimmed and whitespace-compressed version of the string
	 * 
	 * @see #NORMALIZE_WHITE_SPACE_REGEX
	 */
	public static String normalize(final String string) {
		final String trimmed = string.trim();
		return NORMALIZE_WHITE_SPACE_REGEX.matcher(trimmed).replaceAll(" ");
	}

	/**
	 * Get the base of a url such that you have a url string to the same
	 * folder. This is not particularly useful to a restful interface.
	 * <p>
	 * Note that this requires the url string passed in to be a valid url.
	 * Typical usage is to get the base from the current URL. In that case, the
	 * url will always be valid. A string that {@link #URL_BASE_REGEX} does not
	 * match gives null.
	 * 
	 * @param string
	 *            - url to get base of.
	 * @return The base of the passed in url string, ending in "/", or null.
	 * 
	 * @see #URL_BASE_REGEX
	 */
	public static String getURLBase(final String string) {
		final Matcher matcher = URL_BASE_REGEX.matcher(string);
		if (!matcher.matches()) {
			return null;
		}
		final String hostPart = matcher.group(1);
		final String folderPart = matcher.group(3);
		final boolean hasFolder = folderPart != null
				&& folderPart.length() > 0;
		// without a folder part the base is just the host plus a slash
		return hostPart + (hasFolder ? folderPart : "/");
	}

	/**
	 * Get the filename part of a url given as a string.
	 * 
	 * @param url
	 *            - url to get the filename of
	 * @return the filename as determined by {@link #getURLFilename(URL)}, or
	 *         null if the string is not a valid url
	 */
	public static String getURLFilename(final String url) {
		try {
			// parsing is the only step that can fail with a malformed url
			return getURLFilename(new URL(url));
		} catch (MalformedURLException e) {
			// not a valid url
			return null;
		}
	}

	/**
	 * Get the filename part of a URI.
	 * 
	 * @param uri
	 *            - uri to get the filename of
	 * @return the filename as determined by {@link #getURLFilename(URL)}, or
	 *         null if the URI cannot be converted to a URL
	 */
	public static String getURIFilename(final URI uri) {
		final URL url;
		try {
			url = uri.toURL();
		} catch (MalformedURLException e) {
			return null;
		} catch (IllegalArgumentException e) {
			// URI.toURL() rejects a URI that is not absolute this way
			return null;
		}
		// kept outside the try so only conversion failures turn into null
		return getURLFilename(url);
	}

	/**
	 * Get the filename part of a url: everything after the last "/" of the
	 * url's path. The query string is not part of the path, so parameters
	 * that themselves contain "/" or "?" do not influence the result.
	 * 
	 * @param url
	 *            - url to get the filename of
	 * @return the filename, or an empty string if the path is empty or ends
	 *         in "/"
	 */
	public static String getURLFilename(final URL url) {
		// getPath() already excludes the query string, unlike getFile()
		final String path = url.getPath();
		// strip non-file part
		return path.substring(path.lastIndexOf('/') + 1);
	}

	/**
	 * Test to see if string contains only ASCII characters, as defined by
	 * {@link #ASCII_CHARS_REGEX}.
	 * <p>
	 * Note that '^', '*' and '$' match the empty string in the regex and the
	 * single occurrence of the empty string token can't do triple duty. We
	 * return true for an empty string explicitly rather than using the regex.
	 * 
	 * @param string
	 *            - string to test for non-ascii chars.
	 * @return boolean indicating if the string is all ASCII chars
	 * 
	 * @see #ASCII_CHARS_REGEX
	 */
	public static boolean isASCII(final String string) {
		final boolean empty = string.length() == 0;
		// the regex is only consulted for non-empty input
		return empty || ASCII_CHARS_REGEX.matcher(string).matches();
	}

	/**
	 * Get any and all characters that are not ASCII, by deleting every run of
	 * characters that {@link #STRIP_ASCII_REGEX} matches.
	 * 
	 * @param string
	 *            - string to pull non-ASCII characters from
	 * @return string containing all non-ASCII chars
	 * 
	 * @see #STRIP_ASCII_REGEX
	 */
	public static String getNonASCII(final String string) {
		return STRIP_ASCII_REGEX.matcher(string).replaceAll("");
	}

	/**
	 * A generic comma separated string representation of a list. The
	 * toString() of each item is taken and the results are joined with a
	 * comma followed by a space.
	 * 
	 * @param list
	 *            - a list of objects with a usable toString() on each object
	 * 
	 * @return a comma separated string representation of the list
	 */
	// TODO: allow different separator char, quoting, or other formatting
	public static String toSeparatedString(final List list) {
		final StringBuilder joined = new StringBuilder();
		// empty before the first item, ", " before every later one
		String separator = "";
		for (final Object element : list) {
			joined.append(separator).append(element.toString());
			separator = ", ";
		}
		return joined.toString();
	}

	public static String toSeparatedString(final Object[] array) {
		if (array == null) {
			return "";
		}
		List