All downloads are free. Search and download functionality uses the official Maven repository.

gov.nasa.arc.pds.tools.util.StrUtils Maven / Gradle / Ivy

Go to download

The PDS3 Product Tools Library project supports design/generation, validation and submission of archival products to the PDS. This project consists of a library of software classes to support the development of tools to perform these functions and is designed to be utilized by developers from the Engineering Node, Discipline Nodes and the PDS community.

The newest version!
// Copyright 2019, California Institute of Technology ("Caltech").
// U.S. Government sponsorship acknowledged.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// • Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// • Redistributions must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other
// materials provided with the distribution.
// • Neither the name of Caltech nor its operating division, the Jet Propulsion
// Laboratory, nor the names of its contributors may be used to endorse or
// promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package gov.nasa.arc.pds.tools.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * This class provides utilities for string formatting.
 * 
 * @author jagander
 */
@SuppressWarnings("nls")
public class StrUtils {

	// default length to truncate to
	public static final int DEFAULT_TRUNCATE_LENGTH = 100;

	// javascript newline
	public static final String JS_NEWLINE = "\n";

	// xhtml newline
	public static final String HTML_NEWLINE = "<br />";

	// default stack trace lines in exception toString
	public static final int DEFAULT_STACK_LINES = 4;

	// regex to collapse whitespace
	public static final Pattern NORMALIZE_WHITE_SPACE_REGEX = Pattern.compile(
			"\\s+", Pattern.MULTILINE);

	// regex to get base part of url, assumes valid url
	public static final Pattern URL_BASE_REGEX = Pattern
			.compile("^(https?://[^/]+)(([^\\?]*/).*)?");

	// all valid ascii characters including printable control characters
	public static final String ASCII_CHAR_RANGE = "\\r\\n\\t -~";

	public static final Pattern ASCII_CHARS_REGEX = Pattern.compile("^["
			+ ASCII_CHAR_RANGE + "]*$", Pattern.MULTILINE);

	public static final Pattern STRIP_ASCII_REGEX = Pattern.compile("["
			+ ASCII_CHAR_RANGE + "]+", Pattern.MULTILINE);

	// 1-3 digits followed by N collections of a separator and 3 digits followed
	// by an optional decimal separator and N digits
	// "10.000.000"
	public static final Pattern NUMERIC_REGEX = Pattern
			.compile("^[+-]?\\d{1,3}(((,\\d{3})*(\\.\\d+)?)|((\\.\\d{3})*(,\\d+)?))?$");

	public static final Pattern SIMPLE_NUMERIC_REGEX = Pattern
			.compile("^[+-]?((\\d+((,|\\.)\\d*)?)|((,|\\.)\\d+))$");

	// TODO: figure out how to support something like ...enum(';','a');
	public static final Pattern STATEMENT_REGEX = Pattern.compile("^[^;]+;\n",
			Pattern.MULTILINE | Pattern.DOTALL);

	// leading padding, then the padded content (captured as group 1), then
	// trailing padding, all within a single line
	public static final Pattern STRIP_PADDING = Pattern
			.compile(
					"[ \\t\\x0B]*([^ \\t\\x0B\\n\\r\\f]([^\\n\\r\\f]*[^ \\t\\x0B\\n\\r\\f])?)[ \\t\\x0B]*",
					Pattern.MULTILINE);

	/**
	 * Special characters within a regular expression. Listed here so that
	 * variable strings that may be used in a regular expression match can be
	 * escaped.
	 * 
	 * @see #escapeRegex(String)
	 */
	public final static String[] REGEX_SPECIAL_CHARS = new String[] { "\\",
			"/", ".", "*", "+", "?", "|", "(", ")", "[", "]", "{", "}" };

	/**
	 * Scrub a String for things that make it unsuitable to insert into a
	 * JavaScript string. Modify the string as necessary to make it safe but
	 * preserve display.
	 * <p>
	 * Every single quote, double quote and backslash is prefixed with a
	 * backslash, whether or not it was already escaped.
	 * 
	 * @param string
	 *            - the string to be cleaned.
	 * @return The cleaned string.
	 */
	// TODO: fix this to not escape things already escaped?
	public static String safeJS(final String string) {
		return string.replaceAll("(\"|\'|\\\\)", "\\\\$1");
	}

	/**
	 * Remove quotes surrounding a string. This is useful in cases that you are
	 * retrieving strings from a file and each value is quoted.
	 * 
	 * @param string
	 *            - the string to remove surrounding quotes from
	 * @return original string minus surrounding quotes
	 */
	public static String dequote(final String string) {
		return string.replaceFirst("^\"(.*)\"$", "$1");
	}

	/**
	 * A generic string representation of a list. It just calls the toString
	 * method on each item and inserts a xhtml break or newline between the
	 * items. What type of newline to use is dependent on the flag for whether
	 * the output is for javascript.
	 * 
	 * @param list
	 *            - a list of objects with a usable toString() on each object
	 * @param isJS
	 *            - flag for whether the output is for javascript or xhtml
	 *            markup; null is treated as xhtml
	 * @return a string representation of the list
	 */
	public static String toString(final List<?> list, final Boolean isJS) {
		final String lineBreak = isJS == null || !isJS ? HTML_NEWLINE
				: JS_NEWLINE;
		return join(list, lineBreak);
	}

	/**
	 * Get a reasonable string representation of an exception. "Reasonable" is
	 * either the exception message or exception name plus some number of lines
	 * of the stacktrace. If the exception has a cause, the text "Caused by"
	 * and the complete stack trace of the cause are appended.
	 * 
	 * @param e
	 *            - exception to convert to string
	 * @param isJS
	 *            - flag to indicate if you want javascript newlines or html
	 *            newlines; null is treated as html
	 * @param lines
	 *            - number of stacktrace lines to add to representation; null
	 *            means the full stack. Values larger than the stack are
	 *            clamped to the stack size.
	 * @return a string representation of an exception
	 */
	public static String toString(final Exception e, Boolean isJS, Integer lines) {
		// default to HTML newlines if not set
		final String newline = isJS == null || !isJS ? HTML_NEWLINE
				: JS_NEWLINE;

		final StackTraceElement[] stackTrace = e.getStackTrace();

		// default to full stack if not set, and never read past the end of
		// the trace when it is shorter than the requested number of lines
		final int requested = lines == null ? stackTrace.length : lines;
		final int numLines = Math.min(requested, stackTrace.length);

		// if there was no message, use the toString of the exception
		final String message = e.getMessage() == null ? e.toString() : e
				.getMessage();

		final StringBuilder out = new StringBuilder(message);
		appendStackLines(out, stackTrace, numLines, newline);

		final Throwable cause = e.getCause();
		if (cause != null) {
			final StackTraceElement[] causeTrace = cause.getStackTrace();
			out.append(newline).append("Caused by");
			appendStackLines(out, causeTrace, causeTrace.length, newline);
		}
		return out.toString();
	}

	// use default number of lines in stack
	public static String toString(final Exception e, final Boolean isJS) {
		return toString(e, isJS, DEFAULT_STACK_LINES);
	}

	/**
	 * The default usage of {@link #toString(List, Boolean)}, outputting in
	 * xhtml format.
	 * 
	 * @param list
	 *            - a list of objects with a usable toString() on each object
	 * @return a string representation of the list
	 */
	public static String toString(final List<?> list) {
		return toString(list, null);
	}

	public static String toString(final Object[] list) {
		return toString(Arrays.asList(list), false);
	}

	public static String toString(final Object[] list, final Boolean isJS) {
		return toString(Arrays.asList(list), isJS);
	}

	public static String toString(final String[] list, final Boolean isJS) {
		return toString(Arrays.asList(list), isJS);
	}

	/**
	 * Read a stream fully into a string, one line at a time, and close it.
	 * <p>
	 * NOTE: this is not necessarily an accurate representation of the source
	 * since every line ending is replaced with "\n" and a trailing newline is
	 * added even if there wasn't one. Bytes are decoded with the platform
	 * default charset.
	 * 
	 * @param is
	 *            - stream to read; it is closed before this method returns,
	 *            including when reading fails
	 * @return contents of the stream
	 * @throws IOException
	 *             if reading or closing the stream fails
	 */
	// TODO: do we want to close stream here or in outer
	public static String toString(final InputStream is) throws IOException {
		final StringBuilder sb = new StringBuilder();
		try {
			final BufferedReader reader = new BufferedReader(
					new InputStreamReader(is));
			String line;
			while ((line = reader.readLine()) != null) {
				sb.append(line).append("\n");
			}
		} finally {
			// close even when reading fails so the stream does not leak
			is.close();
		}
		return sb.toString();
	}

	// null protect toString()
	public static String toString(final Object object) {
		return object == null ? null : object.toString();
	}

	/**
	 * Escape a string for use in a regular expression. This is useful when a
	 * block of text is to be used in a regular expression match.
	 * 
	 * @param regexSource
	 *            - source string to escape
	 * @return an escaped version of the string for use in a regular expression
	 * 
	 * @see #REGEX_SPECIAL_CHARS
	 */
	public static String escapeRegex(final String regexSource) {
		String returnString = regexSource;
		// the backslash is first in REGEX_SPECIAL_CHARS, so backslashes added
		// for later characters are not escaped a second time
		for (final String specialChar : REGEX_SPECIAL_CHARS) {
			returnString = returnString.replace(specialChar, "\\"
					+ specialChar);
		}
		return returnString;
	}

	/**
	 * Truncate a given string to a default length.
	 * 
	 * @param string
	 *            - string to truncate if necessary
	 * @return truncated string if longer than specified length + an elipses
	 *         else the original string
	 * 
	 * @see #DEFAULT_TRUNCATE_LENGTH
	 * @see #truncate(String, Integer)
	 */
	public static String truncate(final String string) {
		return truncate(string, null);
	}

	/**
	 * Truncate a given string to a provided length.
	 * <p>
	 * Note that the returned string may be longer than the specified length due
	 * to the addition of an elipses.
	 * <p>
	 * Note that truncate length is defaulted if null
	 * 
	 * @param string
	 *            - string to truncate if necessary
	 * @param length
	 *            - length to truncate to
	 * @return truncated string if longer than specified length + an elipses
	 *         else the original string
	 * 
	 * @see #DEFAULT_TRUNCATE_LENGTH
	 */
	public static String truncate(final String string, final Integer length) {
		final int truncateLength = length == null ? DEFAULT_TRUNCATE_LENGTH
				: length;
		if (string == null || string.length() <= truncateLength) {
			return string;
		}
		return string.substring(0, truncateLength) + "...";
	}

	/**
	 * Normalize a string by trimming and compacting whitespace.
	 * 
	 * @param string
	 *            - string to normalize
	 * @return a trimmed and whitespace-compressed version of the string
	 * 
	 * @see #NORMALIZE_WHITE_SPACE_REGEX
	 */
	public static String normalize(final String string) {
		return NORMALIZE_WHITE_SPACE_REGEX.matcher(string.trim()).replaceAll(
				" ");
	}

	/**
	 * Get the base of a url such that you have a url string to the same folder.
	 * This is not particularly useful to a restful interface.
	 * <p>
	 * Note that this requires the url string passed in to be a valid url.
	 * Typical usage is to get the base from the current URL. In that case, the
	 * url will always be valid.
	 * 
	 * @param string
	 *            - url to get base of.
	 * @return The base of the passed in url string, or null if the string
	 *         does not match {@link #URL_BASE_REGEX}.
	 * 
	 * @see #URL_BASE_REGEX
	 */
	public static String getURLBase(final String string) {
		final Matcher matcher = URL_BASE_REGEX.matcher(string);
		if (!matcher.matches()) {
			return null;
		}
		final String urlBase = matcher.group(1);
		final String pastDomain = matcher.group(3);
		if (pastDomain != null && pastDomain.length() > 0) {
			return urlBase + pastDomain;
		}
		return urlBase + "/";
	}

	/**
	 * Get the last path segment of a url given as a string.
	 * 
	 * @param url
	 *            - url string to pull the filename from
	 * @return the filename, or null if the string is not a valid url
	 */
	public static String getURLFilename(final String url) {
		final URL urlObj;
		try {
			urlObj = new URL(url);
		} catch (MalformedURLException e) {
			// not a valid url
			return null;
		}
		return getURLFilename(urlObj);
	}

	/**
	 * Get the last path segment of a uri.
	 * 
	 * @param uri
	 *            - uri to pull the filename from
	 * @return the filename, or null if the uri cannot be converted to a url
	 *         because of a {@link MalformedURLException}
	 */
	public static String getURIFilename(final URI uri) {
		try {
			return getURLFilename(uri.toURL());
		} catch (MalformedURLException e) {
			return null;
		}
	}

	/**
	 * Get the last path segment of a url, ignoring the domain and any query
	 * string.
	 * 
	 * @param url
	 *            - url to pull the filename from
	 * @return everything after the last forward slash of the url's path
	 */
	public static String getURLFilename(final URL url) {
		// the path never includes the query, so get params that contain a
		// forward slash or a question mark cannot leak into the filename
		final String path = url.getPath();
		// strip non-file part
		return path.substring(path.lastIndexOf("/") + 1);
	}

	/**
	 * Test to see if string contains only ASCII characters.
	 * <p>
	 * Note that '^', '*' and '$' match the empty string in the regex and the
	 * single occurrence of the empty string token can't do triple duty. We
	 * return true for an empty string explicitly rather than using the regex.
	 * 
	 * @param string
	 *            - string to test for non-ascii chars.
	 * @return boolean indicating if the string is all ASCII chars
	 * 
	 * @see #ASCII_CHARS_REGEX
	 */
	public static boolean isASCII(final String string) {
		if (string.length() == 0) {
			return true;
		}
		return ASCII_CHARS_REGEX.matcher(string).matches();
	}

	/**
	 * Get any and all characters that are not ASCII.
	 * 
	 * @param string
	 *            - string to pull non-ASCII characters from
	 * @return string containing all non-ASCII chars
	 * 
	 * @see #STRIP_ASCII_REGEX
	 */
	public static String getNonASCII(final String string) {
		return STRIP_ASCII_REGEX.matcher(string).replaceAll("");
	}

	/**
	 * A generic comma separated string representation of a list. It just calls
	 * the toString method on each item and inserts a comma between the items.
	 * 
	 * @param list
	 *            - a list of objects with a usable toString() on each object
	 * 
	 * @return a comma separated string representation of the list
	 */
	// TODO: allow different separator char, quoting, or other formatting
	public static String toSeparatedString(final List<?> list) {
		return join(list, ", ");
	}

	/**
	 * Array variant of {@link #toSeparatedString(List)}.
	 * 
	 * @param array
	 *            - objects with a usable toString(); may be null
	 * @return a comma separated string, or the empty string for a null array
	 */
	public static String toSeparatedString(final Object[] array) {
		if (array == null) {
			return "";
		}
		return toSeparatedString(Arrays.asList(array));
	}

	/**
	 * Test whether a string looks like a number according to
	 * {@link #SIMPLE_NUMERIC_REGEX} or {@link #NUMERIC_REGEX}.
	 * 
	 * @param string
	 *            - string to test; may be null
	 * @return true if either pattern matches the whole string, false
	 *         otherwise or if the string is null
	 */
	public static boolean isNumber(final String string) {
		if (string == null) {
			return false;
		}
		return SIMPLE_NUMERIC_REGEX.matcher(string).matches()
				|| NUMERIC_REGEX.matcher(string).matches();
	}

	/**
	 * Parse a string into a number using the number format of a locale.
	 * 
	 * @param string
	 *            - string to parse; plus signs are removed before parsing
	 * @param locale
	 *            - locale whose number format is used for parsing
	 * @return the parsed number, or null if the string does not pass
	 *         {@link #isNumber(String)} or cannot be parsed
	 */
	public static Number getNumber(final String string, final Locale locale) {
		if (!isNumber(string)) {
			return null;
		}
		try {
			return NumberFormat.getInstance(locale).parse(
					string.replaceAll("\\+", ""));
		} catch (ParseException e) {
			// shouldn't be able to get here unless isNumber has a bug
			return null;
		}
	}

	/**
	 * Parse a string into a number using the US English number format.
	 * 
	 * @param string
	 *            - string to parse
	 * @return the parsed number, or null if the string is not a number
	 * 
	 * @see #getNumber(String, Locale)
	 */
	public static Number getNumber(final String string) {
		return getNumber(string, Locale.US);
	}

	/**
	 * Parse the digits of a string into a number, discarding every other
	 * character (including signs and separators).
	 * 
	 * @param string
	 *            - string to pull digits from
	 * @return the number formed by the digits, or null if they cannot be
	 *         parsed, e.g. when the string contains no digits
	 */
	public static Number getNumberLoose(final String string) {
		final String numberVal = string.replaceAll("[^0-9]", "");
		try {
			return NumberFormat.getInstance(Locale.US).parse(numberVal);
		} catch (ParseException e) {
			return null;
		}
	}

	/**
	 * Split a block of text into the statements matched by
	 * {@link #STATEMENT_REGEX}.
	 * 
	 * @param sql
	 *            - text to split
	 * @return an unmodifiable list of the matched statements, in order; each
	 *         entry is the complete matched text
	 */
	public static List<String> toStatements(final String sql) {
		final List<String> statements = new ArrayList<String>();
		final Matcher matcher = STATEMENT_REGEX.matcher(sql);
		while (matcher.find()) {
			statements.add(matcher.group());
		}
		return Collections.unmodifiableList(statements);
	}

	/**
	 * Test whether a string is null, empty, or empty after trimming.
	 * 
	 * @param string
	 *            - string to test
	 * @return true if there is nothing left after trimming
	 */
	public static boolean nullOrEmpty(final String string) {
		return string == null || string.trim().equals("");
	}

	/**
	 * Strip the padding from each line of a multiline value and join the lines
	 * with single spaces. Useful when evaluating a multiline textual value
	 * with padding.
	 * 
	 * @param string
	 *            - value to strip; may be null
	 * @return the stripped value, or null if the value was null
	 * 
	 * @see #STRIP_PADDING
	 */
	public static String stripPadding(final String string) {
		if (string == null) {
			return null;
		}
		// TODO: consolidate replacements and see about making performant

		// strip a leading line that is just whitespace
		// NOTE(review): without Pattern.MULTILINE the '^' only matches at the
		// start of the input, so later whitespace-only lines are kept -
		// confirm whether that is intended
		String temp = string.replaceAll("^[ \\t\\x0B]*(\\r\\n|\r|\n)", "");

		// keep only the content of each line, dropping the padding on both
		// sides of it
		temp = STRIP_PADDING.matcher(temp).replaceAll("$1");

		// treat line endings as spaces so return plus line feed don't count as
		// 2 chars
		return temp.replaceAll("(\\r\\n|\\n\\r|\r|\n)", " ");
	}

	/**
	 * Lower case a string and then upper case the first character of each
	 * space separated part.
	 * 
	 * @param string
	 *            - string to convert
	 * @return the converted string; empty parts produced by consecutive spaces
	 *         are passed through unchanged
	 */
	public static String toUpper(final String string) {
		final String[] parts = string.toLowerCase().split(" ");
		final StringBuilder out = new StringBuilder();
		for (int i = 0; i < parts.length; i++) {
			final String part = parts[i];
			// an empty part has no first character to upper case
			if (part.length() > 0) {
				out.append(part.substring(0, 1).toUpperCase()).append(
						part.substring(1));
			}
			if (i < parts.length - 1) {
				out.append(" ");
			}
		}
		return out.toString();
	}

	/**
	 * Get a presentation version of the string, default to: trimming, replacing
	 * underscores with spaces, uppercasing.
	 * 
	 * @param string
	 *            - string to convert
	 * @param capitalize
	 *            - whether to apply {@link #toUpper(String)}; null means true
	 * @param replaceUnderscore
	 *            - whether to replace underscores with spaces; null means true
	 * @return the presentation version of the string
	 */
	// TODO: add more actions and flags
	public static String getNice(final String string, final Boolean capitalize,
			final Boolean replaceUnderscore) {
		String out = string.trim();
		if (replaceUnderscore == null || replaceUnderscore) {
			out = out.replaceAll("_", " ");
		}
		if (capitalize == null || capitalize) {
			out = toUpper(out);
		}
		return out;
	}

	public static String getNice(final String string) {
		return getNice(string, true, true);
	}

	/**
	 * Surround a string with exactly one asterisk on each side, collapsing any
	 * asterisks already present at either end.
	 * 
	 * @param string
	 *            - string to wrap
	 * @return the wrapped string
	 */
	public static String addWildcards(final String string) {
		return string.replaceFirst("^\\**+(.*?)\\**+$", "*$1*");
	}

	/**
	 * Get the string in the opposite case of its first character: upper cased
	 * if the first character is lower case, lower cased otherwise.
	 * 
	 * @param string
	 *            - string to convert
	 * @return the converted string; an empty string is returned as is
	 */
	public static String getAlternateCaseString(final String string) {
		// nothing to inspect or convert
		if (string.length() == 0) {
			return string;
		}
		if (Character.isLowerCase(string.charAt(0))) {
			return string.toUpperCase();
		}
		return string.toLowerCase();
	}

	/**
	 * Get the string form of the first non null argument. Ex. get source
	 * string where might have either url, uri, file, or string...
	 * 
	 * @param objects
	 *            - candidates in order of preference
	 * @return toString() of the first non null candidate, or null if all
	 *         candidates are null
	 */
	public static String getNonNull(final Object... objects) {
		for (final Object object : objects) {
			if (object != null) {
				return object.toString();
			}
		}
		return null;
	}

	// Concatenate toString() of every element, placing the separator between
	// consecutive elements only.
	private static String join(final List<?> list, final String separator) {
		final StringBuilder sb = new StringBuilder();
		final Iterator<?> it = list.iterator();
		while (it.hasNext()) {
			sb.append(it.next().toString());
			if (it.hasNext()) {
				sb.append(separator);
			}
		}
		return sb.toString();
	}

	// Append the first count non-empty stack lines, each preceded by newline.
	private static void appendStackLines(final StringBuilder out,
			final StackTraceElement[] trace, final int count,
			final String newline) {
		for (int i = 0; i < count; i++) {
			final String stackString = trace[i].toString();
			if (!stackString.equals("")) {
				out.append(newline).append(stackString);
			}
		}
	}
}