// org.openrdf.repository.object.LangString Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of alibaba-composition-object Show documentation
// The Object Composition library merges multiple Java objects into a single multi-subject object.
// The newest version!
/*
 * Copyright (c) 2012, 3 Round Stones Inc. Some rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * - Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution. 
 * - Neither the name of the openrdf.org nor the names of its contributors may
 *   be used to endorse or promote products derived from this software without
 *   specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 */
package org.openrdf.repository.object;

import java.io.Serializable;
import java.util.Locale;
import java.util.regex.PatternSyntaxException;

/**
 * Represents a string and language tag, and thus represents a plain RDF literal
 * with a language tag.
 * 
 * This class includes a number of similar methods to {@link java.lang.String},
 * that preserve the language tag. In addition it includes a method to compare
 * language tags {@link #matchesLang(String)}.
 * 
 * @author James Leigh
 * 
 */
public class LangString implements CharSequence, Serializable,
		Comparable<LangString> {
	// Comparable is parameterised with LangString so that the typed
	// compareTo(LangString) method satisfies the interface; the raw type would
	// require a compareTo(Object) method instead.
	private static final long serialVersionUID = 8175463447271413979L;

	/**
	 * Static factory equivalent to {@code new LangString(label)}: the language
	 * tag is derived from the JVM's default {@link Locale}.
	 * 
	 * @param label
	 *            the text of the literal
	 * @return a new LangString holding {@code label} and the default language
	 */
	public static LangString valueOf(String label) {
		final LangString created = new LangString(label);
		return created;
	}

	/**
	 * Static factory equivalent to {@code new LangString(label, language)}.
	 * 
	 * @param label
	 *            the text of the literal
	 * @param language
	 *            the language tag to attach to the label
	 * @return a new LangString holding {@code label} and {@code language}
	 */
	public static LangString valueOf(String label, String language) {
		final LangString created = new LangString(label, language);
		return created;
	}

	/**
	 * Renders a {@link Locale} as a hyphen-separated tag made of its language,
	 * its lower-cased country and its variant.
	 * 
	 * The country slot is written (possibly empty) whenever a variant follows a
	 * language, so the variant always ends up in third position. A variant
	 * with neither language nor country is dropped.
	 * 
	 * @param locale
	 *            the locale to convert; must not be null
	 * @return the tag text, which is empty when the locale has no language and
	 *         no country
	 */
	private static String toLang(Locale locale) {
		String language = locale.getLanguage();
		String country = locale.getCountry();
		String variant = locale.getVariant();
		boolean hasLanguage = language.length() != 0;
		boolean hasCountry = country.length() != 0;
		boolean hasVariant = variant.length() != 0;
		StringBuilder result = new StringBuilder(language);
		if (hasCountry || (hasLanguage && hasVariant)) {
			// Lower-case with Locale.ROOT: the no-argument toLowerCase() uses the
			// JVM default locale, which under Turkish turns "IT" into a
			// dotless-i form instead of "it".
			result.append('-').append(country.toLowerCase(Locale.ROOT));
		}
		if (hasVariant && (hasLanguage || hasCountry)) {
			result.append('-').append(variant);
		}
		return result.toString();
	}

	/** The text of the literal; returned unchanged by {@link #toString()}. */
	private final String label;
	/** The language tag, either supplied by the caller or derived from a Locale. */
	private final String lang;
	/**
	 * Locale form of the language tag. Set directly by the Locale constructor;
	 * otherwise left null until {@link #getLocale()} computes it.
	 */
	private Locale locale;

	/**
	 * Creates a LangString whose language is taken from the JVM's default
	 * {@link Locale} at construction time.
	 * 
	 * @param label
	 *            the text of the literal
	 */
	public LangString(String label) {
		this(label, Locale.getDefault());
	}

	/**
	 * Constructs a LangString using the given label and language.
	 * 
	 * @param label
	 * @param lang
	 */
	public LangString(String label, String lang) {
		assert label != null;
		if (lang != null && lang.length() < 1)
			throw new IllegalArgumentException("language cannot be the empty string");
		this.label = label;
		this.lang = lang == null ? toLang(Locale.getDefault()) : lang;
	}

	/**
	 * Creates a LangString from a label and a {@link Locale}. The locale is
	 * converted to a language tag and also kept, so {@link #getLocale()} can
	 * return it without parsing the tag.
	 * 
	 * @param label
	 *            the text of the literal
	 * @param locale
	 *            the locale supplying the language; must not be null
	 * @throws IllegalArgumentException
	 *             if the locale converts to an empty language tag
	 */
	public LangString(String label, Locale locale) {
		this(label, toLang(locale));
		this.locale = locale;
	}

	/**
	 * The language tag of this LangString.
	 * 
	 * @return the language tag exactly as stored at construction time
	 */
	public String getLang() {
		return lang;
	}

	/**
	 * The {@link String} portion of this object, without its language tag.
	 * 
	 * @return the label this LangString was constructed with
	 */
	@Override
	public String toString() {
		return label;
	}

	/**
	 * The language of this LangString as a {@link Locale}.
	 * 
	 * When no Locale is stored yet, one is built by splitting the language tag
	 * on '-' into at most three parts, used as language, country and variant.
	 * The result is kept in the {@code locale} field for later calls.
	 * 
	 * @return this language as a Locale
	 */
	public synchronized Locale getLocale() {
		if (locale != null) {
			return locale;
		}
		final String tag = getLang();
		final String[] parts = tag.split("-", 3);
		switch (parts.length) {
		case 1:
			locale = new Locale(tag);
			break;
		case 2:
			locale = new Locale(parts[0], parts[1]);
			break;
		default:
			locale = new Locale(parts[0], parts[1], parts[2]);
			break;
		}
		return locale;
	}

	/**
	 * Returns the hash code of the label, as computed by
	 * {@link String#hashCode()}:
	 * 
	 * <pre>
	 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
	 * </pre>
	 * 
	 * using {@code int} arithmetic, where {@code s[i]} is the <i>i</i>th
	 * character of the label, {@code n} is its length and {@code ^} denotes
	 * exponentiation. The language tag does not contribute to the value.
	 * 
	 * @return a hash code value for this object.
	 */
	@Override
	public int hashCode() {
		final String text = toString();
		return text.hashCode();
	}

	/**
	 * Compares this LangString to the specified object. The result is
	 * {@code true} if and only if the argument is a {@code LangString} whose
	 * label equals this label exactly and whose language tag equals this
	 * language tag ignoring case.
	 * 
	 * @param o
	 *            The object to compare this {@code LangString} against
	 * 
	 * @return {@code true} if the given object represents a {@code LangString}
	 *         equivalent to this one, {@code false} otherwise
	 * 
	 * @see #compareTo(LangString)
	 * @see #equalsIgnoreCase(Object)
	 */
	@Override
	public boolean equals(Object o) {
		if (o == this) {
			return true;
		}
		if (!(o instanceof LangString)) {
			return false;
		}
		final LangString that = (LangString) o;
		return toString().equals(that.toString())
				&& getLang().equalsIgnoreCase(that.getLang());
	}

	/**
	 * Compares this {@code LangString} to another object, ignoring case in
	 * both the label and the language tag. The labels are compared with
	 * {@link String#equalsIgnoreCase(String)}, which treats two characters as
	 * the same when at least one of the following is true:
	 * <ul>
	 * <li>the two characters are the same (as compared by the {@code ==}
	 * operator);</li>
	 * <li>applying {@link java.lang.Character#toUpperCase(char)} to each
	 * character produces the same result;</li>
	 * <li>applying {@link java.lang.Character#toLowerCase(char)} to each
	 * character produces the same result.</li>
	 * </ul>
	 * 
	 * @param o
	 *            The object to compare this {@code LangString} against
	 * 
	 * @return {@code true} if the argument is a {@code LangString} that is
	 *         equivalent to this one ignoring case; {@code false} otherwise
	 * 
	 * @see #equals(Object)
	 */
	public boolean equalsIgnoreCase(Object o) {
		if (o == this) {
			return true;
		}
		if (!(o instanceof LangString)) {
			return false;
		}
		final LangString that = (LangString) o;
		return toString().equalsIgnoreCase(that.toString())
				&& getLang().equalsIgnoreCase(that.getLang());
	}

	/**
	 * Orders LangStrings by language tag first and by label second.
	 * 
	 * The language tags are compared with
	 * {@link String#compareToIgnoreCase(String)}. Only when they are equal
	 * ignoring case are the labels compared, using
	 * {@link String#compareTo(String)}: lexicographically by character value,
	 * with a shorter string preceding any longer string it is a prefix of.
	 * 
	 * The result is zero exactly when {@link #equals(Object)} would return
	 * true. A null argument is treated as preceding this object.
	 * 
	 * @param o
	 *            the LangString to be compared.
	 * @return the value 0 if the argument is this object or has an equal
	 *         language (ignoring case) and an equal label; a value less than
	 *         0 if this object orders before the argument; and a value
	 *         greater than 0 if this object orders after the argument or the
	 *         argument is null.
	 */
	public int compareTo(LangString o) {
		if (o == this) {
			return 0;
		}
		if (o == null) {
			return 1;
		}
		final int byLang = getLang().compareToIgnoreCase(o.getLang());
		return byLang != 0 ? byLang : toString().compareTo(o.toString());
	}

	/**
	 * Orders LangStrings by language tag first and by label second, ignoring
	 * case differences in both. Each part is compared with
	 * {@link String#compareToIgnoreCase(String)}; the labels are only
	 * consulted when the language tags are equal ignoring case.
	 * 
	 * Note that this comparison does not take locale into account, and will
	 * result in an unsatisfactory ordering for certain locales. The java.text
	 * package provides collators to allow locale-sensitive ordering.
	 * 
	 * @param o
	 *            the LangString to be compared.
	 * @return a negative integer, zero, or a positive integer as this object
	 *         orders before, equal to, or after the argument, ignoring case
	 *         considerations; 1 when the argument is null.
	 * @see java.text.Collator#compare(String, String)
	 */
	public int compareToIgnoreCase(LangString o) {
		if (o == this) {
			return 0;
		}
		if (o == null) {
			return 1;
		}
		final int byLang = getLang().compareToIgnoreCase(o.getLang());
		return byLang != 0 ? byLang : toString().compareToIgnoreCase(o.toString());
	}

	/**
	 * Tests this language tag against an extended language range using the
	 * extended filtering algorithm of RFC 4647, section 3.3.2.
	 * 
	 * A language range matches a particular language tag if each respective
	 * list of subtags matches. Two subtags match if either they are the same
	 * when compared case-insensitively or the language range's subtag is the
	 * wildcard '*'. This also applies to the first subtag, so the range "*"
	 * matches every tag.
	 * 
	 * See http://tools.ietf.org/html/rfc4647
	 * 
	 * @param range
	 *            In a language range, each subtag MUST either be a sequence of
	 *            ASCII alphanumeric characters or the single character '*'
	 *            (%x2A, ASTERISK). The character '*' is a "wildcard" that
	 *            matches any sequence of subtags. Must not be null.
	 * @return true if this has a language tag that matches the extended
	 *         language range given; otherwise, false
	 */
	public boolean matchesLang(String range) {
		// 1. Split both the extended language range and the language tag being
		// compared into a list of subtags by dividing on the hyphen (%x2D)
		// character.
		String[] subtags = getLang().split("-");
		String[] subranges = range.split("-");

		// String.split drops trailing empty strings, so an input made only of
		// hyphens yields an empty array; nothing can match in that case.
		if (subtags.length == 0 || subranges.length == 0) {
			return false;
		}

		// 2. Begin with the first subtag in each list. If the first subtag in
		// the range does not match the first subtag in the tag, the overall
		// match fails. Otherwise, move to the next subtag in both the
		// range and the tag. A wildcard in the range matches any first subtag.
		if (!"*".equals(subranges[0])
				&& !subranges[0].equalsIgnoreCase(subtags[0])) {
			return false;
		}

		// 3. While there are more subtags left in the language range's list:
		int r = 1, t = 1;
		while (r < subranges.length) {

			// A. If the subtag currently being examined in the range is the
			// wildcard ('*'), move to the next subtag in the range and
			// continue with the loop.
			if ("*".equals(subranges[r])) {
				r++;
				continue;
			}

			// B. Else, if there are no more subtags in the language tag's
			// list, the match fails.
			if (t >= subtags.length) {
				return false;
			}

			// C. Else, if the current subtag in the range's list matches the
			// current subtag in the language tag's list, move to the next
			// subtag in both lists and continue with the loop.
			if (subranges[r].equalsIgnoreCase(subtags[t])) {
				r++;
				t++;
				continue;
			}

			// D. Else, if the language tag's subtag is a "singleton" (a single
			// letter or digit, which includes the private-use subtag 'x')
			// the match fails.
			if (subtags[t].length() == 1) {
				return false;
			}

			// E. Else, move to the next subtag in the language tag's list and
			// continue with the loop.
			t++;
		}
		// 4. When the language range's list has no more subtags, the match
		// succeeds.
		return true;
	}

	/**
	 * Concatenates the specified LangString to the end of this one. A new
	 * LangString is always returned.
	 * 
	 * The language of the result is chosen as follows:
	 * <ol>
	 * <li>if both language tags are equal ignoring case, this tag;</li>
	 * <li>if one tag matches the other when the other is used as a language
	 * range (see {@link #matchesLang(String)}), the tag that served as the
	 * range, e.g. "en" for "en" and "en-US";</li>
	 * <li>otherwise the longest run of leading subtags shared by both tags,
	 * e.g. "en" for "en-US" and "en-GB".</li>
	 * </ol>
	 * 
	 * Examples of the label handling:
	 * 
	 * <pre>
	 * "cares".concat("s") returns "caress"
	 * "to".concat("get").concat("her") returns "together"
	 * </pre>
	 * 
	 * @param str
	 *            the LangString that is concatenated to the end of this one;
	 *            must not be null.
	 * @return a LangString that represents the concatenation of this object's
	 *         characters followed by the argument's characters.
	 * @throws IllegalArgumentException
	 *             if the languages share no leading subtag
	 */
	public LangString concat(LangString str) {
		String concat = toString().concat(str.toString());
		// check for same lang tag
		String l1 = getLang();
		String l2 = str.getLang();
		if (l1.equalsIgnoreCase(l2)) {
			return new LangString(concat, l1);
		}
		// check for semantic subset
		if (str.matchesLang(l1)) {
			return new LangString(concat, l1);
		}
		if (matchesLang(l2)) {
			return new LangString(concat, l2);
		}
		// use common prefix
		String prefix = l1.length() < l2.length() ? l1 : l2;
		String other = l1.length() < l2.length() ? l2 : l1;
		String common = "";
		for (int i = prefix.indexOf('-'); i >= 0; i = prefix.indexOf('-', i + 1)) {
			// Compare up to and including the hyphen so that only complete
			// subtags are treated as shared.
			String withHyphen = prefix.substring(0, i + 1);
			if (!withHyphen.equalsIgnoreCase(other.substring(0, i + 1))) {
				break;
			}
			// Keep the shared part without the hyphen; a language tag must
			// not end with a separator.
			common = prefix.substring(0, i);
		}
		if (common.length() < 1) {
			throw new IllegalArgumentException("Different languages cannot be concatenated: " + l1 + " and " + l2);
		}
		return new LangString(concat, common);
	}

	/**
	 * Concatenates the specified string to the end of this label, keeping
	 * this language tag. A new LangString is always returned.
	 * 
	 * Examples of the label handling:
	 * 
	 * <pre>
	 * "cares".concat("s") returns "caress"
	 * "to".concat("get").concat("her") returns "together"
	 * </pre>
	 * 
	 * @param str
	 *            the String that is concatenated to the end of this label.
	 * @return a LangString that represents the concatenation of this object's
	 *         characters followed by the string argument's characters.
	 */
	public LangString concat(String str) {
		final String joined = toString().concat(str);
		return new LangString(joined, getLang());
	}

	/**
	 * Returns a new LangString that is a subsequence of this one and carries
	 * the same language tag.
	 * 
	 * An invocation of this method of the form
	 * 
	 * <pre>
	 * str.subSequence(begin, end)
	 * </pre>
	 * 
	 * behaves in exactly the same way as the invocation
	 * 
	 * <pre>
	 * str.substring(begin, end)
	 * </pre>
	 * 
	 * This method is defined so that this class can implement the
	 * {@link CharSequence} interface.
	 * 
	 * @param start
	 *            the begin index, inclusive.
	 * @param end
	 *            the end index, exclusive.
	 * @return the specified subsequence.
	 * 
	 * @throws IndexOutOfBoundsException
	 *             if start or end are negative, if end is greater than
	 *             length(), or if start is greater than end
	 */
	public LangString subSequence(int start, int end) {
		final String slice = toString().substring(start, end);
		return new LangString(slice, getLang());
	}

	/**
	 * Returns a new LangString whose label is a substring of this label and
	 * whose language tag is unchanged. The substring begins with the character
	 * at the specified index and extends to the end of the label.
	 * 
	 * Examples of the label handling:
	 * 
	 * <pre>
	 * "unhappy".substring(2) returns "happy"
	 * "Harbison".substring(3) returns "bison"
	 * "emptiness".substring(9) returns "" (an empty string)
	 * </pre>
	 * 
	 * @param beginIndex
	 *            the beginning index, inclusive.
	 * @return the specified substring.
	 * @exception IndexOutOfBoundsException
	 *                if beginIndex is negative or larger than the length of
	 *                this label.
	 */
	public LangString substring(int beginIndex) {
		final String tail = toString().substring(beginIndex);
		return new LangString(tail, getLang());
	}

	/**
	 * Returns a new LangString whose label is a substring of this label and
	 * whose language tag is unchanged. The substring begins at the specified
	 * beginIndex and extends to the character at index endIndex - 1. Thus the
	 * length of the substring is endIndex-beginIndex.
	 * 
	 * Examples of the label handling:
	 * 
	 * <pre>
	 * "hamburger".substring(4, 8) returns "urge"
	 * "smiles".substring(1, 5) returns "mile"
	 * </pre>
	 * 
	 * @param beginIndex
	 *            the beginning index, inclusive.
	 * @param endIndex
	 *            the ending index, exclusive.
	 * @return the specified substring.
	 * @exception IndexOutOfBoundsException
	 *                if the beginIndex is negative, or endIndex is larger than
	 *                the length of this label, or beginIndex is larger than
	 *                endIndex.
	 */
	public LangString substring(int beginIndex, int endIndex) {
		final String slice = toString().substring(beginIndex, endIndex);
		return new LangString(slice, getLang());
	}

	/**
	 * Returns a new LangString resulting from replacing all occurrences of
	 * oldChar in this label with newChar. The language tag is unchanged, and a
	 * new LangString is returned even when oldChar does not occur.
	 * 
	 * Examples of the label handling:
	 * 
	 * <pre>
	 * "mesquite in your cellar".replace('e', 'o')
	 *         returns "mosquito in your collar"
	 * "the war of baronets".replace('r', 'y')
	 *         returns "the way of bayonets"
	 * "sparring with a purple porpoise".replace('p', 't')
	 *         returns "starring with a turtle tortoise"
	 * "JonL".replace('q', 'x') returns "JonL" (no change)
	 * </pre>
	 * 
	 * @param oldChar
	 *            the old character.
	 * @param newChar
	 *            the new character.
	 * @return a LangString derived from this one by replacing every occurrence
	 *         of oldChar with newChar.
	 */
	public LangString replace(char oldChar, char newChar) {
		final String replaced = toString().replace(oldChar, newChar);
		return new LangString(replaced, getLang());
	}

	/**
	 * Replaces the first substring of this label that matches the given
	 * regular expression with the given replacement, keeping the language tag.
	 * 
	 * The label of the result is the same as that produced by the expression
	 * 
	 * <blockquote>
	 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#compile
	 * compile}(regex).{@link
	 * java.util.regex.Pattern#matcher(java.lang.CharSequence)
	 * matcher}(str).{@link java.util.regex.Matcher#replaceFirst
	 * replaceFirst}(repl)
	 * </blockquote>
	 * 
	 * Note that backslashes and dollar signs in the replacement string may
	 * cause the results to be different than if it were being treated as a
	 * literal replacement string; see
	 * {@link java.util.regex.Matcher#replaceFirst}. Use
	 * {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
	 * meaning of these characters, if desired.
	 * 
	 * @param regex
	 *            the regular expression to which this label is to be matched
	 * @param replacement
	 *            the string to be substituted for the first match
	 * 
	 * @return The resulting LangString
	 * 
	 * @throws PatternSyntaxException
	 *             if the regular expression's syntax is invalid
	 * 
	 * @see java.util.regex.Pattern
	 */
	public LangString replaceFirst(String regex, String replacement) {
		final String replaced = toString().replaceFirst(regex, replacement);
		return new LangString(replaced, getLang());
	}

	/**
	 * Replaces each substring of this label that matches the given regular
	 * expression with the given replacement, keeping the language tag.
	 * 
	 * The label of the result is the same as that produced by the expression
	 * 
	 * <blockquote>
	 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#compile
	 * compile}(regex).{@link
	 * java.util.regex.Pattern#matcher(java.lang.CharSequence)
	 * matcher}(str).{@link java.util.regex.Matcher#replaceAll
	 * replaceAll}(repl)
	 * </blockquote>
	 * 
	 * Note that backslashes and dollar signs in the replacement string may
	 * cause the results to be different than if it were being treated as a
	 * literal replacement string; see
	 * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}. Use
	 * {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
	 * meaning of these characters, if desired.
	 * 
	 * @param regex
	 *            the regular expression to which this label is to be matched
	 * @param replacement
	 *            the string to be substituted for each match
	 * 
	 * @return The resulting LangString
	 * 
	 * @throws PatternSyntaxException
	 *             if the regular expression's syntax is invalid
	 * 
	 * @see java.util.regex.Pattern
	 */
	public LangString replaceAll(String regex, String replacement) {
		final String replaced = toString().replaceAll(regex, replacement);
		return new LangString(replaced, getLang());
	}

	/**
	 * Replaces each substring of this label that matches the literal target
	 * sequence with the specified literal replacement sequence, keeping the
	 * language tag. The replacement proceeds from the beginning of the label
	 * to the end, for example, replacing "aa" with "b" in the string "aaa"
	 * will result in "ba" rather than "ab".
	 * 
	 * @param target
	 *            The sequence of char values to be replaced
	 * @param replacement
	 *            The replacement sequence of char values
	 * @return The resulting LangString
	 * @throws NullPointerException
	 *             if target or replacement is null.
	 */
	public LangString replace(CharSequence target, CharSequence replacement) {
		final String replaced = toString().replace(target, replacement);
		return new LangString(replaced, getLang());
	}

	/**
	 * Splits this label around matches of the given regular expression and
	 * wraps every piece in a LangString carrying this language tag.
	 * 
	 * The pieces are those produced by {@link String#split(String, int)}: each
	 * substring of the label that is terminated by a match of the expression
	 * or by the end of the label, in the order in which they occur. If the
	 * expression does not match any part of the input then the resulting array
	 * has just one element, namely the whole label.
	 * 
	 * The limit parameter controls the number of times the pattern is applied
	 * and therefore affects the length of the resulting array. If the limit
	 * <i>n</i> is greater than zero then the pattern will be applied at most
	 * <i>n</i> - 1 times, the array's length will be no greater than <i>n</i>,
	 * and the array's last entry will contain all input beyond the last
	 * matched delimiter. If <i>n</i> is non-positive then the pattern will be
	 * applied as many times as possible and the array can have any length. If
	 * <i>n</i> is zero then, in addition, trailing empty strings will be
	 * discarded.
	 * 
	 * The string "boo:and:foo", for example, yields the following results with
	 * these parameters:
	 * 
	 * <pre>
	 * Regex  Limit  Result
	 * :      2      { "boo", "and:foo" }
	 * :      5      { "boo", "and", "foo" }
	 * :      -2     { "boo", "and", "foo" }
	 * o      5      { "b", "", ":and:f", "", "" }
	 * o      -2     { "b", "", ":and:f", "", "" }
	 * o      0      { "b", "", ":and:f" }
	 * </pre>
	 * 
	 * @param regex
	 *            the delimiting regular expression
	 * 
	 * @param limit
	 *            the result threshold, as described above
	 * 
	 * @return the array of LangStrings computed by splitting this label around
	 *         matches of the given regular expression
	 * 
	 * @throws PatternSyntaxException
	 *             if the regular expression's syntax is invalid
	 * 
	 * @see java.util.regex.Pattern
	 */
	public LangString[] split(String regex, int limit) {
		final String[] pieces = toString().split(regex, limit);
		final LangString[] tagged = new LangString[pieces.length];
		int index = 0;
		for (String piece : pieces) {
			tagged[index] = new LangString(piece, getLang());
			index++;
		}
		return tagged;
	}

	/**
	 * Splits this label around matches of the given regular expression.
	 * 
	 * This method works as if by invoking the two-argument
	 * {@link #split(String, int) split} method with the given expression and a
	 * limit argument of zero. Trailing empty strings are therefore not included
	 * in the resulting array.
	 * 
	 * The string "boo:and:foo", for example, yields the following results with
	 * these expressions:
	 * 
	 * <pre>
	 * Regex  Result
	 * :      { "boo", "and", "foo" }
	 * o      { "b", "", ":and:f" }
	 * </pre>
	 * 
	 * @param regex
	 *            the delimiting regular expression
	 * 
	 * @return the array of LangStrings computed by splitting this label around
	 *         matches of the given regular expression
	 * 
	 * @throws PatternSyntaxException
	 *             if the regular expression's syntax is invalid
	 * 
	 * @see java.util.regex.Pattern
	 */
	public LangString[] split(String regex) {
		final LangString[] pieces = split(regex, 0);
		return pieces;
	}

	/**
	 * Converts all of the characters in this label to lower case using the
	 * rules of this object's own {@link #getLocale() Locale}, keeping the
	 * language tag. Case mapping is based on the Unicode Standard version
	 * specified by the {@link java.lang.Character Character} class. Since case
	 * mappings are not always 1:1 char mappings, the resulting label may be a
	 * different length than the original.
	 * 
	 * Examples of lowercase mappings:
	 * 
	 * <pre>
	 * Language       Upper case    Lower case    Description
	 * tr (Turkish)   U+0130        U+0069        capital letter I with dot above -> small letter i
	 * tr (Turkish)   U+0049        U+0131        capital letter I -> small letter dotless i
	 * (all)          French Fries  french fries  lowercased all chars in String
	 * </pre>
	 * 
	 * @return a LangString with the label converted to lowercase.
	 * @see java.lang.String#toLowerCase()
	 * @see java.lang.String#toUpperCase()
	 * @see java.lang.String#toUpperCase(Locale)
	 */
	public LangString toLowerCase() {
		final String lowered = toString().toLowerCase(getLocale());
		return new LangString(lowered, getLang());
	}

	/**
	 * Converts all of the characters in this label to upper case using the
	 * rules of this object's own {@link #getLocale() Locale}, keeping the
	 * language tag. Case mapping is based on the Unicode Standard version
	 * specified by the {@link java.lang.Character Character} class. Since case
	 * mappings are not always 1:1 char mappings, the resulting label may be a
	 * different length than the original.
	 * 
	 * Examples of locale-sensitive and 1:M case mappings:
	 * 
	 * <pre>
	 * Language       Lower case     Upper case     Description
	 * tr (Turkish)   U+0069         U+0130         small letter i -> capital letter I with dot above
	 * tr (Turkish)   U+0131         U+0049         small letter dotless i -> capital letter I
	 * (all)          U+00DF         U+0053 U+0053  small letter sharp s -> two letters: SS
	 * (all)          Fahrvergnügen  FAHRVERGNÜGEN
	 * </pre>
	 * 
	 * @return a LangString with the label converted to uppercase.
	 * @see java.lang.String#toUpperCase()
	 * @see java.lang.String#toLowerCase()
	 * @see java.lang.String#toLowerCase(Locale)
	 */
	public LangString toUpperCase() {
		final String raised = toString().toUpperCase(getLocale());
		return new LangString(raised, getLang());
	}

	/**
	 * Returns a copy of this LangString with leading and trailing whitespace
	 * removed from the label, as done by {@link String#trim()}; the language
	 * tag is unchanged.
	 * 
	 * Whitespace here means any character whose code is less than or equal to
	 * U+0020 (the space character). If the label contains no character with a
	 * greater code, the resulting label is empty. Otherwise, let <i>k</i> be
	 * the index of the first character whose code is greater than U+0020 and
	 * <i>m</i> the index of the last such character; the resulting label is
	 * {@code substring(k, m + 1)} of this label.
	 * 
	 * A new LangString is returned even when there is nothing to trim.
	 * 
	 * @return A copy of this LangString with leading and trailing white space
	 *         removed from the label.
	 */
	public LangString trim() {
		final String trimmed = toString().trim();
		return new LangString(trimmed, getLang());
	}

	//////////////////////////////// Delegate toString() ////////////////////////////////

    /**
     * Returns the length of this string.
     * The length is equal to the number of Unicode
     * code units in the string.
     *
     * @return  the length of the sequence of characters represented by this
     *          object.
     */
	public int length() {
		// The length is measured on the text produced by toString().
		final String text = toString();
		return text.length();
	}

    /**
     * Returns the char value at the
     * specified index. An index ranges from 0 to
     * length() - 1. The first char value of the sequence
     * is at index 0, the next at index 1,
     * and so on, as for array indexing.
     *
     * 
     * <p>If the {@code char} value specified by the index is a
     * surrogate, the surrogate value is returned.
     *
     * @param      index   the index of the char value.
     * @return     the char value at the specified index of this string.
     *             The first char value is at index 0.
     * @exception  IndexOutOfBoundsException  if the index
     *             argument is negative or not less than the length of this
     *             string.
     */
	public char charAt(int index) {
		final String text = toString();
		// Bounds checking is left to String.charAt.
		return text.charAt(index);
	}

	/**
     * Returns true if, and only if, {@link #length()} is 0.
     *
     * @return true if {@link #length()} is 0, otherwise
     * false
     */
	public final boolean isEmpty() {
		// Empty means the text produced by toString() has no chars at all.
		final String text = toString();
		return text.length() == 0;
	}

    /**
     * Compares this string to the specified {@code CharSequence}.  The result
     * is {@code true} if and only if this {@code LangString} represents the same
     * sequence of char values as the specified sequence.
     *
     * @param  cs
     *         The sequence to compare this {@code LangString} against
     *
     * @return  {@code true} if this {@code LangString} represents the same
     *          sequence of char values as the specified sequence, {@code
     *          false} otherwise
     */
	public final boolean contentEquals(CharSequence cs) {
		// Only the char sequence is compared; the delegate is String.contentEquals.
		final String text = toString();
		return text.contentEquals(cs);
	}

    /**
     * Tests if two string regions are equal.
     * 

     * A substring of this String object is compared to a substring
     * of the argument other. The result is true if these substrings
     * represent identical character sequences. The substring of this
     * String object to be compared begins at index toffset
     * and has length len. The substring of other to be compared
     * begins at index ooffset and has length len. The
     * result is false if and only if at least one of the following
     * is true:
     * 
     * <ul>
     * <li>{@code toffset} is negative.
     * <li>{@code ooffset} is negative.
     * <li>{@code toffset+len} is greater than the length of this
     * string.
     * <li>{@code ooffset+len} is greater than the length of the other
     * argument.
     * <li>There is some nonnegative integer <i>k</i> less than {@code len}
     * such that:
     * {@code this.charAt(toffset+k) != other.charAt(ooffset+k)}
     * </ul>
     * 
     *
     * @param   toffset   the starting offset of the subregion in this string.
     * @param   other     the string argument.
     * @param   ooffset   the starting offset of the subregion in the string
     *                    argument.
     * @param   len       the number of characters to compare.
     * @return  true if the specified subregion of this string
     *          exactly matches the specified subregion of the string argument;
     *          false otherwise.
     */
	public final boolean regionMatches(int toffset, CharSequence other, int ooffset, int len) {
		// Both sides are compared as plain Strings.
		final String self = toString();
		final String counterpart = other.toString();
		return self.regionMatches(toffset, counterpart, ooffset, len);
	}

    /**
     * Tests if two string regions are equal.
     * 
     * A substring of this String object is compared to a substring
     * of the argument other. The result is true if these
     * substrings represent character sequences that are the same, ignoring
     * case if and only if ignoreCase is true. The substring of
     * this String object to be compared begins at index
     * toffset and has length len. The substring of
     * other to be compared begins at index ooffset and
     * has length len. The result is false if and only if
     * at least one of the following is true:
     * 
     * <ul>
     * <li>{@code toffset} is negative.
     * <li>{@code ooffset} is negative.
     * <li>{@code toffset+len} is greater than the length of this
     * string.
     * <li>{@code ooffset+len} is greater than the length of the other
     * argument.
     * <li>{@code ignoreCase} is {@code false} and there is some nonnegative
     * integer <i>k</i> less than {@code len} such that:
     * <blockquote><pre>
     * this.charAt(toffset+k) != other.charAt(ooffset+k)
     * </pre></blockquote>
     * <li>{@code ignoreCase} is {@code true} and there is some nonnegative
     * integer <i>k</i> less than {@code len} such that:
     * <blockquote><pre>
     * Character.toLowerCase(this.charAt(toffset+k)) !=
     *         Character.toLowerCase(other.charAt(ooffset+k))
     * </pre></blockquote>
     * and:
     * <blockquote><pre>
     * Character.toUpperCase(this.charAt(toffset+k)) !=
     *         Character.toUpperCase(other.charAt(ooffset+k))
     * </pre></blockquote>
     * </ul>
     * 
     * 
     *
     * @param   ignoreCase   if true, ignore case when comparing
     *                       characters.
     * @param   toffset      the starting offset of the subregion in this
     *                       string.
     * @param   other        the string argument.
     * @param   ooffset      the starting offset of the subregion in the string
     *                       argument.
     * @param   len          the number of characters to compare.
     * @return  true if the specified subregion of this string
     *          matches the specified subregion of the string argument;
     *          false otherwise. Whether the matching is exact
     *          or case insensitive depends on the ignoreCase
     *          argument.
     */
	public final boolean regionMatches(boolean ignoreCase, int toffset, CharSequence other,
			int ooffset, int len) {
		// Both sides are compared as plain Strings; case handling is String's.
		final String self = toString();
		final String counterpart = other.toString();
		return self.regionMatches(ignoreCase, toffset, counterpart, ooffset, len);
	}

    /**
     * Tests if the substring of this string beginning at the
     * specified index starts with the specified prefix.
     *
     * @param   prefix    the prefix.
     * @param   toffset   where to begin looking in this string.
     * @return  true if the character sequence represented by the
     *          argument is a prefix of the substring of this object starting
     *          at index toffset; false otherwise.
     *          The result is false if toffset is
     *          negative or greater than the length of this
     *          String object; otherwise the result is the same
     *          as the result of the expression
     *          {@code this.substring(toffset).startsWith(prefix)}
     *          
     */
	public final boolean startsWith(CharSequence prefix, int toffset) {
		final String text = toString();
		final String wanted = prefix.toString();
		// The offset is passed through untouched to String.startsWith.
		return text.startsWith(wanted, toffset);
	}

    /**
     * Tests if this string starts with the specified prefix.
     *
     * @param   prefix   the prefix.
     * @return  true if the character sequence represented by the
     *          argument is a prefix of the character sequence represented by
     *          this string; false otherwise.
     *          Note also that true will be returned if the
     *          argument is an empty string or is equal to this
     *          String object as determined by the
     *          {@link #equals(Object)} method.
     */
	public final boolean startsWith(CharSequence prefix) {
		// The prefix is flattened to a String before the comparison.
		final String text = toString();
		final String wanted = prefix.toString();
		return text.startsWith(wanted);
	}

    /**
     * Tests if this string ends with the specified suffix.
     *
     * @param   suffix   the suffix.
     * @return  true if the character sequence represented by the
     *          argument is a suffix of the character sequence represented by
     *          this object; false otherwise. Note that the
     *          result will be true if the argument is the
     *          empty string or is equal to this String object
     *          as determined by the {@link #equals(Object)} method.
     */
	public final boolean endsWith(CharSequence suffix) {
		// The suffix is flattened to a String before the comparison.
		final String text = toString();
		final String wanted = suffix.toString();
		return text.endsWith(wanted);
	}

    /**
     * Returns the index within this string of the first occurrence of
     * the specified character. If a character with value
     * ch occurs in the character sequence represented by
     * this String object, then the index (in Unicode
     * code units) of the first such occurrence is returned. For
     * values of ch in the range from 0 to 0xFFFF
     * (inclusive), this is the smallest value k such that:
     * <blockquote>{@code this.charAt(k) == ch}</blockquote>
     * is true. For other values of ch, it is the
     * smallest value k such that:
     * <blockquote>{@code this.codePointAt(k) == ch}</blockquote>
     * 
     * is true. In either case, if no such character occurs in this
     * string, then -1 is returned.
     *
     * @param   ch   a character (Unicode code point).
     * @return  the index of the first occurrence of the character in the
     *          character sequence represented by this object, or
     *          -1 if the character does not occur.
     */
	public final int indexOf(int ch) {
		// Forward search over the text produced by toString().
		final String text = toString();
		return text.indexOf(ch);
	}

    /**
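     * Returns the index within this string of the first occurrence of
     * the specified character, starting the search at the specified index.
     * <p>
     * If a character with value {@code ch} occurs in the character sequence
     * represented by this string at an index no smaller than
     * {@code fromIndex}, then the index (in Unicode code units) of the first
     * such occurrence is returned. For values of {@code ch} in the range
     * from 0 to 0xFFFF (inclusive), this is the smallest value k such that:
     * <blockquote>{@code (this.charAt(k) == ch) && (k >= fromIndex)}</blockquote>
     * is true. For other values of {@code ch}, it is the
     * smallest value k such that:
     * <blockquote>{@code (this.codePointAt(k) == ch) && (k >= fromIndex)}</blockquote>
     * is true. In either case, if no such character occurs in this
     * string at or after position {@code fromIndex}, then -1 is returned.
     * <p>
     * There is no restriction on the value of {@code fromIndex}. If it is
     * negative, it has the same effect as if it were zero: this entire
     * string may be searched. If it is greater than the length of this
     * string, it has the same effect as if it were equal to the length of
     * this string: -1 is returned.
     *
     * @param   ch          a character (Unicode code point).
     * @param   fromIndex   the index to start the search from.
     * @return  the index of the first occurrence of the character in the
     *          character sequence represented by this object that is greater
     *          than or equal to fromIndex, or -1 if the character does not
     *          occur.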
     */
	public final int indexOf(int ch, int fromIndex) {
		// Forward search beginning at fromIndex; String.indexOf handles
		// out-of-range start positions.
		final String text = toString();
		return text.indexOf(ch, fromIndex);
	}

    /**
     * Returns the index within this string of the last occurrence of
     * the specified character. For values of ch in the
     * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
     * units) returned is the largest value k such that:
     * <blockquote>{@code this.charAt(k) == ch}</blockquote>
     * is true. For other values of ch, it is the
     * largest value k such that:
     * <blockquote>{@code this.codePointAt(k) == ch}</blockquote>
     * 
     * is true.  In either case, if no such character occurs in this
     * string, then -1 is returned.  The
     * String is searched backwards starting at the last
     * character.
     *
     * @param   ch   a character (Unicode code point).
     * @return  the index of the last occurrence of the character in the
     *          character sequence represented by this object, or
     *          -1 if the character does not occur.
     */
	public final int lastIndexOf(int ch) {
		// Backward search over the text produced by toString().
		final String text = toString();
		return text.lastIndexOf(ch);
	}

    /**
     * Returns the index within this string of the last occurrence of
     * the specified character, searching backward starting at the
     * specified index. For values of ch in the range
     * from 0 to 0xFFFF (inclusive), the index returned is the largest
     * value k such that:
     * <blockquote>{@code (this.charAt(k) == ch) && (k <= fromIndex)}</blockquote>
     * is true. For other values of ch, it is the
     * largest value k such that:
     * <blockquote>{@code (this.codePointAt(k) == ch) && (k <= fromIndex)}</blockquote>
     * 
     * is true. In either case, if no such character occurs in this
     * string at or before position fromIndex, then
     * -1 is returned.
     *
     * All indices are specified in char values
     * (Unicode code units).
     *
     * @param   ch          a character (Unicode code point).
     * @param   fromIndex   the index to start the search from. There is no
     *          restriction on the value of fromIndex. If it is
     *          greater than or equal to the length of this string, it has
     *          the same effect as if it were equal to one less than the
     *          length of this string: this entire string may be searched.
     *          If it is negative, it has the same effect as if it were -1:
     *          -1 is returned.
     * @return  the index of the last occurrence of the character in the
     *          character sequence represented by this object that is less
     *          than or equal to fromIndex, or -1
     *          if the character does not occur before that point.
     */
	public final int lastIndexOf(int ch, int fromIndex) {
		// Backward search beginning at fromIndex; String.lastIndexOf handles
		// out-of-range start positions.
		final String text = toString();
		return text.lastIndexOf(ch, fromIndex);
	}

    /**
     * Returns the index within this string of the first occurrence of the
     * specified substring. The integer returned is the smallest value
     * k such that:
     * 
     * this.startsWith(str, k)
     * 
     * is true.
     *
     * @param   str   any string.
     * @return  if the string argument occurs as a substring within this
     *          object, then the index of the first character of the first
     *          such substring is returned; if it does not occur as a
     *          substring, -1 is returned.
     */
	public final int indexOf(CharSequence str) {
		// The needle is flattened to a String before searching.
		final String text = toString();
		final String needle = str.toString();
		return text.indexOf(needle);
	}

    /**
     * Returns the index within this string of the first occurrence of the
     * specified substring, starting at the specified index.  The integer
     * returned is the smallest value k for which:
     * <blockquote>{@code k >= Math.min(fromIndex, this.length()) && this.startsWith(str, k)}</blockquote>
     * 
     * If no such value of k exists, then -1 is returned.
     *
     * @param   str         the substring for which to search.
     * @param   fromIndex   the index from which to start the search.
     * @return  the index within this string of the first occurrence of the
     *          specified substring, starting at the specified index.
     */
	public final int indexOf(CharSequence str, int fromIndex) {
		// The needle is flattened to a String; the start index is passed through.
		final String text = toString();
		final String needle = str.toString();
		return text.indexOf(needle, fromIndex);
	}

    /**
     * Returns the index within this string of the rightmost occurrence
     * of the specified substring.  The rightmost empty string "" is
     * considered to occur at the index value this.length().
     * The returned index is the largest value k such that
     * <blockquote>{@code this.startsWith(str, k)}</blockquote>
     * 
     * is true.
     *
     * @param   str   the substring to search for.
     * @return  if the string argument occurs one or more times as a substring
     *          within this object, then the index of the first character of
     *          the last such substring is returned. If it does not occur as
     *          a substring, -1 is returned.
     */
	public final int lastIndexOf(CharSequence str) {
		// The needle is flattened to a String before the backward search.
		final String text = toString();
		final String needle = str.toString();
		return text.lastIndexOf(needle);
	}

    /**
     * Returns the index within this string of the last occurrence of the
     * specified substring, searching backward starting at the specified index.
     * The integer returned is the largest value k such that:
     * <blockquote>{@code k <= Math.min(fromIndex, this.length()) && this.startsWith(str, k)}</blockquote>
     * 
     * If no such value of k exists, then -1 is returned.
     *
     * @param   str         the substring to search for.
     * @param   fromIndex   the index to start the search from.
     * @return  the index within this string of the last occurrence of the
     *          specified substring.
     */
	public final int lastIndexOf(CharSequence str, int fromIndex) {
		// The needle is flattened to a String; the start index is passed through.
		final String text = toString();
		final String needle = str.toString();
		return text.lastIndexOf(needle, fromIndex);
	}

    /**
     * Tells whether or not this string matches the given regular expression.
     *
     * <p>An invocation of this method of the form
     * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the
     * same result as the expression
     *
     * <blockquote>{@link java.util.regex.Pattern}.{@link
     * java.util.regex.Pattern#matches(String,CharSequence)
     * matches}(<i>regex</i>, <i>str</i>)</blockquote>
     *
     * @param   regex
     *          the regular expression to which this string is to be matched
     *
     * @return  true if, and only if, this string matches the
     *          given regular expression
     *
     * @throws  PatternSyntaxException
     *          if the regular expression's syntax is invalid
     *
     * @see java.util.regex.Pattern
     */
	public final boolean matches(String regex) {
		// The whole text must match; pattern compilation is left to String.matches.
		final String text = toString();
		return text.matches(regex);
	}

    /**
     * Returns true if and only if this string contains the specified
     * sequence of char values.
     *
     * @param s the sequence to search for
     * @return true if this string contains s, false otherwise
     * @throws NullPointerException if s is null
     */
	public final boolean contains(CharSequence s) {
		// Substring test on the text produced by toString().
		final String text = toString();
		return text.contains(s);
	}
}
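/*
 * NOTE(review): the two tables below sit after the end of the class as
 * extraction residue. They look like the example tables from the Javadoc of a
 * split(regex, limit) method and a toLowerCase() method - confirm, then move
 * them back into those comments.
 *
 * Regex | Limit | Result
 * :     | 2     | `{ "boo", "and:foo" }`
 * :     | 5     | `{ "boo", "and", "foo" }`
 * :     | -2    | `{ "boo", "and", "foo" }`
 * o     | 5     | `{ "b", "", ":and:f", "", "" }`
 * o     | -2    | `{ "b", "", ":and:f", "", "" }`
 * o     | 0     | `{ "b", "", ":and:f" }`
 *
 * Language Code of Locale | Upper Case   | Lower Case   | Description
 * tr (Turkish)            | \u0130       | \u0069       | capital letter I with dot above -> small letter i
 * tr (Turkish)            | \u0049       | \u0131       | capital letter I -> small letter dotless i
 * (all)                   | French Fries | french fries | lowercased all chars in String
 * (all)                   |              |              | lowercased all chars in String
 */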