
humanize.ICUHumanize Maven / Gradle / Ivy
Show all versions of humanize-icu Show documentation
/*
_ _ _ _ __ __ ___ _ _ ___ __________
| | | | | | | \/ | _ | \ | |_ _|__ / ____|
| |_| | | | | |\/| | |_| | \| || | / /| _|
| _ | |_| | | | | _ | |\ || | / /_| |___
|_| |_|\___/|_| |_|_| |_|_| \_|___/____|_____|
Copyright 2013 mfornos
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package humanize;
import static humanize.util.Constants.EMPTY;
import humanize.icu.spi.MessageFormat;
import humanize.icu.spi.context.DefaultICUContext;
import humanize.icu.spi.context.ICUContextFactory;
import humanize.spi.context.ContextFactory;
import humanize.text.util.InterpolationHelper;
import humanize.text.util.Replacer;
import java.text.ParseException;
import java.util.Date;
import java.util.Locale;
import java.util.ServiceLoader;
import java.util.concurrent.Callable;
import com.google.common.collect.ObjectArrays;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.CompactDecimalFormat.CompactStyle;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.DecimalFormat;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.RuleBasedNumberFormat;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.Transliterator;
/**
*
* Facility for adding a "human touch" to data. It is thread-safe and supports
* per-thread internationalization. Additionally provides a concise facade for
* access to the International Components for
* Unicode (ICU) Java APIs.
*
*
*/
public final class ICUHumanize
{
/** Factory that creates the per-thread context; obtained once when this class is initialized. */
private static final ContextFactory contextFactory = loadContextFactory();

/**
 * Per-thread holder of the ICU context. The first {@code get()} on a given
 * thread asks {@link #contextFactory} for a new context.
 */
private static final ThreadLocal<DefaultICUContext> context = new ThreadLocal<DefaultICUContext>()
{
    @Override
    protected DefaultICUContext initialValue()
    {
        // The cast narrows the factory's result to the ICU-specific context
        // type that the methods of this class call into.
        return (DefaultICUContext) contextFactory.createContext();
    }
};
/**
 * Abbreviates a number with the compact decimal format that the current
 * thread's context supplies when no style is given. Same as
 * {@link #compactDecimal(Number, CompactStyle) compactDecimal} but defaults
 * to SHORT compact style.
 *
 * @param value
 *            The number to be abbreviated
 * @return a compact textual representation of the given value
 */
public static String compactDecimal(final Number value)
{
    final NumberFormat compactFormat = context.get().getCompactDecimalFormat();
    final String abbreviated = compactFormat.format(value);
    return abbreviated;
}
/**
 * Produces abbreviated numbers, for example '1.2B' instead of
 * '1,200,000,000'. The format will be appropriate for the given language,
 * such as '2,4 Millionen' for German.
 *
 * @param value
 *            The number to be abbreviated
 * @param style
 *            The compaction style
 * @return a compact textual representation of the given value
 */
public static String compactDecimal(final Number value, final CompactStyle style)
{
    // The formatter for the requested style comes from the current thread's context.
    final NumberFormat compactFormat = context.get().getCompactDecimalFormat(style);
    final String abbreviated = compactFormat.format(value);
    return abbreviated;
}
/**
 * Same as {@link #compactDecimal(Number, CompactStyle) compactDecimal} for
 * the specified locale. The call is handed to {@code withinLocale} together
 * with the target locale.
 *
 * @param value
 *            The number to be abbreviated
 * @param style
 *            The compaction style
 * @param locale
 *            The locale
 * @return a compact textual representation of the given value
 */
public static String compactDecimal(final Number value, final CompactStyle style, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a String.
    return withinLocale(new Callable<String>()
    {
        @Override
        public String call()
        {
            return compactDecimal(value, style);
        }
    }, locale);
}
/**
 * Same as {@link #compactDecimal(Number) compactDecimal} for the specified
 * locale. Uses the SHORT compact style.
 *
 * @param value
 *            The number to be abbreviated
 * @param locale
 *            The locale
 * @return a compact textual representation of the given value
 */
public static String compactDecimal(final Number value, final Locale locale)
{
    final CompactStyle defaultStyle = CompactStyle.SHORT;
    return compactDecimal(value, defaultStyle, locale);
}
/**
*
* Returns an ICU based DateFormat instance for the current thread.
*
*
* Date/Time format syntax:
*
*
* The date/time format is specified by means of a string time pattern. In
* this pattern, all ASCII letters are reserved as pattern letters, which
* are defined as the following:
*
*
*
*
* Symbol Meaning Presentation Example
* ------ ------- ------------ -------
* G era designator (Text) AD
* y year (Number) 1996
* Y year (week of year) (Number) 1997
* u extended year (Number) 4601
* U cyclic year name (Text,NumFallback) ren-chen (29)
* Q Quarter (Text & Number) Q2 & 02
* M month in year (Text & Number) July & 07
* d day in month (Number) 10
* h hour in am/pm (1~12) (Number) 12
* H hour in day (0~23) (Number) 0
* m minute in hour (Number) 30
* s second in minute (Number) 55
* S fractional second (Number) 978
* E day of week (Text) Tuesday
* e day of week (local 1~7) (Text & Number) Tues & 2
* D day in year (Number) 189
* F day of week in month (Number) 2 (2nd Wed in July)
* w week in year (Number) 27
* W week in month (Number) 2
* a am/pm marker (Text) PM
* k hour in day (1~24) (Number) 24
* K hour in am/pm (0~11) (Number) 0
* z time zone (Text) PST
* zzzz time zone (Text) Pacific Standard Time
* Z time zone (RFC 822) (Number) -0800
* ZZZZ time zone (RFC 822) (Text & Number) GMT-08:00
* ZZZZZ time zone (ISO 8601) (Text & Number) -08:00 & Z
* v time zone (generic) (Text) PT
* vvvv time zone (generic) (Text) Pacific Time
* V time zone (abbreviation) (Text) PST
* VVVV time zone (location) (Text) United States Time (Los Angeles)
* g Julian day (Number) 2451334
* A milliseconds in day (Number) 69540000
* q stand alone quarter (Text & Number) Q2 & 02
* L stand alone month (Text & Number) July & 07
* c stand alone day of week (Text & Number) Tuesday & 2
* ' escape for text (Delimiter) 'Date='
* '' single quote (Literal) 'o''clock'
*
*
* The count of pattern letters determines the format.
*
*
* (Text): 4 or more, use full form, <4, use short or abbreviated form if
* it exists. (e.g., "EEEE" produces "Monday", "EEE" produces "Mon")
*
*
* (Number): the minimum number of digits. Shorter numbers are zero-padded
* to this amount (e.g. if "m" produces "6", "mm" produces "06"). Year is
* handled specially; that is, if the count of 'y' is 2, the Year will be
* truncated to 2 digits. (e.g., if "yyyy" produces "1997", "yy" produces
* "97".) Unlike other fields, fractional seconds are padded on the right
* with zero.
*
*
* (Text & Number): 3 or over, use text, otherwise use number. (e.g.,
* "M" produces "1", "MM" produces "01", "MMM" produces "Jan", and "MMMM"
* produces "January".)
*
*
* (Text,NumFallback): Behaves like Text if there is supporting data, like
* Number otherwise.
*
*
* Any characters in the pattern that are not in the ranges of ['a'..'z']
* and ['A'..'Z'] will be treated as quoted text. For instance, characters
* like ':', '.', ' ', '#' and '@' will appear in the resulting time text
* even if they are not enclosed within single quotes.
*
*
* A pattern containing any invalid pattern letter will result in a failing
* UErrorCode result during formatting or parsing.
*
*
* Examples using the US locale:
*
*
*
* Format Pattern Result
* -------------- -------
* "yyyy.MM.dd G 'at' HH:mm:ss vvvv" ->> 1996.07.10 AD at 15:08:56 Pacific Time
* "EEE, MMM d, ''yy" ->> Wed, Jul 10, '96
* "h:mm a" ->> 12:08 PM
* "hh 'o''clock' a, zzzz" ->> 12 o'clock PM, Pacific Daylight Time
* "K:mm a, vvv" ->> 0:00 PM, PT
* "yyyyy.MMMMM.dd GGG hh:mm aaa" ->> 1996.July.10 AD 12:08 PM
*
*
* @param pattern
* Format pattern that follows the conventions of
* {@link com.ibm.icu.text.DateFormat DateFormat}
* @return a DateFormat instance for the current thread
*/
public static DateFormat dateFormatInstance(final String pattern)
{
    // The locale is taken from the current thread's context.
    // NOTE(review): ICU documents the first argument of getPatternInstance as a
    // skeleton rather than a literal pattern; confirm this matches the intent.
    final DateFormat formatter = DateFormat.getPatternInstance(pattern, context.get().getLocale());
    return formatter;
}
/**
 * Same as {@link #dateFormatInstance(String) dateFormatInstance} for the
 * specified locale. The call is handed to {@code withinLocale} together
 * with the target locale.
 *
 * @param pattern
 *            Format pattern that follows the conventions of
 *            {@link com.ibm.icu.text.DateFormat DateFormat}
 * @param locale
 *            Target locale
 * @return a DateFormat instance for the current thread
 */
public static DateFormat dateFormatInstance(final String pattern, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a DateFormat.
    return withinLocale(new Callable<DateFormat>()
    {
        @Override
        public DateFormat call()
        {
            return dateFormatInstance(pattern);
        }
    }, locale);
}
/**
*
* Returns an ICU based DecimalFormat instance for the current thread. It
* has a variety of features designed to make it possible to parse and
* format numbers in any locale, including support for Western, Arabic, or
* Indic digits. It also supports different flavors of numbers, including
* integers ("123"), fixed-point numbers ("123.4"), scientific notation
* ("1.23E4"), percentages ("12%"), and currency amounts ("$123.00",
* "USD123.00", "123.00 US dollars"). All of these flavors can be easily
* localized.
*
*
* Patterns
*
*
* A DecimalFormat
consists of a pattern and a set of
* symbols. The pattern may be set directly using #applyPattern ,
* or indirectly using other API methods which manipulate aspects of the
* pattern, such as the minimum number of integer digits. The symbols are
* stored in a DecimalFormatSymbols object. When using the NumberFormat
* factory methods, the pattern and symbols are read from ICU's locale data.
*
*
Special Pattern Characters
*
*
* Many characters in a pattern are taken literally; they are matched during
* parsing and output unchanged during formatting. Special characters, on
* the other hand, stand for other characters, strings, or classes of
* characters. For example, the '#' character is replaced by a localized
* digit. Often the replacement character is the same as the pattern
* character; in the U.S. locale, the ',' grouping character is replaced by
* ','. However, the replacement is still happening, and if the symbols are
* modified, the grouping character changes. Some special characters affect
* the behavior of the formatter by their presence; for example, if the
* percent character is seen, then the value is multiplied by 100 before
* being displayed.
*
*
* To insert a special character in a pattern as a literal, that is, without
* any special meaning, the character must be quoted. There are some
* exceptions to this which are noted below.
*
*
* The characters listed here are used in non-localized patterns. Localized
* patterns use the corresponding characters taken from this formatter's
* DecimalFormatSymbols object instead, and these characters lose their
* special status. Two exceptions are the currency sign and quote, which are
* not localized.
*
*
*
* Symbol
* Location
* Localized?
* Meaning
*
* 0
* Number
* Yes
* Digit
*
* 1-9
* Number
* Yes
* '1' through '9' indicate rounding.
*
*
* @
* Number
* No
* Significant digit
*
* #
* Number
* Yes
* Digit, zero shows as absent
*
* .
* Number
* Yes
* Decimal separator or monetary decimal separator
*
* -
* Number
* Yes
* Minus sign
*
* ,
* Number
* Yes
* Grouping separator
*
* E
* Number
* Yes
* Separates mantissa and exponent in scientific notation.
* Need not be quoted in prefix or suffix.
*
* +
* Exponent
* Yes
* Prefix positive exponents with localized plus sign.
* Need not be quoted in prefix or suffix.
*
* ;
* Subpattern boundary
* Yes
* Separates positive and negative subpatterns
*
* %
* Prefix or suffix
* Yes
* Multiply by 100 and show as percentage
*
* \u2030
* Prefix or suffix
* Yes
* Multiply by 1000 and show as per mille
*
* ¤
(\u00A4
)
* Prefix or suffix
* No
* Currency sign, replaced by currency symbol. If doubled, replaced by
* international currency symbol. If tripled, replaced by currency plural
* names, for example, "US dollar" or "US dollars" for America. If present
* in a pattern, the monetary decimal separator is used instead of the
* decimal separator.
*
* '
* Prefix or suffix
* No
* Used to quote special characters in a prefix or suffix, for example,
* "'#'#"
formats 123 to "#123"
. To create a
* single quote itself, use two in a row: "# o''clock"
.
*
* *
* Prefix or suffix boundary
* Yes
* Pad escape, precedes pad character
*
*
* A DecimalFormat
pattern contains a positive and negative
* subpattern, for example, "#,##0.00;(#,##0.00)". Each subpattern has a
* prefix, a numeric part, and a suffix. If there is no explicit negative
* subpattern, the negative subpattern is the localized minus sign prefixed
* to the positive subpattern. That is, "0.00" alone is equivalent to
* "0.00;-0.00". If there is an explicit negative subpattern, it serves only
* to specify the negative prefix and suffix; the number of digits, minimal
* digits, and other characteristics are ignored in the negative subpattern.
* That means that "#,##0.0#;(#)" has precisely the same result as
* "#,##0.0#;(#,##0.0#)".
*
*
* The prefixes, suffixes, and various symbols used for infinity, digits,
* thousands separators, decimal separators, etc. may be set to arbitrary
* values, and they will appear properly during formatting. However, care
* must be taken that the symbols and strings do not conflict, or parsing
* will be unreliable. For example, either the positive and negative
* prefixes or the suffixes must be distinct for #parse to be able to
* distinguish positive from negative values. Another example is that the
* decimal separator and thousands separator should be distinct characters,
* or parsing will be impossible.
*
*
* The grouping separator is a character that separates clusters of
* integer digits to make large numbers more legible. It is commonly used for
* thousands, but in some locales it separates ten-thousands. The
* grouping size is the number of digits between the grouping
* separators, such as 3 for "100,000,000" or 4 for "1 0000 0000". There are
* actually two different grouping sizes: One used for the least significant
* integer digits, the primary grouping size, and one used for all
* others, the secondary grouping size. In most locales these are
* the same, but sometimes they are different. For example, if the primary
* grouping interval is 3, and the secondary is 2, then this corresponds to
* the pattern "#,##,##0", and the number 123456789 is formatted as
* "12,34,56,789". If a pattern contains multiple grouping separators, the
* interval between the last one and the end of the integer defines the
* primary grouping size, and the interval between the last two defines the
* secondary grouping size. All others are ignored, so "#,##,###,####" ==
* "###,###,####" == "##,#,###,####".
*
*
* Illegal patterns, such as "#.#.#" or "#.###,###", will cause
* DecimalFormat
to throw an IllegalArgumentException with a
* message that describes the problem.
*
*
Pattern BNF
*
*
* pattern := subpattern (';' subpattern)?
* subpattern := prefix? number exponent? suffix?
* number := (integer ('.' fraction)?) | sigDigits
* prefix := '\u0000'..'\uFFFD' - specialCharacters
* suffix := '\u0000'..'\uFFFD' - specialCharacters
* integer := '#'* '0'* '0'
* fraction := '0'* '#'*
* sigDigits := '#'* '@' '@'* '#'*
* exponent := 'E' '+'? '0'* '0'
* padSpec := '*' padChar
* padChar := '\u0000'..'\uFFFD' - quote
*
* Notation:
* X* 0 or more instances of X
* X? 0 or 1 instances of X
* X|Y either X or Y
* C..D any character from C up to D, inclusive
* S-T characters in S, except those in T
*
*
* The first subpattern is for positive numbers. The second (optional)
* subpattern is for negative numbers.
*
*
* Not indicated in the BNF syntax above:
*
*
* - The grouping separator ',' can occur inside the integer and sigDigits
* elements, between any two pattern characters of that element, as long as
* the integer or sigDigits element is not followed by the exponent element.
*
*
- Two grouping intervals are recognized: That between the decimal point
* and the first grouping symbol, and that between the first and second
* grouping symbols. These intervals are identical in most locales, but in
* some locales they differ. For example, the pattern "#,##,###"
* formats the number 123456789 as "12,34,56,789".
*
* -
* The pad specifier
padSpec
may appear before the prefix,
* after the prefix, before the suffix, after the suffix, or not at all.
*
* -
* In place of '0', the digits '1' through '9' may be used to indicate a
* rounding increment.
*
*
* @param pattern
* Format pattern that follows the conventions of
* {@link com.ibm.icu.text.DecimalFormat DecimalFormat}
* @return a DecimalFormat instance for the current thread
*/
public static DecimalFormat decimalFormatInstance(final String pattern)
{
    final DecimalFormat formatter = context.get().getDecimalFormat();
    // applyPattern changes the formatter in place.
    // NOTE(review): if the context hands out a cached instance, the pattern
    // stays applied for later users of that instance; confirm.
    formatter.applyPattern(pattern);
    return formatter;
}
/**
 * Same as {@link #decimalFormatInstance(String) decimalFormatInstance} for
 * the specified locale. The call is handed to {@code withinLocale} together
 * with the target locale.
 *
 * @param pattern
 *            Format pattern that follows the conventions of
 *            {@link com.ibm.icu.text.DecimalFormat DecimalFormat}
 * @param locale
 *            Target locale
 * @return a DecimalFormat instance for the current thread
 */
public static DecimalFormat decimalFormatInstance(final String pattern, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a DecimalFormat.
    return withinLocale(new Callable<DecimalFormat>()
    {
        @Override
        public DecimalFormat call()
        {
            return decimalFormatInstance(pattern);
        }
    }, locale);
}
/**
 * Formats a number of seconds as hours, minutes and seconds. Uses the
 * context's rule-based number format with the
 * {@link RuleBasedNumberFormat#DURATION DURATION} rule set, always under
 * {@link Locale#ENGLISH}.
 *
 * @param value
 *            Number of seconds
 * @return Number of seconds as hours, minutes and seconds
 */
public static String duration(final Number value)
{
    // NOTE: does not support any other locale
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a String.
    return withinLocale(new Callable<String>()
    {
        @Override
        public String call() throws Exception
        {
            return context.get().getRuleBasedNumberFormat(RuleBasedNumberFormat.DURATION).format(value);
        }
    }, Locale.ENGLISH);
}
/**
 * Formats the given arguments with a message format built from the given
 * pattern. The formatter is obtained via
 * {@code messageFormatInstance(pattern)} and the arguments are rendered
 * with it.
 *
 * @param pattern
 *            Format pattern that follows the conventions of
 *            {@link com.ibm.icu.text.MessageFormat MessageFormat}
 * @param args
 *            Arguments
 * @return The formatted String
 */
public static String format(final String pattern, final Object... args)
{
return messageFormatInstance(pattern).render(args);
}
/**
 * Smartly formats the given number as a monetary amount. The value is
 * formatted with the current thread's currency format and the result is
 * then passed through {@code stripZeros}.
 * <p>
 * Examples for en_GB:
 *
 * <pre>
 * Input   Output
 * 34      "£34"
 * 1000    "£1,000"
 * 12.5    "£12.50"
 * </pre>
 *
 * @param value
 *            Number to be formatted
 * @return String representing the monetary amount
 */
public static String formatCurrency(final Number value)
{
    final DecimalFormat currencyFormat = context.get().getCurrencyFormat();
    final String formatted = currencyFormat.format(value);
    return stripZeros(currencyFormat, formatted);
}
/**
 * Same as {@link #formatCurrency(Number) formatCurrency} for the specified
 * locale. The call is handed to {@code withinLocale} together with the
 * target locale.
 *
 * @param value
 *            Number to be formatted
 * @param locale
 *            Target locale
 * @return String representing the monetary amount
 */
public static String formatCurrency(final Number value, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a String.
    return withinLocale(new Callable<String>()
    {
        @Override
        public String call()
        {
            return formatCurrency(value);
        }
    }, locale);
}
/**
 * Formats the given date with the SHORT style. Same as
 * {@link #formatDate(int, Date) formatDate} with
 * {@code DateFormat.SHORT} as the style.
 *
 * @param value
 *            Date to be formatted
 * @return String representation of the date
 */
public static String formatDate(final Date value)
{
    final int shortStyle = DateFormat.SHORT;
    return formatDate(shortStyle, value);
}
/**
 * Same as {@link #formatDate(Date) formatDate} for the specified locale.
 * The call is handed to {@code withinLocale} together with the target
 * locale.
 *
 * @param value
 *            Date to be formatted
 * @param locale
 *            Target locale
 * @return String representation of the date
 */
public static String formatDate(final Date value, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a String.
    return withinLocale(new Callable<String>()
    {
        @Override
        public String call()
        {
            return formatDate(value);
        }
    }, locale);
}
/**
 * Formats a date according to the given pattern. A new
 * {@link SimpleDateFormat} is built from the pattern and the locale of the
 * current thread's context on every call.
 *
 * @param value
 *            Date to be formatted
 * @param pattern
 *            The pattern. See {@link #dateFormatInstance(String)}
 * @return a formatted date/time string
 */
public static String formatDate(final Date value, final String pattern)
{
    final SimpleDateFormat formatter = new SimpleDateFormat(pattern, context.get().getLocale());
    return formatter.format(value);
}
/**
 * Same as {@link #formatDate(Date, String) formatDate} for the specified
 * locale. The call is handed to {@code withinLocale} together with the
 * target locale.
 *
 * @param value
 *            Date to be formatted
 * @param pattern
 *            The pattern. See {@link #dateFormatInstance(String)}
 * @param locale
 *            Target locale
 * @return a formatted date/time string
 */
public static String formatDate(final Date value, final String pattern, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a String.
    return withinLocale(new Callable<String>()
    {
        @Override
        public String call()
        {
            return formatDate(value, pattern);
        }
    }, locale);
}
/**
 * Formats the given date with the specified style by delegating to the
 * current thread's context.
 *
 * @param style
 *            DateFormat style
 * @param value
 *            Date to be formatted
 * @return String representation of the date
 */
public static String formatDate(final int style, final Date value)
{
    final DefaultICUContext icuContext = context.get();
    return icuContext.formatDate(style, value);
}
/**
 * Same as {@link #formatDate(int, Date) formatDate} for the specified
 * locale. The call is handed to {@code withinLocale} together with the
 * target locale.
 *
 * @param style
 *            DateFormat style
 * @param value
 *            Date to be formatted
 * @param locale
 *            Target locale
 * @return String representation of the date
 */
public static String formatDate(final int style, final Date value, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a String.
    return withinLocale(new Callable<String>()
    {
        @Override
        public String call()
        {
            return formatDate(style, value);
        }
    }, locale);
}
/**
 * Formats the given date/time with SHORT style by delegating to the
 * current thread's context.
 *
 * @param value
 *            Date to be formatted
 * @return String representation of the date
 */
public static String formatDateTime(final Date value)
{
    final DefaultICUContext icuContext = context.get();
    return icuContext.formatDateTime(value);
}
/**
 * Same as {@link #formatDateTime(Date) formatDateTime} for the specified
 * locale. The call is handed to {@code withinLocale} together with the
 * target locale.
 *
 * @param value
 *            Date to be formatted
 * @param locale
 *            Target locale
 * @return String representation of the date
 */
public static String formatDateTime(final Date value, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a String.
    return withinLocale(new Callable<String>()
    {
        @Override
        public String call()
        {
            return formatDateTime(value);
        }
    }, locale);
}
/**
 * Formats the given date/time with the specified styles by delegating to
 * the current thread's context.
 *
 * @param dateStyle
 *            Date style
 * @param timeStyle
 *            Time style
 * @param value
 *            Date to be formatted
 * @return String representation of the date
 */
public static String formatDateTime(final int dateStyle, final int timeStyle, final Date value)
{
    final DefaultICUContext icuContext = context.get();
    return icuContext.formatDateTime(dateStyle, timeStyle, value);
}
/**
 * Same as {@link #formatDateTime(int, int, Date) formatDateTime} for the
 * specified locale. The call is handed to {@code withinLocale} together
 * with the target locale.
 *
 * @param dateStyle
 *            Date style
 * @param timeStyle
 *            Time style
 * @param value
 *            Date to be formatted
 * @param locale
 *            Target locale
 * @return String representation of the date
 */
public static String formatDateTime(final int dateStyle, final int timeStyle, final Date value, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a String.
    return withinLocale(new Callable<String>()
    {
        @Override
        public String call()
        {
            return formatDateTime(dateStyle, timeStyle, value);
        }
    }, locale);
}
/**
 * Formats the given number to the standard decimal format for the default
 * locale by delegating to the current thread's context.
 *
 * @param value
 *            Number to be formatted
 * @return Standard localized format representation
 */
public static String formatDecimal(final Number value)
{
    final DefaultICUContext icuContext = context.get();
    return icuContext.formatDecimal(value);
}
/**
 * Same as {@link #formatDecimal(Number) formatDecimal} for the specified
 * locale. The call is handed to {@code withinLocale} together with the
 * target locale.
 *
 * @param value
 *            Number to be formatted
 * @param locale
 *            Target locale
 * @return Standard localized format representation
 */
public static String formatDecimal(final Number value, final Locale locale)
{
    // Parameterized Callable: a raw Callable would make the result an
    // unchecked Object rather than a String.
    return withinLocale(new Callable<String>()
    {
        @Override
        public String call()
        {
            return formatDecimal(value);
        }
    }, locale);
}
/**
*
* Formats the given ratio as a percentage.
*
*
*
*
* Input
* Output
*
*
* 0.5
* "50%"
*
*
* 1
* "100%"
*
*
* 0.564
* "56%"
*
*
*
* @param value
* Ratio to be converted
* @return String representing the percentage
*/
public static String fo