org.apache.fop.complexscripts.util.NumberConverter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.fop Show documentation
Show all versions of org.apache.fop Show documentation
The core maven build properties
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.complexscripts.util;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
// CSOFF: LineLengthCheck
/**
* Implementation of Number to String Conversion algorithm specified by
* XSL Transformations (XSLT) Version 2.0, W3C Recommendation, 23 January 2007.
*
* This algorithm differs from that specified in XSLT 1.0 in the following
* ways:
*
* - input numbers are greater than or equal to zero rather than greater than zero;
* - introduces format tokens { w, W, Ww };
* - introduces ordinal parameter to generate ordinal numbers;
*
*
* Implementation Defaults and Limitations
*
* - If language parameter is unspecified (null or empty string), then the value
* of DEFAULT_LANGUAGE is used, which is defined below as "eng" (English).
* - Only English, French, and Spanish word numerals are supported, and only if less than one trillion (1,000,000,000,000).
* - Ordinal word numerals are supported for French and Spanish only when less than or equal to ten (10).
*
*
* Implementation Notes
*
* - In order to handle format tokens outside the Unicode BMP, all processing is
* done in Unicode Scalar Values represented with Integer and Integer[]
* types. Without affecting behavior, this may be subsequently optimized to
* use int and int[] types.
* - In order to communicate various sub-parameters, including ordinalization, a features string
* is employed, which consists of comma separated name and optional value tokens, where name and value
* are separated by an equals '=' sign.
* - Ordinal numbers are selected by specifying a word based format token in combination with an 'ordinal' feature with no value, in which case
* the features 'male' and 'female' may be used to specify gender for gender sensitive languages. For example, the feature string "ordinal,female"
* selects female ordinals.
*
*
* This work was originally authored by Glenn Adams ([email protected]).
*/
public class NumberConverter {
/** letter value: alphabetic */
public static final int LETTER_VALUE_ALPHABETIC = 1;
/** letter value: traditional */
public static final int LETTER_VALUE_TRADITIONAL = 2;
/** no token type (initial state while scanning a format string) */
private static final int TOKEN_NONE = 0;
/** alphanumeric token type (a run of letters/numbers, i.e., a format token) */
private static final int TOKEN_ALPHANUMERIC = 1;
/** nonalphanumeric token type (a run of other characters, i.e., a prefix, separator, or suffix) */
private static final int TOKEN_NONALPHANUMERIC = 2;
/** default token, the single scalar '1', used when no (usable) format token is available */
private static final Integer[] DEFAULT_TOKEN = new Integer[] { (int) '1' };
/** default separator, the single scalar '.', used when the format supplies no separator */
private static final Integer[] DEFAULT_SEPARATOR = new Integer[] { (int) '.' };
/** default language */
private static final String DEFAULT_LANGUAGE = "eng";
/** prefix token (may be null) */
private Integer[] prefix;
/** suffix token (may be null) */
private Integer[] suffix;
/** sequence of tokens, as parsed from format */
private Integer[][] tokens;
/** sequence of separators, as parsed from format */
private Integer[][] separators;
/** grouping separator (unicode scalar value; zero means no grouping) */
private int groupingSeparator;
/** grouping size */
private int groupingSize;
/** letter value */
private int letterValue;
/** features (comma separated feature sub-parameters, e.g., "ordinal,female") */
private String features;
/** language (stored in lower case, may be null) */
private String language;
/** country (stored in lower case, may be null) */
private String country;
/**
 * Construct parameterized number converter.
 * @param format format for the page number (may be null or empty, in which case the format "1" is used)
 * @param groupingSeparator grouping separator (if zero, then no grouping separator applies)
 * @param groupingSize grouping size (if zero or negative, then no grouping size applies)
 * @param letterValue letter value (must be one of the above letter value enumeration values)
 * @param features features (feature sub-parameters)
 * @param language language code (may be null); stored in lower case
 * @param country country code (may be null); stored in lower case
 * @throws IllegalArgumentException if format is not a valid UTF-16 string (e.g., has unpaired surrogate)
 */
public NumberConverter(String format, int groupingSeparator, int groupingSize, int letterValue, String features, String language, String country)
throws IllegalArgumentException {
    this.groupingSeparator = groupingSeparator;
    this.groupingSize = groupingSize;
    this.letterValue = letterValue;
    this.features = features;
    // Language and country codes are identifiers, not natural-language text: lower-case them
    // independently of the default locale (under a Turkish default locale "IT" would
    // otherwise become "\u0131t" and never match a known language code).
    this.language = (language != null) ? language.toLowerCase(java.util.Locale.ROOT) : null;
    this.country = (country != null) ? country.toLowerCase(java.util.Locale.ROOT) : null;
    parseFormatTokens(format);
}
/**
 * Convert a number to string according to conversion parameters.
 * @param number number to convert
 * @return string representing converted number
 */
public String convert(long number) {
    // A one-element, type-safe list is all the list based overload needs; it only reads it.
    List<Long> numbers = Collections.singletonList(Long.valueOf(number));
    return convert(numbers);
}
/**
 * Convert list of numbers to string according to conversion parameters.
 * The result is the prefix (if any), followed by the converted numbers, followed
 * by the suffix (if any).
 * @param numbers list of numbers to convert
 * @return string representing converted list of numbers
 */
public String convert(List<Long> numbers) {
    // Work in unicode scalar values throughout; convert to a string only at the end.
    List<Integer> scalars = new ArrayList<Integer>();
    if (prefix != null) {
        appendScalars(scalars, prefix);
    }
    convertNumbers(scalars, numbers);
    if (suffix != null) {
        appendScalars(scalars, suffix);
    }
    return scalarsToString(scalars);
}
/**
 * Parse FORMAT into alternating runs of alphanumeric scalars (format tokens) and
 * non-alphanumeric scalars (separators), then record the results in this.tokens,
 * this.separators, this.prefix and this.suffix.
 *
 * A non-alphanumeric run is treated as the prefix only if the format starts with it,
 * and as the suffix only if the format ends with it; all other non-alphanumeric runs
 * are separators between tokens. (Previously the first run was always taken as the
 * prefix, so that a format such as "1." produced ".1" rather than "1.".)
 * @param format format string (null or empty is treated as "1")
 * @throws IllegalArgumentException if format is not a valid UTF-16 string
 */
private void parseFormatTokens(String format) throws IllegalArgumentException {
    if ((format == null) || (format.length() == 0)) {
        format = "1";
    }
    List<Integer[]> tokenList = new ArrayList<Integer[]>();
    List<Integer[]> separatorList = new ArrayList<Integer[]>();
    List<Integer> run = new ArrayList<Integer>();
    int runType = TOKEN_NONE;
    Integer[] ca = UTF32.toUTF32(format, 0, true);
    for (Integer c : ca) {
        int type = isAlphaNumeric(c) ? TOKEN_ALPHANUMERIC : TOKEN_NONALPHANUMERIC;
        if (type != runType) {
            // character class changed: close the current run and start a new one
            flushRun(run, runType, tokenList, separatorList);
            runType = type;
        }
        run.add(c);
    }
    flushRun(run, runType, tokenList, separatorList);
    boolean startsWithSeparator = (ca.length > 0) && !isAlphaNumeric(ca[0]);
    boolean endsWithSeparator = (ca.length > 0) && !isAlphaNumeric(ca[ca.length - 1]);
    if (startsWithSeparator && !separatorList.isEmpty()) {
        this.prefix = separatorList.remove(0);
    }
    // If the whole format is one non-alphanumeric run, it has already been consumed as prefix.
    if (endsWithSeparator && !separatorList.isEmpty()) {
        this.suffix = separatorList.remove(separatorList.size() - 1);
    }
    this.separators = separatorList.toArray(new Integer[separatorList.size()][]);
    this.tokens = tokenList.toArray(new Integer[tokenList.size()][]);
}
/** Move a non-empty RUN to the token or separator list according to RUNTYPE, then clear it. */
private static void flushRun(List<Integer> run, int runType, List<Integer[]> tokenList, List<Integer[]> separatorList) {
    if (!run.isEmpty()) {
        Integer[] scalars = run.toArray(new Integer[run.size()]);
        if (runType == TOKEN_ALPHANUMERIC) {
            tokenList.add(scalars);
        } else {
            separatorList.add(scalars);
        }
        run.clear();
    }
}
/**
 * Determine if scalar C is alphanumeric, i.e., if its unicode general category is
 * one of the number categories (Nd, Nl, No) or one of the letter categories
 * (Lu, Ll, Lt, Lm, Lo).
 * @param c unicode scalar value
 * @return true if C is a letter or a number
 */
private static boolean isAlphaNumeric(int c) {
    int category = Character.getType(c);
    boolean number = (category == Character.DECIMAL_DIGIT_NUMBER)    // Nd
                  || (category == Character.LETTER_NUMBER)           // Nl
                  || (category == Character.OTHER_NUMBER);           // No
    boolean letter = (category == Character.UPPERCASE_LETTER)        // Lu
                  || (category == Character.LOWERCASE_LETTER)        // Ll
                  || (category == Character.TITLECASE_LETTER)        // Lt
                  || (category == Character.MODIFIER_LETTER)         // Lm
                  || (category == Character.OTHER_LETTER);           // Lo
    return number || letter;
}
/**
 * Convert each of NUMBERS using the parsed format tokens and separators, appending
 * the resulting scalars to SCALARS.
 *
 * The n-th number is formatted with the n-th token. If there are more numbers than
 * tokens, the last token used (or the default token "1" if there is none) formats the
 * remaining numbers. Every number after the first is preceded by a separator: the
 * separator parsed from the format if available, otherwise the default separator ".";
 * once the tokens are exhausted the last separator used is repeated. (Previously no
 * separator at all was emitted once the tokens were exhausted, so that format "1"
 * applied to 1, 2, 3 produced "123".)
 * @param scalars list of unicode scalars to append to
 * @param numbers numbers to convert
 */
private void convertNumbers(List<Integer> scalars, List<Long> numbers) {
    Integer[] tknLast = DEFAULT_TOKEN;
    Integer[] sepLast = DEFAULT_SEPARATOR;
    int tknIndex = 0;
    int sepIndex = 0;
    int numIndex = 0;
    for (Long number : numbers) {
        Integer[] sep = null;
        Integer[] tkn;
        if (tknIndex < tokens.length) {
            if (numIndex > 0) {
                sep = (sepIndex < separators.length) ? separators[sepIndex++] : DEFAULT_SEPARATOR;
                sepLast = sep;
            }
            tkn = tokens[tknIndex++];
        } else {
            // tokens exhausted: reuse the last token and the last separator
            tkn = tknLast;
            if (numIndex > 0) {
                sep = sepLast;
            }
        }
        appendScalars(scalars, convertNumber(number, sep, tkn));
        tknLast = tkn;
        numIndex++;
    }
}
/**
 * Convert a single NUMBER, producing the optional SEPARATOR followed by the number
 * formatted according to TOKEN.
 * @param number to be converted
 * @param separator scalars to emit before the number (null for none)
 * @param token format token (if null, only the separator is emitted)
 * @return separator and formatted number as array of unicode scalars
 */
private Integer[] convertNumber(long number, Integer[] separator, Integer[] token) {
    List<Integer> sl = new ArrayList<Integer>();
    if (separator != null) {
        appendScalars(sl, separator);
    }
    if (token != null) {
        appendScalars(sl, formatNumber(number, token));
    }
    return sl.toArray(new Integer[sl.size()]);
}
/**
 * Format NUMBER according to format TOKEN. Single scalar tokens select decimal,
 * word, alphabetic sequence or special numeral formatting; the two scalar token
 * "Ww" selects title case words; a token of zero digits ending in a one digit selects
 * zero padded decimal formatting. If the selected formatter produces no result
 * (null), the number is formatted with the default token instead.
 * @param number to be formatted (must be non-negative)
 * @param token format token (must be non-empty)
 * @return formatted number as array of unicode scalars (never null)
 * @throws IllegalArgumentException if number is negative or token is not a valid format token
 */
private Integer[] formatNumber(long number, Integer[] token) {
    assert token.length > 0;
    if (number < 0) {
        throw new IllegalArgumentException("number must be non-negative");
    }
    Integer[] formatted;
    if (token.length == 1) {
        formatted = formatNumberWithScalarToken(number, token[0]);
    } else if ((token.length == 2) && (token[0] == (int) 'W') && (token[1] == (int) 'w')) {
        formatted = formatNumberAsWord(number, Character.TITLECASE_LETTER);
    } else if (isPaddedOne(token)) {
        // field width is the token length, digits are those of the trailing "one"
        formatted = formatNumberAsDecimal(number, token[token.length - 1], token.length);
    } else {
        throw new IllegalArgumentException("invalid format token: \"" + UTF32.fromUTF32(token) + "\"");
    }
    if (formatted == null) {
        // formatter does not support this number (or token): fall back to plain decimal
        formatted = formatNumber(number, DEFAULT_TOKEN);
    }
    assert formatted != null;
    return formatted;
}
/** Format NUMBER according to a format token consisting of the single scalar S; null if unsupported. */
private Integer[] formatNumberWithScalarToken(long number, int s) {
    if (s == (int) 'W') {
        return formatNumberAsWord(number, Character.UPPERCASE_LETTER);
    }
    if (s == (int) 'w') {
        return formatNumberAsWord(number, Character.LOWERCASE_LETTER);
    }
    if ((s == (int) '1') || isStartOfDecimalSequence(s)) {
        return formatNumberAsDecimal(number, s, 1);
    }
    if (isStartOfAlphabeticSequence(s)) {
        // e.g., 'A' and 'a'
        return formatNumberAsSequence(number, s, getSequenceBase(s), null);
    }
    if (isStartOfNumericSpecial(s)) {
        // e.g., 'I' and 'i'
        return formatNumberAsSpecial(number, s);
    }
    return null;
}
/**
 * Format NUMBER as decimal using characters denoting digits that start at ONE,
 * adding one or more (zero) padding characters as needed to fill out field WIDTH.
 * Grouping is applied only if both a positive grouping size and a non-zero grouping
 * separator are configured; a zero or negative grouping size means no grouping.
 * @param number to be formatted
 * @param one unicode scalar value denoting numeric value 1
 * @param width non-negative integer denoting field width of number, possibly including padding
 * @return formatted number as array of unicode scalars
 */
private Integer[] formatNumberAsDecimal(long number, int one, int width) {
    assert Character.getNumericValue(one) == 1;
    assert Character.getNumericValue(one - 1) == 0;
    assert Character.getNumericValue(one + 8) == 9;
    List<Integer> sl = new ArrayList<Integer>();
    int zero = one - 1;
    // collect digits least significant first, pad, then reverse once
    for (long n = number; n > 0; n /= 10) {
        sl.add(zero + (int) (n % 10));
    }
    while (sl.size() < width) {
        sl.add(zero);
    }
    Collections.reverse(sl);
    // groupingSize > 0 (rather than != 0): a negative size is documented as "no grouping"
    // and must not reach performGrouping, which requires a positive size
    if ((groupingSize > 0) && (groupingSeparator != 0)) {
        sl = performGrouping(sl, groupingSize, groupingSeparator);
    }
    return sl.toArray(new Integer[sl.size()]);
}
/**
 * Insert GROUPINGSEPARATOR between groups of GROUPINGSIZE scalars, counting groups
 * from the end of SL (e.g., 1234567 with size 3 becomes 1,234,567).
 * @param sl list of scalars (digits) to group
 * @param groupingSize number of scalars per group; if not positive, no grouping is performed
 * @param groupingSeparator unicode scalar of separator; if zero, no grouping is performed
 * @return a new grouped list, or SL itself if no grouping is needed
 */
private static List<Integer> performGrouping(List<Integer> sl, int groupingSize, int groupingSeparator) {
    int n = sl.size();
    if ((groupingSize <= 0) || (groupingSeparator == 0) || (n <= groupingSize)) {
        return sl;
    }
    // length of the leading (possibly short) group
    int lead = n % groupingSize;
    if (lead == 0) {
        lead = groupingSize;
    }
    List<Integer> gl = new ArrayList<Integer>(n + (n - 1) / groupingSize);
    for (int i = 0; i < n; i++) {
        if ((i >= lead) && (((i - lead) % groupingSize) == 0)) {
            gl.add(groupingSeparator);
        }
        gl.add(sl.get(i));
    }
    return gl;
}
/**
 * Format NUMBER using a sequence of characters that starts at ONE and has
 * BASE elements (e.g., A, B, ..., Z, AA, AB, ...).
 * @param number to be formatted
 * @param one unicode scalar value denoting start of sequence (numeric value 1)
 * @param base number of elements in sequence
 * @param map if non-null, then maps sequence indices to unicode scalars
 * @return formatted number as array of unicode scalars, or null if number is zero
 */
private Integer[] formatNumberAsSequence(long number, int one, int base, int[] map) {
    assert base > 1;
    assert (map == null) || (map.length >= base);
    if (number == 0) {
        // the sequence has no representation of zero
        return null;
    }
    List<Integer> sl = new ArrayList<Integer>();
    // bijective base-BASE numeration: there is no zero digit, hence the (n - 1) terms
    for (long n = number; n > 0; n = (n - 1) / base) {
        int d = (int) ((n - 1) % (long) base);
        sl.add((map != null) ? map[d] : (one + d));
    }
    // digits were collected least significant first
    Collections.reverse(sl);
    return sl.toArray(new Integer[sl.size()]);
}
/**
 * Format NUMBER using the special numeral system identified by ONE.
 * @param number to be formatted
 * @param one unicode scalar value denoting start of system (numeric value 1)
 * @return formatted number as array of unicode scalars, or null if no special
 * formatter is available for ONE (or the formatter itself returns null)
 */
private Integer[] formatNumberAsSpecial(long number, int one) {
    SpecialNumberFormatter formatter = getSpecialFormatter(one, letterValue, features, language, country);
    return (formatter == null)
        ? null
        : formatter.format(number, one, letterValue, features, language, country);
}
/**
 * Format NUMBER as word according to CASETYPE, which must be either
 * Character.UPPERCASE_LETTER, Character.LOWERCASE_LETTER, or
 * Character.TITLECASE_LETTER. Makes use of this.language to
 * determine language of word; Spanish and French are selected explicitly,
 * every other language (including none) is formatted as English.
 * @param number to be formatted
 * @param caseType unicode character type for case conversion
 * @return formatted number as array of unicode scalars
 */
private Integer[] formatNumberAsWord(long number, int caseType) {
    SpecialNumberFormatter formatter;
    if (!isLanguage("eng") && isLanguage("spa")) {
        formatter = new SpanishNumberAsWordFormatter(caseType);
    } else if (!isLanguage("eng") && isLanguage("fra")) {
        formatter = new FrenchNumberAsWordFormatter(caseType);
    } else {
        // English, or an unsupported or unspecified language
        formatter = new EnglishNumberAsWordFormatter(caseType);
    }
    return formatter.format(number, 0, letterValue, features, language, country);
}
/**
 * Determine if this converter's language is the language denoted by ISO3CODE,
 * either literally or by way of an equivalent language code.
 * @param iso3Code three letter language code
 * @return true if this.language is non-null and denotes the same language
 */
private boolean isLanguage(String iso3Code) {
    return (language != null)
        && (language.equals(iso3Code) || isSameLanguage(iso3Code, language));
}
/** groups of equivalent language codes; the first entry of each group is the key used for lookup */
private static String[][] equivalentLanguages = {
{ "eng", "en" },
{ "fra", "fre", "fr" },
{ "spa", "es" },
};
/**
 * Determine if language code LC belongs to the equivalence group whose first
 * entry is I3C.
 * @param i3c three letter language code identifying the group
 * @param lc language code to test
 * @return true if a group keyed by I3C exists and contains LC
 */
private static boolean isSameLanguage(String i3c, String lc) {
    for (int g = 0; g < equivalentLanguages.length; g++) {
        String[] group = equivalentLanguages[g];
        assert group.length >= 2;
        if (!group[0].equals(i3c)) {
            continue;
        }
        // only the first group keyed by I3C is consulted
        boolean found = false;
        for (int k = 0; !found && (k < group.length); k++) {
            found = group[k].equals(lc);
        }
        return found;
    }
    return false;
}
/**
 * Determine if feature string FEATURES contains a feature named FEATURE. A feature
 * string consists of comma separated entries, each being a name optionally followed
 * by '=' and a value; only the name (with surrounding whitespace ignored) is compared.
 * Entries without a name (e.g., "=" or "=x") never match and never cause a failure.
 * @param features feature string (may be null)
 * @param feature name of feature to look for (non-null, non-empty)
 * @return true if FEATURES is non-null and names FEATURE
 */
private static boolean hasFeature(String features, String feature) {
    if (features == null) {
        return false;
    }
    assert feature != null;
    assert feature.length() != 0;
    for (String entry : features.split(",")) {
        // use indexOf rather than split("="): splitting "=" yields an empty array
        int eq = entry.indexOf('=');
        String name = (eq >= 0) ? entry.substring(0, eq) : entry;
        if (name.trim().equals(feature)) {
            return true;
        }
    }
    return false;
}
/* not yet used
private static String getFeatureValue ( String features, String feature ) {
if ( features != null ) {
assert feature != null;
assert feature.length() != 0;
String[] fa = features.split(",");
for ( String f : fa ) {
String[] fp = f.split("=");
assert fp.length > 0;
String fn = fp[0];
String fv = ( fp.length > 1 ) ? fp[1] : "";
if ( fn.equals ( feature ) ) {
return fv;
}
}
}
return "";
}
*/
/**
 * Append all scalars of array SA, in order, to the list SCALARS.
 * @param scalars list to append to
 * @param sa unicode scalars to append
 */
private static void appendScalars(List<Integer> scalars, Integer[] sa) {
    Collections.addAll(scalars, sa);
}
/**
 * Convert a list of unicode scalars to a string by way of UTF32.fromUTF32.
 * @param scalars list of unicode scalars
 * @return string encoding of SCALARS
 */
private static String scalarsToString(List<Integer> scalars) {
    Integer[] sa = scalars.toArray(new Integer[scalars.size()]);
    return UTF32.fromUTF32(sa);
}
/**
 * Determine if TOKEN is a zero padded one, i.e., consists of zero or more decimal
 * digits of value zero followed by one decimal digit of value one (e.g., "001").
 * @param token format token (must be non-empty)
 * @return true if TOKEN is a padded one
 */
private static boolean isPaddedOne(Integer[] token) {
    int last = token.length - 1;
    if (getDecimalValue(token[last]) != 1) {
        return false;
    }
    for (int i = 0; i < last; i++) {
        if (getDecimalValue(token[i]) != 0) {
            return false;
        }
    }
    return true;
}
/**
 * Obtain the numeric value of SCALAR if it is a decimal digit (general category Nd).
 * @param scalar unicode scalar value
 * @return numeric value of the digit, or -1 if SCALAR is not a decimal digit
 */
private static int getDecimalValue(Integer scalar) {
    int codePoint = scalar.intValue();
    boolean decimalDigit = Character.getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
    return decimalDigit ? Character.getNumericValue(codePoint) : -1;
}
/**
 * Determine if S denotes the digit one of a decimal digit sequence, i.e., S has
 * numeric value 1, its predecessor has numeric value 0, and the scalar eight
 * positions after it has numeric value 9.
 * @param s unicode scalar value
 * @return true if S starts a decimal sequence
 */
private static boolean isStartOfDecimalSequence(int s) {
    if (Character.getNumericValue(s) != 1) {
        return false;
    }
    if (Character.getNumericValue(s - 1) != 0) {
        return false;
    }
    return Character.getNumericValue(s + 8) == 9;
}
/** supported alphabetic sequences; each entry is { first scalar of sequence, number of elements } */
private static int[][] supportedAlphabeticSequences = {
{ 'A', 26 }, // A...Z
{ 'a', 26 }, // a...z
};
/**
 * Determine if S is the first scalar of a supported alphabetic sequence.
 * @param s unicode scalar value
 * @return true if S starts a supported alphabetic sequence
 */
private static boolean isStartOfAlphabeticSequence(int s) {
    boolean found = false;
    for (int i = 0; !found && (i < supportedAlphabeticSequences.length); i++) {
        int[] sequence = supportedAlphabeticSequences[i];
        assert sequence.length >= 2;
        found = (sequence[0] == s);
    }
    return found;
}
/**
 * Obtain the number of elements of the supported alphabetic sequence that starts at S.
 * @param s unicode scalar value denoting start of sequence
 * @return number of elements in sequence, or zero if S does not start a supported sequence
 */
private static int getSequenceBase(int s) {
    int base = 0;
    for (int i = 0; i < supportedAlphabeticSequences.length; i++) {
        int[] sequence = supportedAlphabeticSequences[i];
        assert sequence.length >= 2;
        if (sequence[0] == s) {
            // first matching entry wins
            base = sequence[1];
            break;
        }
    }
    return base;
}
/** scalars that select a special numeral system; each entry is { scalar denoting numeric value 1 } */
private static int[][] supportedSpecials = {
{ 'I' }, // latin - uppercase roman numerals
{ 'i' }, // latin - lowercase roman numerals
{ '\u0391' }, // greek - uppercase isopsephy numerals
{ '\u03B1' }, // greek - lowercase isopsephy numerals
{ '\u05D0' }, // hebrew - gematria numerals
{ '\u0623' }, // arabic - abjadi numerals
{ '\u0627' }, // arabic - either abjadi or hijai alphabetic sequence
{ '\u0E01' }, // thai - default alphabetic sequence
{ '\u3042' }, // kana - hiragana (gojuon) - default alphabetic sequence
{ '\u3044' }, // kana - hiragana (iroha)
{ '\u30A2' }, // kana - katakana (gojuon) - default alphabetic sequence
{ '\u30A4' }, // kana - katakana (iroha)
};
/**
 * Determine if S selects one of the supported special numeral systems.
 * @param s unicode scalar value
 * @return true if S is listed in the table of supported specials
 */
private static boolean isStartOfNumericSpecial(int s) {
    boolean found = false;
    for (int i = 0; !found && (i < supportedSpecials.length); i++) {
        int[] special = supportedSpecials[i];
        assert special.length >= 1;
        found = (special[0] == s);
    }
    return found;
}
/**
 * Obtain a new formatter for the special numeral system selected by ONE. The
 * remaining parameters are currently not consulted when choosing the formatter.
 * @param one unicode scalar value denoting start of system (numeric value 1)
 * @param letterValue letter value
 * @param features features (feature sub-parameters)
 * @param language denotes applicable language
 * @param country denotes applicable country
 * @return special number formatter, or null if ONE selects no supported system
 */
private SpecialNumberFormatter getSpecialFormatter(int one, int letterValue, String features, String language, String country) {
    switch (one) {
    case 'I':
    case 'i':
        return new RomanNumeralsFormatter();
    case '\u0391':
    case '\u03B1':
        return new IsopsephryNumeralsFormatter();
    case '\u05D0':
        return new GematriaNumeralsFormatter();
    case '\u0623':
    case '\u0627':
        return new ArabicNumeralsFormatter();
    case '\u0E01':
        return new ThaiNumeralsFormatter();
    case '\u3042':
    case '\u3044':
    case '\u30A2':
    case '\u30A4':
        return new KanaNumeralsFormatter();
    default:
        return null;
    }
}
/**
 * Convert each scalar of SA to upper case, in place.
 * @param sa array of unicode scalars (non-null); modified by this call
 * @return SA itself
 */
private static Integer[] toUpperCase(Integer[] sa) {
    assert sa != null;
    int index = 0;
    while (index < sa.length) {
        int scalar = sa[index].intValue();
        sa[index] = Integer.valueOf(Character.toUpperCase(scalar));
        index++;
    }
    return sa;
}
/**
 * Convert each scalar of SA to lower case, in place.
 * @param sa array of unicode scalars (non-null); modified by this call
 * @return SA itself
 */
private static Integer[] toLowerCase(Integer[] sa) {
    assert sa != null;
    int index = 0;
    while (index < sa.length) {
        int scalar = sa[index].intValue();
        sa[index] = Integer.valueOf(Character.toLowerCase(scalar));
        index++;
    }
    return sa;
}
/* not yet used
private static Integer[] toTitleCase ( Integer[] sa ) {
assert sa != null;
if ( sa.length > 0 ) {
sa [ 0 ] = Character.toTitleCase ( sa [ 0 ] );
}
return sa;
}
*/
/**
 * Convert the case of each word of WORDS according to CASETYPE.
 * @param words list of words
 * @param caseType unicode character type for case conversion
 * @return new list holding the case converted words, in the same order
 */
private static List<String> convertWordCase(List<String> words, int caseType) {
    List<String> wl = new ArrayList<String>(words.size());
    for (String w : words) {
        wl.add(convertWordCase(w, caseType));
    }
    return wl;
}
/**
 * Convert the case of WORD according to CASETYPE: Character.UPPERCASE_LETTER
 * converts to upper case, Character.LOWERCASE_LETTER to lower case, and
 * Character.TITLECASE_LETTER upper-cases the first character and lower-cases the
 * rest. Any other case type leaves the word unchanged.
 *
 * Case conversion is performed with the root locale so that the result does not
 * depend on the default locale of the JVM (e.g., under a Turkish default locale
 * "five" would otherwise be upper-cased to "F\u0130VE").
 * @param word word to convert
 * @param caseType unicode character type for case conversion
 * @return case converted word
 */
private static String convertWordCase(String word, int caseType) {
    java.util.Locale locale = java.util.Locale.ROOT;
    if (caseType == Character.UPPERCASE_LETTER) {
        return word.toUpperCase(locale);
    } else if (caseType == Character.LOWERCASE_LETTER) {
        return word.toLowerCase(locale);
    } else if (caseType == Character.TITLECASE_LETTER) {
        if (word.length() == 0) {
            return word;
        }
        return word.substring(0, 1).toUpperCase(locale) + word.substring(1).toLowerCase(locale);
    } else {
        return word;
    }
}
/**
 * Join WORDS into a single string, inserting SEPARATOR before each word that is
 * appended to a non-empty result (so leading empty words do not produce a leading
 * separator).
 * @param words list of words
 * @param separator string to insert between words
 * @return joined words
 */
private static String joinWords(List<String> words, String separator) {
    StringBuilder sb = new StringBuilder();
    for (String w : words) {
        if (sb.length() > 0) {
            sb.append(separator);
        }
        sb.append(w);
    }
    return sb.toString();
}
/**
 * Special number formatter, implemented by the word numeral formatters and the
 * special numeral system formatters of this class.
 */
interface SpecialNumberFormatter {
/**
* Format number with special numeral system.
* @param number to be formatted
* @param one unicode scalar value denoting numeric value 1
* @param letterValue letter value (must be one of the above letter value enumeration values)
* @param features features (feature sub-parameters)
* @param language denotes applicable language
* @param country denotes applicable country
* @return formatted number as array of unicode scalars; implementations in this file
* return null when they cannot format the number (e.g., when it is out of range)
*/
Integer[] format(long number, int one, int letterValue, String features, String language, String country);
}
/**
 * English Word Numerals
 */
private static String[] englishWordOnes = { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
private static String[] englishWordTeens = { "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" };
private static String[] englishWordTens = { "", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety" };
private static String[] englishWordOthers = { "hundred", "thousand", "million", "billion" };
private static String[] englishWordOnesOrd = { "none", "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth" };
private static String[] englishWordTeensOrd = { "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth" };
// index 9 previously read "ninetith" (misspelling of "ninetieth")
private static String[] englishWordTensOrd = { "", "tenth", "twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth", "seventieth", "eightieth", "ninetieth" };
private static String[] englishWordOthersOrd = { "hundredth", "thousandth", "millionth", "billionth" };
/**
 * Formats numbers less than one trillion as English cardinal words or, if the
 * 'ordinal' feature is present, as English ordinal words.
 */
private static class EnglishNumberAsWordFormatter implements SpecialNumberFormatter {
    private int caseType = Character.UPPERCASE_LETTER;
    EnglishNumberAsWordFormatter(int caseType) {
        this.caseType = caseType;
    }
    /**
     * Format NUMBER as space separated English words in the case given at construction.
     * @return formatted number as array of unicode scalars, or null if NUMBER is one trillion or more
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (number >= 1000000000000L) {
            return null;
        }
        List<String> wl = new ArrayList<String>();
        boolean ordinal = hasFeature(features, "ordinal");
        if (number == 0) {
            wl.add(englishWordOnes[0]);
        } else if (ordinal && (number < 10)) {
            wl.add(englishWordOnesOrd[(int) number]);
        } else {
            int ones = (int) (number % 1000);
            int thousands = (int) ((number / 1000) % 1000);
            int millions = (int) ((number / 1000000) % 1000);
            int billions = (int) ((number / 1000000000) % 1000);
            // a scale word takes its ordinal form only if nothing non-zero follows it
            if (billions > 0) {
                formatOnesInThousand(wl, billions);
                wl.add(scaleWord(3, ordinal && ((number % 1000000000) == 0)));
            }
            if (millions > 0) {
                formatOnesInThousand(wl, millions);
                wl.add(scaleWord(2, ordinal && ((number % 1000000) == 0)));
            }
            if (thousands > 0) {
                formatOnesInThousand(wl, thousands);
                wl.add(scaleWord(1, ordinal && ((number % 1000) == 0)));
            }
            if (ones > 0) {
                formatOnesInThousand(wl, ones, ordinal);
            }
        }
        return UTF32.toUTF32(joinWords(convertWordCase(wl, caseType), " "), 0, true);
    }
    /** Select the ordinal or cardinal form of scale word INDEX (hundred, thousand, million, billion). */
    private String scaleWord(int index, boolean ordinalForm) {
        return ordinalForm ? englishWordOthersOrd[index] : englishWordOthers[index];
    }
    /** Append the cardinal words for NUMBER (less than one thousand) to WL. */
    private List<String> formatOnesInThousand(List<String> wl, int number) {
        return formatOnesInThousand(wl, number, false);
    }
    /**
     * Append the words for NUMBER (less than one thousand) to WL; if ORDINAL,
     * the last word appended takes its ordinal form.
     */
    private List<String> formatOnesInThousand(List<String> wl, int number, boolean ordinal) {
        assert number < 1000;
        int ones = number % 10;
        int tens = (number / 10) % 10;
        int hundreds = (number / 100) % 10;
        if (hundreds > 0) {
            wl.add(englishWordOnes[hundreds]);
            wl.add(scaleWord(0, ordinal && ((number % 100) == 0)));
        }
        if (tens == 1) {
            wl.add(ordinal ? englishWordTeensOrd[ones] : englishWordTeens[ones]);
        } else {
            if (tens > 1) {
                wl.add((ordinal && (ones == 0)) ? englishWordTensOrd[tens] : englishWordTens[tens]);
            }
            if (ones > 0) {
                wl.add(ordinal ? englishWordOnesOrd[ones] : englishWordOnes[ones]);
            }
        }
        return wl;
    }
}
/**
 * French Word Numerals
 */
private static String[] frenchWordOnes = { "z\u00e9ro", "un", "deux", "trois", "quatre", "cinq", "six", "sept", "huit", "neuf" };
private static String[] frenchWordTeens = { "dix", "onze", "douze", "treize", "quatorze", "quinze", "seize", "dix-sept", "dix-huit", "dix-neuf" };
private static String[] frenchWordTens = { "", "dix", "vingt", "trente", "quarante", "cinquante", "soixante", "soixante-dix", "quatre-vingt", "quatre-vingt-dix" };
private static String[] frenchWordOthers = { "cent", "cents", "mille", "million", "millions", "milliard", "milliards" };
// NOTE: the ordinal tables are zero based: index 0 holds the ordinal for one, index 9 the ordinal for ten
private static String[] frenchWordOnesOrdMale = { "premier", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" };
private static String[] frenchWordOnesOrdFemale = { "premi\u00e8re", "deuxi\u00e8me", "troisi\u00e8me", "quatri\u00e8me", "cinqui\u00e8me", "sixi\u00e8me", "septi\u00e8me", "huiti\u00e8me", "neuvi\u00e8me", "dixi\u00e8me" };
/**
 * Formats numbers less than one trillion as French cardinal words. If the 'ordinal'
 * feature is present, numbers from one to ten are formatted as ordinal words, with
 * the 'female' feature selecting the feminine form; larger numbers are formatted as
 * cardinals.
 */
private static class FrenchNumberAsWordFormatter implements SpecialNumberFormatter {
    private int caseType = Character.UPPERCASE_LETTER;
    FrenchNumberAsWordFormatter(int caseType) {
        this.caseType = caseType;
    }
    /**
     * Format NUMBER as space separated French words in the case given at construction.
     * @return formatted number as array of unicode scalars, or null if NUMBER is one trillion or more
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (number >= 1000000000000L) {
            return null;
        }
        List<String> wl = new ArrayList<String>();
        boolean ordinal = hasFeature(features, "ordinal");
        if (number == 0) {
            wl.add(frenchWordOnes[0]);
        } else if (ordinal && (number <= 10)) {
            // The tables are zero based, so index with number - 1. (Indexing with number
            // itself yielded the ordinal of number + 1 and overran the table for ten.)
            int index = (int) number - 1;
            if (hasFeature(features, "female")) {
                wl.add(frenchWordOnesOrdFemale[index]);
            } else {
                wl.add(frenchWordOnesOrdMale[index]);
            }
        } else {
            int ones = (int) (number % 1000);
            int thousands = (int) ((number / 1000) % 1000);
            int millions = (int) ((number / 1000000) % 1000);
            int billions = (int) ((number / 1000000000) % 1000);
            if (billions > 0) {
                formatOnesInThousand(wl, billions);
                // milliard / milliards
                wl.add((billions == 1) ? frenchWordOthers[5] : frenchWordOthers[6]);
            }
            if (millions > 0) {
                formatOnesInThousand(wl, millions);
                // million / millions
                wl.add((millions == 1) ? frenchWordOthers[3] : frenchWordOthers[4]);
            }
            if (thousands > 0) {
                // one thousand is just "mille", not "un mille"
                if (thousands > 1) {
                    formatOnesInThousand(wl, thousands);
                }
                wl.add(frenchWordOthers[2]);
            }
            if (ones > 0) {
                formatOnesInThousand(wl, ones);
            }
        }
        return UTF32.toUTF32(joinWords(convertWordCase(wl, caseType), " "), 0, true);
    }
    /** Append the cardinal words for NUMBER (less than one thousand) to WL. */
    private List<String> formatOnesInThousand(List<String> wl, int number) {
        assert number < 1000;
        int ones = number % 10;
        int tens = (number / 10) % 10;
        int hundreds = (number / 100) % 10;
        if (hundreds > 0) {
            // one hundred is just "cent", not "un cent"
            if (hundreds > 1) {
                wl.add(frenchWordOnes[hundreds]);
            }
            // plural "cents" only for an exact multiple of one hundred greater than one hundred
            boolean plural = (hundreds > 1) && (tens == 0) && (ones == 0);
            wl.add(plural ? frenchWordOthers[1] : frenchWordOthers[0]);
        }
        if (tens == 0) {
            if (ones > 0) {
                wl.add(frenchWordOnes[ones]);
            }
        } else if (tens == 1) {
            wl.add(frenchWordTeens[ones]);
        } else if (tens < 7) {
            if (ones == 1) {
                // e.g., "vingt et un"
                wl.add(frenchWordTens[tens]);
                wl.add("et");
                wl.add(frenchWordOnes[ones]);
            } else if (ones > 0) {
                wl.add(frenchWordTens[tens] + '-' + frenchWordOnes[ones]);
            } else {
                wl.add(frenchWordTens[tens]);
            }
        } else if (tens == 7) {
            // seventies are sixty plus a teen, e.g., "soixante et onze", "soixante-douze"
            if (ones == 1) {
                wl.add(frenchWordTens[6]);
                wl.add("et");
                wl.add(frenchWordTeens[ones]);
            } else {
                wl.add(frenchWordTens[6] + '-' + frenchWordTeens[ones]);
            }
        } else if (tens == 8) {
            // "quatre-vingts" takes an 's' only when nothing follows
            if (ones > 0) {
                wl.add(frenchWordTens[tens] + '-' + frenchWordOnes[ones]);
            } else {
                wl.add(frenchWordTens[tens] + 's');
            }
        } else if (tens == 9) {
            // nineties are eighty plus a teen, e.g., "quatre-vingt-onze"
            wl.add(frenchWordTens[8] + '-' + frenchWordTeens[ones]);
        }
        return wl;
    }
}
/**
 * Spanish Word Numerals
 */
// index 6 previously read "seise" (misspelling of "seis")
private static String[] spanishWordOnes = { "cero", "uno", "dos", "tres", "cuatro", "cinco", "seis", "siete", "ocho", "nueve" };
private static String[] spanishWordTeens = { "diez", "once", "doce", "trece", "catorce", "quince", "diecis\u00e9is", "diecisiete", "dieciocho", "diecinueve" };
private static String[] spanishWordTweens = { "veinte", "veintiuno", "veintid\u00f3s", "veintitr\u00e9s", "veinticuatro", "veinticinco", "veintis\u00e9is", "veintisiete", "veintiocho", "veintinueve" };
private static String[] spanishWordTens = { "", "diez", "veinte", "treinta", "cuarenta", "cincuenta", "sesenta", "setenta", "ochenta", "noventa" };
private static String[] spanishWordHundreds = { "", "ciento", "doscientos", "trescientos", "cuatrocientos", "quinientos", "seiscientos", "setecientos", "ochocientos", "novecientos" };
private static String[] spanishWordOthers = { "un", "cien", "mil", "mill\u00f3n", "millones" };
// index 9 previously read "novento" / "noventa" (the latter means ninety); the ordinal for nine is "noveno" / "novena"
private static String[] spanishWordOnesOrdMale = { "ninguno", "primero", "segundo", "tercero", "cuarto", "quinto", "sexto", "s\u00e9ptimo", "octavo", "noveno", "d\u00e9cimo" };
private static String[] spanishWordOnesOrdFemale = { "ninguna", "primera", "segunda", "tercera", "cuarta", "quinta", "sexta", "s\u00e9ptima", "octava", "novena", "d\u00e9cima" };
/**
 * Formats numbers less than one trillion as Spanish cardinal words. If the 'ordinal'
 * feature is present, numbers from one to ten are formatted as ordinal words, with
 * the 'female' feature selecting the feminine form; larger numbers are formatted as
 * cardinals.
 */
private static class SpanishNumberAsWordFormatter implements SpecialNumberFormatter {
    private int caseType = Character.UPPERCASE_LETTER;
    SpanishNumberAsWordFormatter(int caseType) {
        this.caseType = caseType;
    }
    /**
     * Format NUMBER as space separated Spanish words in the case given at construction.
     * @return formatted number as array of unicode scalars, or null if NUMBER is one trillion or more
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (number >= 1000000000000L) {
            return null;
        }
        List<String> wl = new ArrayList<String>();
        boolean ordinal = hasFeature(features, "ordinal");
        if (number == 0) {
            wl.add(spanishWordOnes[0]);
        } else if (ordinal && (number <= 10)) {
            if (hasFeature(features, "female")) {
                wl.add(spanishWordOnesOrdFemale[(int) number]);
            } else {
                wl.add(spanishWordOnesOrdMale[(int) number]);
            }
        } else {
            int ones = (int) (number % 1000);
            int thousands = (int) ((number / 1000) % 1000);
            int millions = (int) ((number / 1000000) % 1000);
            int billions = (int) ((number / 1000000000) % 1000);
            // Spanish counts thousands of millions ("mil millones"), so the billions and
            // millions groups share a single trailing "millones": 1 500 000 000 is
            // "mil quinientos millones", not "mil millones quinientos millones".
            if (billions > 0) {
                if (billions > 1) {
                    formatOnesInThousand(wl, billions);
                }
                wl.add(spanishWordOthers[2]);
                if (millions == 0) {
                    wl.add(spanishWordOthers[4]);
                }
            }
            if (millions > 0) {
                if (millions == 1) {
                    wl.add(spanishWordOthers[0]);
                } else {
                    formatOnesInThousand(wl, millions);
                }
                // singular "millon" only for exactly one million in total
                if ((millions > 1) || (billions > 0)) {
                    wl.add(spanishWordOthers[4]);
                } else {
                    wl.add(spanishWordOthers[3]);
                }
            }
            if (thousands > 0) {
                // one thousand is just "mil", not "uno mil"
                if (thousands > 1) {
                    formatOnesInThousand(wl, thousands);
                }
                wl.add(spanishWordOthers[2]);
            }
            if (ones > 0) {
                formatOnesInThousand(wl, ones);
            }
        }
        return UTF32.toUTF32(joinWords(convertWordCase(wl, caseType), " "), 0, true);
    }
    /** Append the cardinal words for NUMBER (less than one thousand) to WL. */
    private List<String> formatOnesInThousand(List<String> wl, int number) {
        assert number < 1000;
        int ones = number % 10;
        int tens = (number / 10) % 10;
        int hundreds = (number / 100) % 10;
        if (hundreds > 0) {
            // exactly one hundred is "cien"; otherwise "ciento", "doscientos", ...
            boolean exactlyOneHundred = (hundreds == 1) && (tens == 0) && (ones == 0);
            wl.add(exactlyOneHundred ? spanishWordOthers[1] : spanishWordHundreds[hundreds]);
        }
        if (tens == 0) {
            if (ones > 0) {
                wl.add(spanishWordOnes[ones]);
            }
        } else if (tens == 1) {
            wl.add(spanishWordTeens[ones]);
        } else if (tens == 2) {
            // twenties are single words, e.g., "veintiuno"
            wl.add(spanishWordTweens[ones]);
        } else {
            wl.add(spanishWordTens[tens]);
            if (ones > 0) {
                wl.add("y");
                wl.add(spanishWordOnes[ones]);
            }
        }
        return wl;
    }
}
/**
* Roman (Latin) Numerals
*/
private static int[] romanMapping = {
100000,
90000,
50000,
40000,
10000,
9000,
5000,
4000,
1000,
900,
500,
400,
100,
90,
50,
40,
10,
9,
8,
7,
6,
5,
4,
3,
2,
1
};
private static String[] romanStandardForms = {
null,
null,
null,
null,
null,
null,
null,
null,
"m",
"cm",
"d",
"cd",
"c",
"xc",
"l",
"xl",
"x",
"ix",
null,
null,
null,
"v",
"iv",
null,
null,
"i"
};
private static String[] romanLargeForms = {
"\u2188",
"\u2182\u2188",
"\u2187",
"\u2182\u2187",
"\u2182",
"\u2180\u2182",
"\u2181",
"\u2180\u2181",
"m",
"cm",
"d",
"cd",
"c",
"xc",
"l",
"xl",
"x",
"ix",
null,
null,
null,
"v",
"iv",
null,
null,
"i"
};
// Forms selected by the "unicode-number-forms" feature (upper limit 199999), parallel to
// romanMapping (trailing comments give the paired value). Every entry is non-null, so each
// value from 1 to 9 has its own single-character form in this table.
private static String[] romanNumberForms = {
"\u2188", // 100000
"\u2182\u2188", // 90000
"\u2187", // 50000
"\u2182\u2187", // 40000
"\u2182", // 10000
"\u2180\u2182", // 9000
"\u2181", // 5000
"\u2180\u2181", // 4000
"\u216F", // 1000
"\u216D\u216F", // 900
"\u216E", // 500
"\u216D\u216E", // 400
"\u216D", // 100
"\u2169\u216D", // 90
"\u216C", // 50
"\u2169\u216C", // 40
"\u2169", // 10
"\u2168", // 9
"\u2167", // 8
"\u2166", // 7
"\u2165", // 6
"\u2164", // 5
"\u2163", // 4
"\u2162", // 3
"\u2161", // 2
"\u2160" // 1
};
/**
 * Special number formatter that produces Roman numerals.
 *
 * The symbol table is chosen from the features string: "unicode-number-forms" selects
 * romanNumberForms, otherwise "large" selects romanLargeForms, otherwise
 * romanStandardForms is used. The first two accept values up to 199999, the last
 * up to 4999.
 */
private static class RomanNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Format a number as a Roman numeral.
     * @param number the value to format
     * @param one the format token character; 'I' requests upper case, 'i' lower case
     * @param letterValue not used by this formatter
     * @param features feature string tested with hasFeature to select the symbol table
     * @param language not used by this formatter
     * @param country not used by this formatter
     * @return the numeral as an array of scalar values, or null if number is zero,
     * exceeds the limit of the selected table, or one is neither 'I' nor 'i'
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (number == 0) {
            return null;
        }
        final String[] forms;
        final int maxNumber;
        if (hasFeature(features, "unicode-number-forms")) {
            forms = romanNumberForms;
            maxNumber = 199999;
        } else if (hasFeature(features, "large")) {
            forms = romanLargeForms;
            maxNumber = 199999;
        } else {
            forms = romanStandardForms;
            maxNumber = 4999;
        }
        if (number > maxNumber) {
            return null;
        }
        // typed list: with a raw List, toArray(new Integer[..]) erases to Object[]
        List<Integer> sl = new ArrayList<Integer>();
        long remaining = number;
        // romanMapping is descending, so one pass that repeats each usable symbol while it
        // still fits yields the same greedy sequence as restarting the scan per symbol
        for (int i = 0, n = romanMapping.length; (i < n) && (remaining > 0); i++) {
            String form = forms [ i ];
            if (form == null) {
                continue; // the selected table has no form for this value
            }
            int d = romanMapping [ i ];
            while (remaining >= d) {
                appendScalars(sl, UTF32.toUTF32(form, 0, true));
                remaining -= d;
            }
        }
        Integer[] scalars = sl.toArray(new Integer [ sl.size() ]);
        if (one == (int) 'I') {
            return toUpperCase(scalars);
        } else if (one == (int) 'i') {
            return toLowerCase(scalars);
        } else {
            return null;
        }
    }
}
/**
* Isopsephy (Greek) Numerals
*/
// Placeholder formatter: no conversion is implemented, format() returns null for every input.
// NOTE(review): the other formatters in this file return null for inputs they do not handle,
// so null presumably tells the caller that no special format was produced - confirm at the call site.
private static class IsopsephryNumeralsFormatter implements SpecialNumberFormatter {
public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
// all parameters are ignored
return null;
}
}
/**
* Gematria (Hebrew) Numerals
*/
// Hebrew letter code points in three groups of nine (27 entries): indices 0-8 ones,
// 9-17 tens, 18-26 hundreds. GematriaNumeralsFormatter passes the whole table to
// formatNumberAsSequence for alphabetic numbering, and formatAsGematriaNumber indexes it by
// decimal digit. That method only reads indices 18-21 (QOF..TAV) from the hundreds group and
// composes 500-900 from TAV plus another hundreds letter, so it never reads the five
// final-form entries at indices 22-26.
private static int[] hebrewGematriaAlphabeticMap = {
// ones
0x05D0, // ALEF
0x05D1, // BET
0x05D2, // GIMEL
0x05D3, // DALET
0x05D4, // HE
0x05D5, // VAV
0x05D6, // ZAYIN
0x05D7, // HET
0x05D8, // TET
// tens
0x05D9, // YOD
0x05DB, // KAF
0x05DC, // LAMED
0x05DE, // MEM
0x05E0, // NUN
0x05E1, // SAMEKH
0x05E2, // AYIN
0x05E4, // PE
0x05E6, // TSADHI
// hundreds
0x05E7, // QOF
0x05E8, // RESH
0x05E9, // SHIN
0x05EA, // TAV
0x05DA, // FINAL KAF
0x05DD, // FINAL MEM
0x05DF, // FINAL NUN
0x05E3, // FINAL PE
0x05E5, // FINAL TSADHI
};
/**
 * Special number formatter for Hebrew letters, selected by the format token U+05D0 (ALEF).
 *
 * With an alphabetic letter value the number is formatted as a position in the letter
 * sequence given by hebrewGematriaAlphabeticMap; with a traditional letter value it is
 * formatted as a gematria numeral in the range 1..1999.
 */
private class GematriaNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Format a number using Hebrew letters.
     * @param number the value to format
     * @param one the format token character; only 0x05D0 (ALEF) is handled
     * @param letterValue LETTER_VALUE_ALPHABETIC or LETTER_VALUE_TRADITIONAL
     * @param features passed through to the gematria formatter (not used there)
     * @param language passed through to the gematria formatter (not used there)
     * @param country passed through to the gematria formatter (not used there)
     * @return the formatted scalar values, or null if the token or letter value is not
     * handled, or if a traditional number is zero or greater than 1999
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (one != 0x05D0) {
            return null;
        }
        if (letterValue == LETTER_VALUE_ALPHABETIC) {
            return formatNumberAsSequence(number, one, hebrewGematriaAlphabeticMap.length, hebrewGematriaAlphabeticMap);
        }
        if (letterValue == LETTER_VALUE_TRADITIONAL) {
            if ((number == 0) || (number > 1999)) {
                return null;
            }
            return formatAsGematriaNumber(number, features, language, country);
        }
        return null;
    }

    /**
     * Compose a gematria numeral from the decimal digits of a number in 1..1999.
     *
     * The thousands letter is followed by U+05F3 (GERESH). Hundreds from 500 to 900 are
     * built from one or two TAVs, U+05F4 (GERSHAYIM) and a further hundreds letter. A
     * number whose last two digits are 15 or 16 is written TET+VAV or TET+ZAYIN instead of
     * YOD+HE or YOD+VAV; this applies to any such number (115, 216, 1015, ...), not only
     * to exactly 15 and 16. GERSHAYIM is placed inside that pair unless the hundreds group
     * has already emitted one.
     * @param number the value to format, asserted non-zero and less than 2000
     * @param features not used
     * @param language not used
     * @param country not used
     * @return the numeral as an array of scalar values
     */
    private Integer[] formatAsGematriaNumber(long number, String features, String language, String country) {
        // typed list: with a raw List, toArray(new Integer[..]) erases to Object[]
        List<Integer> sl = new ArrayList<Integer>();
        assert hebrewGematriaAlphabeticMap.length == 27;
        assert hebrewGematriaAlphabeticMap[0] == 0x05D0; // ALEF
        assert hebrewGematriaAlphabeticMap[21] == 0x05EA; // TAV
        assert number != 0;
        assert number < 2000;
        final int[] map = hebrewGematriaAlphabeticMap;
        final int thousands = (int) ((number / 1000) % 10);
        final int hundreds = (int) ((number / 100) % 10);
        final int tens = (int) ((number / 10) % 10);
        final int ones = (int) (number % 10);
        boolean gershayimEmitted = false;
        if (thousands > 0) {
            sl.add(map [ thousands - 1 ]);
            sl.add(0x05F3); // GERESH after the thousands letter
        }
        if (hundreds > 0) {
            assert hundreds < 10;
            final int tav = map [ 18 + (4 - 1) ]; // 400
            if (hundreds < 5) {
                // 100..400 each have their own letter
                sl.add(map [ 18 + (hundreds - 1) ]);
            } else if (hundreds < 9) {
                // 500..800 = 400 + (100..400)
                sl.add(tav);
                sl.add(0x05F4);
                sl.add(map [ 18 + (hundreds - 5) ]);
                gershayimEmitted = true;
            } else {
                // 900 = 400 + 400 + 100
                sl.add(tav);
                sl.add(tav);
                sl.add(0x05F4);
                sl.add(map [ 18 ]);
                gershayimEmitted = true;
            }
        }
        if ((tens == 1) && ((ones == 5) || (ones == 6))) {
            // trailing 15 / 16 are written 9+6 / 9+7
            sl.add(map [ 9 - 1 ]); // TET
            if (!gershayimEmitted) {
                sl.add(0x05F4);
            }
            sl.add(map [ ones ]); // index 5 is VAV (6), index 6 is ZAYIN (7)
        } else {
            if (tens > 0) {
                assert tens < 10;
                sl.add(map [ 9 + (tens - 1) ]);
            }
            if (ones > 0) {
                assert ones < 10;
                sl.add(map [ ones - 1 ]);
            }
        }
        return sl.toArray(new Integer [ sl.size() ]);
    }
}
/**
* Arabic Numerals
*/
// Arabic letter code points in abjadi order (28 entries): indices 0-8 ones, 9-17 tens,
// 18-26 hundreds, 27 thousand. ArabicNumeralsFormatter passes the whole table to
// formatNumberAsSequence for letter sequences, and formatAsAbjadiNumber indexes it by
// decimal digit using the group offsets 0, 9, 18 and 27.
private static int[] arabicAbjadiAlphabeticMap = {
// ones
0x0623, // ALEF WITH HAMZA ABOVE
0x0628, // BEH
0x062C, // JEEM
0x062F, // DAL
0x0647, // HEH
0x0648, // WAW
0x0632, // ZAIN
0x062D, // HAH
0x0637, // TAH
// tens
0x0649, // ALEF MAQSURA
0x0643, // KAF
0x0644, // LAM
0x0645, // MEEM
0x0646, // NOON
0x0633, // SEEN
0x0639, // AIN
0x0641, // FEH
0x0635, // SAD
// hundreds
0x0642, // QAF
0x0631, // REH
0x0634, // SHEEN
0x062A, // TEH
0x062B, // THEH
0x062E, // KHAH
0x0630, // THAL
0x0636, // DAD
0x0638, // ZAH
// thousands
0x063A, // GHAIN
};
// Arabic letter code points in hijai order (28 entries). ArabicNumeralsFormatter passes this
// table to formatNumberAsSequence when the format token is U+0627 and the letter value is
// LETTER_VALUE_ALPHABETIC; it is not used for abjadi number composition.
private static int[] arabicHijaiAlphabeticMap = {
0x0623, // ALEF WITH HAMZA ABOVE
0x0628, // BEH
0x062A, // TEH
0x062B, // THEH
0x062C, // JEEM
0x062D, // HAH
0x062E, // KHAH
0x062F, // DAL
0x0630, // THAL
0x0631, // REH
0x0632, // ZAIN
0x0633, // SEEN
0x0634, // SHEEN
0x0635, // SAD
0x0636, // DAD
0x0637, // TAH
0x0638, // ZAH
0x0639, // AIN
0x063A, // GHAIN
0x0641, // FEH
0x0642, // QAF
0x0643, // KAF
0x0644, // LAM
0x0645, // MEEM
0x0646, // NOON
0x0647, // HEH
0x0648, // WAW
0x0649, // ALEF MAQSURA
};
/**
 * Special number formatter for Arabic letters.
 *
 * The format token U+0627 (ALEF) selects letter-sequence numbering, using the hijai table
 * for an alphabetic letter value and the abjadi table otherwise. The format token U+0623
 * (ALEF WITH HAMZA ABOVE) selects abjadi numerals in the range 1..1999.
 */
private class ArabicNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Format a number using Arabic letters.
     * @param number the value to format
     * @param one the format token character (0x0627 or 0x0623)
     * @param letterValue letter value selecting the sequence table for token 0x0627
     * @param features passed through to the abjadi formatter (not used there)
     * @param language passed through to the abjadi formatter (not used there)
     * @param country passed through to the abjadi formatter (not used there)
     * @return the formatted scalar values, or null if the token is not handled or an
     * abjadi number is zero or greater than 1999
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (one == 0x0627) {
            // hijai order only when explicitly alphabetic; traditional and any other
            // letter value both use abjadi order
            int[] map = (letterValue == LETTER_VALUE_ALPHABETIC)
                ? arabicHijaiAlphabeticMap
                : arabicAbjadiAlphabeticMap;
            return formatNumberAsSequence(number, one, map.length, map);
        } else if (one == 0x0623) {
            if ((number == 0) || (number > 1999)) {
                return null;
            }
            return formatAsAbjadiNumber(number, features, language, country);
        } else {
            return null;
        }
    }

    /**
     * Compose an abjadi numeral by emitting one letter per non-zero decimal digit, from
     * thousands down to ones.
     * @param number the value to format, asserted non-zero and less than 2000
     * @param features not used
     * @param language not used
     * @param country not used
     * @return the numeral as an array of scalar values
     */
    private Integer[] formatAsAbjadiNumber(long number, String features, String language, String country) {
        // typed list: with a raw List, toArray(new Integer[..]) erases to Object[]
        List<Integer> sl = new ArrayList<Integer>();
        assert arabicAbjadiAlphabeticMap.length == 28;
        assert arabicAbjadiAlphabeticMap[0] == 0x0623; // ALEF WITH HAMZA ABOVE
        assert arabicAbjadiAlphabeticMap[27] == 0x063A; // GHAIN
        assert number != 0;
        assert number < 2000;
        final int[] map = arabicAbjadiAlphabeticMap;
        final int thousands = (int) ((number / 1000) % 10);
        final int hundreds = (int) ((number / 100) % 10);
        final int tens = (int) ((number / 10) % 10);
        final int ones = (int) (number % 10);
        if (thousands > 0) {
            assert thousands < 2; // the table has a single thousands letter
            sl.add(map [ 27 + (thousands - 1) ]);
        }
        if (hundreds > 0) {
            assert hundreds < 10; // previously checked thousands by mistake
            sl.add(map [ 18 + (hundreds - 1) ]);
        }
        if (tens > 0) {
            assert tens < 10;
            sl.add(map [ 9 + (tens - 1) ]);
        }
        if (ones > 0) {
            assert ones < 10;
            sl.add(map [ ones - 1 ]);
        }
        return sl.toArray(new Integer [ sl.size() ]);
    }
}
/**
* Kana (Japanese) Numerals
*/
// Hiragana code points in gojuon order (48 entries, A through N). KanaNumeralsFormatter
// passes this table, together with its length, to formatNumberAsSequence when the format
// token is U+3042 and the letter value is LETTER_VALUE_ALPHABETIC.
private static int[] hiraganaGojuonAlphabeticMap = {
0x3042, // A
0x3044, // I
0x3046, // U
0x3048, // E
0x304A, // O
0x304B, // KA
0x304D, // KI
0x304F, // KU
0x3051, // KE
0x3053, // KO
0x3055, // SA
0x3057, // SI
0x3059, // SU
0x305B, // SE
0x305D, // SO
0x305F, // TA
0x3061, // TI
0x3064, // TU
0x3066, // TE
0x3068, // TO
0x306A, // NA
0x306B, // NI
0x306C, // NU
0x306D, // NE
0x306E, // NO
0x306F, // HA
0x3072, // HI
0x3075, // HU
0x3078, // HE
0x307B, // HO
0x307E, // MA
0x307F, // MI
0x3080, // MU
0x3081, // ME
0x3082, // MO
0x3084, // YA
0x3086, // YU
0x3088, // YO
0x3089, // RA
0x308A, // RI
0x308B, // RU
0x308C, // RE
0x308D, // RO
0x308F, // WA
0x3090, // WI
0x3091, // WE
0x3092, // WO
0x3093, // N
};
// Katakana code points in gojuon order (48 entries, A through N), entry for entry parallel
// to hiraganaGojuonAlphabeticMap. KanaNumeralsFormatter passes this table, together with its
// length, to formatNumberAsSequence when the format token is U+30A2 and the letter value is
// LETTER_VALUE_ALPHABETIC.
private static int[] katakanaGojuonAlphabeticMap = {
0x30A2, // A
0x30A4, // I
0x30A6, // U
0x30A8, // E
0x30AA, // O
0x30AB, // KA
0x30AD, // KI
0x30AF, // KU
0x30B1, // KE
0x30B3, // KO
0x30B5, // SA
0x30B7, // SI
0x30B9, // SU
0x30BB, // SE
0x30BD, // SO
0x30BF, // TA
0x30C1, // TI
0x30C4, // TU
0x30C6, // TE
0x30C8, // TO
0x30CA, // NA
0x30CB, // NI
0x30CC, // NU
0x30CD, // NE
0x30CE, // NO
0x30CF, // HA
0x30D2, // HI
0x30D5, // HU
0x30D8, // HE
0x30DB, // HO
0x30DE, // MA
0x30DF, // MI
0x30E0, // MU
0x30E1, // ME
0x30E2, // MO
0x30E4, // YA
0x30E6, // YU
0x30E8, // YO
0x30E9, // RA
0x30EA, // RI
0x30EB, // RU
0x30EC, // RE
0x30ED, // RO
0x30EF, // WA
0x30F0, // WI
0x30F1, // WE
0x30F2, // WO
0x30F3, // N
};
/**
 * Special number formatter for Japanese kana letter sequences. Only an alphabetic letter
 * value is handled; the format token picks the hiragana (U+3042) or katakana (U+30A2)
 * gojuon table.
 */
private class KanaNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Format a number as a kana letter sequence.
     * @param number the value to format
     * @param one the format token character (0x3042 or 0x30A2)
     * @param letterValue must be LETTER_VALUE_ALPHABETIC for any output to be produced
     * @param features not used by this formatter
     * @param language not used by this formatter
     * @param country not used by this formatter
     * @return the result of formatNumberAsSequence for the selected table, or null if the
     * token or letter value is not handled
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if (letterValue != LETTER_VALUE_ALPHABETIC) {
            return null;
        }
        final int[] table;
        switch (one) {
        case 0x3042:
            table = hiraganaGojuonAlphabeticMap;
            break;
        case 0x30A2:
            table = katakanaGojuonAlphabeticMap;
            break;
        default:
            return null;
        }
        return formatNumberAsSequence(number, one, table.length, table);
    }
}
/**
* Thai Numerals
*/
// Thai letter code points U+0E01 through U+0E2E in code point order, with U+0E24 and U+0E26
// left out (see the commented-out entries below), giving 44 entries. ThaiNumeralsFormatter
// passes this table, together with its length, to formatNumberAsSequence when the format
// token is U+0E01 and the letter value is LETTER_VALUE_ALPHABETIC.
private static int[] thaiAlphabeticMap = {
0x0E01,
0x0E02,
0x0E03,
0x0E04,
0x0E05,
0x0E06,
0x0E07,
0x0E08,
0x0E09,
0x0E0A,
0x0E0B,
0x0E0C,
0x0E0D,
0x0E0E,
0x0E0F,
0x0E10,
0x0E11,
0x0E12,
0x0E13,
0x0E14,
0x0E15,
0x0E16,
0x0E17,
0x0E18,
0x0E19,
0x0E1A,
0x0E1B,
0x0E1C,
0x0E1D,
0x0E1E,
0x0E1F,
0x0E20,
0x0E21,
0x0E22,
0x0E23,
// 0x0E24, // RU - not used in modern sequence
0x0E25,
// 0x0E26, // LU - not used in modern sequence
0x0E27,
0x0E28,
0x0E29,
0x0E2A,
0x0E2B,
0x0E2C,
0x0E2D,
0x0E2E,
};
/**
 * Special number formatter for Thai letter sequences, handled only for the format token
 * U+0E01 combined with an alphabetic letter value.
 */
private class ThaiNumeralsFormatter implements SpecialNumberFormatter {
    /**
     * Format a number as a Thai letter sequence.
     * @param number the value to format
     * @param one the format token character; only 0x0E01 is handled
     * @param letterValue must be LETTER_VALUE_ALPHABETIC for any output to be produced
     * @param features not used by this formatter
     * @param language not used by this formatter
     * @param country not used by this formatter
     * @return the result of formatNumberAsSequence over thaiAlphabeticMap, or null if the
     * token or letter value is not handled
     */
    public Integer[] format(long number, int one, int letterValue, String features, String language, String country) {
        if ((one != 0x0E01) || (letterValue != LETTER_VALUE_ALPHABETIC)) {
            return null;
        }
        final int[] table = thaiAlphabeticMap;
        return formatNumberAsSequence(number, one, table.length, table);
    }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy