net.sf.saxon.expr.number.NumberFormatter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of Saxon-HE Show documentation
Show all versions of Saxon-HE Show documentation
The XSLT and XQuery Processor
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.expr.number;
import net.sf.saxon.lib.Numberer;
import net.sf.saxon.regex.charclass.Categories;
import net.sf.saxon.str.*;
import net.sf.saxon.z.IntPredicateProxy;
import net.sf.saxon.z.IntUnionPredicate;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
/**
* Class NumberFormatter defines a method to format a list of integers as a character
* string according to a supplied format specification.
*
*/
public class NumberFormatter {
// Alphanumeric tokens of the format picture, in order of appearance; rebuilt by each call to prepare().
// NOTE(review): declared as a raw type here although prepare() only adds UnicodeString-typed values;
// the type argument may have been lost in extraction - confirm against the original source.
private ArrayList formatTokens;
// Separator tokens of the format picture, in order of appearance; rebuilt by each call to prepare().
// When the picture begins with an alphanumeric token, prepare() inserts a "." entry ahead of the
// separators actually found in the picture.
private ArrayList punctuationTokens;
// Set by prepare(): false when the picture begins with an alphanumeric token, true otherwise.
private boolean startsWithPunctuation;
/**
 * Split the format picture into tokens and remember them for later formatting.
 *
 * <p>The picture is read left to right as alternating runs of alphanumeric characters
 * (as classified by {@link #isLetterOrDigit(int)}) and runs of any other characters.
 * Alphanumeric runs become format tokens; the remaining runs become punctuation tokens
 * that are copied to the output string.</p>
 *
 * @param format the format specification. An empty string is treated as "1". A format
 *               token is typically one of the following values:
 *               - "1": conventional decimal numbering
 *               - "a": sequence a, b, c, ... aa, ab, ac, ...
 *               - "A": sequence A, B, C, ... AA, AB, AC, ...
 *               - "i": sequence i, ii, iii, iv, v ...
 *               - "I": sequence I, II, III, IV, V, ...
 *               The token may be preceded and followed by punctuation (any other characters).
 */
public void prepare(String format) {
    // An empty picture falls back to plain decimal numbering
    if (format.isEmpty()) {
        format = "1";
    }
    formatTokens = new ArrayList<>(10);
    punctuationTokens = new ArrayList<>(10);
    UnicodeString picture = StringView.tidy(format);
    int end = picture.length32();

    // Stays true until the first character of the picture has been classified
    boolean atStart = true;
    startsWithPunctuation = true;

    int pos = 0;
    while (pos < end) {
        // Consume a (possibly empty) run of alphanumeric characters
        int runStart = pos;
        while (pos < end && isLetterOrDigit(picture.codePointAt(pos))) {
            pos++;
        }
        if (pos > runStart) {
            formatTokens.add(picture.substring(runStart, pos));
            if (atStart) {
                // The picture opens with a format token: record a "." placeholder in the
                // punctuation list and note that there is no leading punctuation
                punctuationTokens.add(BMPString.of("."));
                startsWithPunctuation = false;
                atStart = false;
            }
        }
        if (pos == end) {
            break;
        }

        // Consume the run of non-alphanumeric characters that follows
        runStart = pos;
        while (pos < end && !isLetterOrDigit(picture.codePointAt(pos))) {
            atStart = false;
            pos++;
        }
        if (pos > runStart) {
            punctuationTokens.add(picture.substring(runStart, pos));
        }
    }

    // A picture made up only of punctuation gets an implicit "1" format token; a lone
    // punctuation token is then duplicated so that it appears twice in the list
    if (formatTokens.isEmpty()) {
        formatTokens.add(BMPString.of("1"));
        if (punctuationTokens.size() == 1) {
            punctuationTokens.add(punctuationTokens.get(0));
        }
    }
}
/**
 * Test whether a code point counts as alphanumeric for the purpose of splitting an
 * xsl:number format picture into tokens. The code point may lie outside the BMP.
 *
 * @param c the codepoint of the character to be tested
 * @return true if the code point is an ASCII digit or ASCII letter, or, for code points
 *         above 0x7F, if it is accepted by the shared alphanumeric predicate
 */
public static boolean isLetterOrDigit(int c) {
    if (c > 0x7F) {
        // Outside ASCII: delegate to the category-based predicate
        return alphanumeric.test(c);
    }
    // ASCII (and anything below it): simple range checks avoid the predicate lookup
    boolean digit = c >= '0' && c <= '9';
    boolean upperCase = c >= 'A' && c <= 'Z';
    boolean lowerCase = c >= 'a' && c <= 'z';
    return digit || upperCase || lowerCase;
}
/**
 * Predicate accepting any code point that belongs to the character category named "N"
 * or the one named "L" (presumably the Unicode number and letter categories - confirm
 * against the Categories class).
 */
private static final IntPredicateProxy alphanumeric = IntUnionPredicate.makeUnion(
        Categories.getCategory("N"),
        Categories.getCategory("L"));
/**
* Format a list of numbers.
*
* @param numbers the numbers to be formatted (a sequence of integer values; it may also contain
* preformatted strings as part of the error recovery fallback)
* @param groupSize the grouping-size, as in xsl:number
* @param groupSeparator the grouping-separator, as in xsl:number
* @param letterValue the letter-value, as in xsl:number
* @param ordinal the ordinal attribute as in xsl:number
* @param numberer the Numberer to be used for localization
* @return the formatted output string.
*/
public UnicodeString format(List