// com.ibm.icu.impl.SimpleFormatterImpl Maven / Gradle / Ivy
// Show all versions of icu4j Show documentation
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2014-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.impl;
import java.io.IOException;
import java.text.Format;
import com.ibm.icu.util.ICUUncheckedIOException;
/**
 * Formats simple patterns like "{1} was born in {0}".
 * Internal version of {@link com.ibm.icu.text.SimpleFormatter}
 * with only static methods, to avoid wrapper objects.
 *
 * <p>This class "compiles" pattern strings into a binary format
 * and implements formatting etc. based on that.
 *
 * <p>Format:
 * Index 0: One more than the highest argument number.
 * Followed by zero or more arguments or literal-text segments.
 *
 * <p>An argument is stored as its number, less than ARG_NUM_LIMIT.
 * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
 * followed by that many chars.
 */
public final class SimpleFormatterImpl {
    /**
     * Argument numbers must be smaller than this limit.
     * Text segment lengths are offset by this much.
     * This is currently the only unused char value in compiled patterns,
     * except it is the maximum value of the first unit (max arg +1).
     */
    private static final int ARG_NUM_LIMIT = 0x100;

    /** Segment-length chars for literal-text segments of length 1, 2 and 3. */
    private static final char LEN1_CHAR = (char)(ARG_NUM_LIMIT + 1);
    private static final char LEN2_CHAR = (char)(ARG_NUM_LIMIT + 2);
    private static final char LEN3_CHAR = (char)(ARG_NUM_LIMIT + 3);

    /**
     * Initial and maximum char/UChar value set for a text segment.
     * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
     * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
     */
    private static final char SEGMENT_LENGTH_ARGUMENT_CHAR = (char)0xffff;

    /**
     * Maximum length of a text segment. Longer segments are split into shorter ones.
     */
    private static final int MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_ARGUMENT_CHAR - ARG_NUM_LIMIT;

    /**
     * "Intern" some common patterns.
     * Each entry is a pair: { pattern string, its precompiled form }.
     */
    private static final String[][] COMMON_PATTERNS = {
        { "{0} {1}", "\u0002\u0000" + LEN1_CHAR + " \u0001" },
        { "{0} ({1})", "\u0002\u0000" + LEN2_CHAR + " (\u0001" + LEN1_CHAR + ')' },
        { "{0}, {1}", "\u0002\u0000" + LEN2_CHAR + ", \u0001" },
        { "{0} – {1}", "\u0002\u0000" + LEN3_CHAR + " – \u0001" },  // en dash
    };

    /** Use only static methods. */
    private SimpleFormatterImpl() {}

    /**
     * Creates a compiled form of the pattern string, for use with appropriate static methods.
     * The number of arguments checked against the given limits is the
     * highest argument number plus one, not the number of occurrences of arguments.
     *
     * @param pattern The pattern string.
     * @param sb A StringBuilder instance which may or may not be used.
     *           Its previous contents are overwritten when it is used.
     * @param min The pattern must have at least this many arguments.
     * @param max The pattern must have at most this many arguments.
     * @return The compiled-pattern string.
     * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments.
     */
    public static String compileToStringMinMaxArguments(
            CharSequence pattern, StringBuilder sb, int min, int max) {
        // Return some precompiled common two-argument patterns.
        if (min <= 2 && 2 <= max) {
            for (String[] pair : COMMON_PATTERNS) {
                if (pair[0].contentEquals(pattern)) {
                    assert pair[1].charAt(0) == 2;
                    return pair[1];
                }
            }
        }
        // Parse consistent with MessagePattern, but
        // - support only simple numbered arguments
        // - build a simple binary structure into the result string
        int patternLength = pattern.length();
        sb.ensureCapacity(patternLength);
        // Reserve the first char for the number of arguments.
        sb.setLength(1);
        // Length of the literal-text segment currently being built; 0 if none is open.
        int textLength = 0;
        int maxArg = -1;
        boolean inQuote = false;
        for (int i = 0; i < patternLength;) {
            char c = pattern.charAt(i++);
            if (c == '\'') {
                // Note: the condition below also loads the char after the apostrophe into c
                // (when there is one), which the later branches inspect.
                if (i < patternLength && (c = pattern.charAt(i)) == '\'') {
                    // double apostrophe, skip the second one
                    ++i;
                } else if (inQuote) {
                    // skip the quote-ending apostrophe
                    inQuote = false;
                    continue;
                } else if (c == '{' || c == '}') {
                    // Skip the quote-starting apostrophe, find the end of the quoted literal text.
                    ++i;
                    inQuote = true;
                } else {
                    // The apostrophe is part of literal text.
                    c = '\'';
                }
            } else if (!inQuote && c == '{') {
                if (textLength > 0) {
                    // Close the open text segment: replace its preset length char
                    // with the actual length.
                    sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
                    textLength = 0;
                }
                int argNumber;
                // Fast path: a single digit immediately followed by '}'.
                if ((i + 1) < patternLength &&
                        0 <= (argNumber = pattern.charAt(i) - '0') && argNumber <= 9 &&
                        pattern.charAt(i + 1) == '}') {
                    i += 2;
                } else {
                    // Multi-digit argument number (no leading zero) or syntax error.
                    // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
                    // around the number, but this class does not.
                    int argStart = i - 1;
                    argNumber = -1;
                    if (i < patternLength && '1' <= (c = pattern.charAt(i++)) && c <= '9') {
                        argNumber = c - '0';
                        while (i < patternLength && '0' <= (c = pattern.charAt(i++)) && c <= '9') {
                            argNumber = argNumber * 10 + (c - '0');
                            if (argNumber >= ARG_NUM_LIMIT) {
                                // Too large; c is still a digit here, so the check below throws.
                                break;
                            }
                        }
                    }
                    if (argNumber < 0 || c != '}') {
                        throw new IllegalArgumentException(
                                "Argument syntax error in pattern \"" + pattern +
                                "\" at index " + argStart +
                                ": " + pattern.subSequence(argStart, i));
                    }
                }
                if (argNumber > maxArg) {
                    maxArg = argNumber;
                }
                sb.append((char)argNumber);
                continue;
            }  // else: c is part of literal text
            // Append c and track the literal-text segment length.
            if (textLength == 0) {
                // Reserve a char for the length of a new text segment, preset the maximum length.
                sb.append(SEGMENT_LENGTH_ARGUMENT_CHAR);
            }
            sb.append(c);
            if (++textLength == MAX_SEGMENT_LENGTH) {
                // The segment is full; its preset length char already holds the maximum.
                // The next literal char starts a new segment.
                textLength = 0;
            }
        }
        if (textLength > 0) {
            sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength));
        }
        int argCount = maxArg + 1;
        if (argCount < min) {
            throw new IllegalArgumentException(
                    "Fewer than minimum " + min + " arguments in pattern \"" + pattern + "\"");
        }
        if (argCount > max) {
            throw new IllegalArgumentException(
                    "More than maximum " + max + " arguments in pattern \"" + pattern + "\"");
        }
        sb.setCharAt(0, (char)argCount);
        return sb.toString();
    }

    /**
     * Returns the argument limit stored in the first char of the compiled pattern.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @return The max argument number + 1.
     */
    public static int getArgumentLimit(String compiledPattern) {
        return compiledPattern.charAt(0);
    }

    /**
     * Formats the given values.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param values The argument values.
     *               values.length must be at least getArgumentLimit().
     * @return The formatted string.
     * @throws IllegalArgumentException if there are too few values.
     */
    public static String formatCompiledPattern(String compiledPattern, CharSequence... values) {
        return formatAndAppend(compiledPattern, new StringBuilder(), null, values).toString();
    }

    /**
     * Formats the not-compiled pattern with the given values.
     * Equivalent to compileToStringMinMaxArguments() followed by formatCompiledPattern().
     * The number of arguments checked against the given limits is the
     * highest argument number plus one, not the number of occurrences of arguments.
     *
     * @param pattern Not-compiled form of a pattern string.
     * @param min The pattern must have at least this many arguments.
     * @param max The pattern must have at most this many arguments.
     * @param values The argument values.
     *               values.length must be at least the pattern's argument limit.
     * @return The formatted string.
     * @throws IllegalArgumentException for bad argument syntax, too few or too many arguments,
     *         and too few values.
     */
    public static String formatRawPattern(String pattern, int min, int max, CharSequence... values) {
        StringBuilder sb = new StringBuilder();
        String compiledPattern = compileToStringMinMaxArguments(pattern, sb, min, max);
        // Reuse the builder for the formatted output.
        sb.setLength(0);
        return formatAndAppend(compiledPattern, sb, null, values).toString();
    }

    /**
     * Formats the given values, appending to the appendTo builder.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param appendTo Gets the formatted pattern and values appended.
     * @param offsets offsets[i] receives the offset of where
     *                values[i] replaced pattern argument {i}.
     *                Can be null, or can be shorter or longer than values.
     *                If there is no {i} in the pattern, then offsets[i] is set to -1.
     * @param values The argument values.
     *               An argument value must not be the same object as appendTo.
     *               values.length must be at least getArgumentLimit().
     *               Can be null if getArgumentLimit()==0.
     * @return appendTo
     * @throws IllegalArgumentException if there are too few values,
     *         or if a value used by the pattern is the same object as appendTo.
     */
    public static StringBuilder formatAndAppend(
            String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values) {
        int valuesLength = values != null ? values.length : 0;
        if (valuesLength < getArgumentLimit(compiledPattern)) {
            throw new IllegalArgumentException("Too few values.");
        }
        return format(compiledPattern, values, appendTo, null, true, offsets);
    }

    /**
     * Formats the given values, replacing the contents of the result builder.
     * May optimize by actually appending to the result if it is the same object
     * as the value corresponding to the initial argument in the pattern.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param result Gets its contents replaced by the formatted pattern and values.
     * @param offsets offsets[i] receives the offset of where
     *                values[i] replaced pattern argument {i}.
     *                Can be null, or can be shorter or longer than values.
     *                If there is no {i} in the pattern, then offsets[i] is set to -1.
     * @param values The argument values.
     *               An argument value may be the same object as result.
     *               values.length must be at least getArgumentLimit().
     * @return result
     * @throws IllegalArgumentException if there are too few values.
     */
    public static StringBuilder formatAndReplace(
            String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values) {
        int valuesLength = values != null ? values.length : 0;
        if (valuesLength < getArgumentLimit(compiledPattern)) {
            throw new IllegalArgumentException("Too few values.");
        }

        // If the pattern starts with an argument whose value is the same object
        // as the result, then we keep the result contents and append to it.
        // Otherwise we replace its contents.
        int firstArg = -1;
        // If any non-initial argument value is the same object as the result,
        // then we first copy its contents and use that instead while formatting.
        String resultCopy = null;
        if (getArgumentLimit(compiledPattern) > 0) {
            for (int i = 1; i < compiledPattern.length();) {
                int n = compiledPattern.charAt(i++);
                if (n < ARG_NUM_LIMIT) {
                    if (values[n] == result) {
                        // i == 2 means the argument char was at index 1,
                        // that is, the pattern starts with this argument.
                        if (i == 2) {
                            firstArg = n;
                        } else if (resultCopy == null) {
                            resultCopy = result.toString();
                        }
                    }
                } else {
                    // Skip over the literal text of this segment.
                    i += n - ARG_NUM_LIMIT;
                }
            }
        }
        if (firstArg < 0) {
            result.setLength(0);
        }
        return format(compiledPattern, values, result, resultCopy, false, offsets);
    }

    /**
     * Returns the pattern text with none of the arguments.
     * Like formatting with all-empty string values.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @return The concatenated literal-text segments of the pattern.
     */
    public static String getTextWithNoArguments(String compiledPattern) {
        // Capacity estimate: total length minus the argument-limit char and one char per argument.
        // This goes negative when the argument limit exceeds the number of argument chars
        // (for example "{5}" compiles to 2 chars with an argument limit of 6),
        // and StringBuilder rejects a negative capacity, so clamp at 0.
        int capacity = Math.max(0,
                compiledPattern.length() - 1 - getArgumentLimit(compiledPattern));
        StringBuilder sb = new StringBuilder(capacity);
        for (int i = 1; i < compiledPattern.length();) {
            // Positive only for a text segment; arguments are below ARG_NUM_LIMIT.
            int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT;
            if (segmentLength > 0) {
                int limit = i + segmentLength;
                sb.append(compiledPattern, i, limit);
                i = limit;
            }
        }
        return sb.toString();
    }

    /**
     * Returns the length of the pattern text with none of the arguments.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param codePoints true to count code points; false to count code units.
     * @return The number of code points or code units.
     */
    public static int getLength(String compiledPattern, boolean codePoints) {
        int result = 0;
        for (int i = 1; i < compiledPattern.length();) {
            // Positive only for a text segment; arguments are below ARG_NUM_LIMIT.
            int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT;
            if (segmentLength > 0) {
                int limit = i + segmentLength;
                if (codePoints) {
                    result += Character.codePointCount(compiledPattern, i, limit);
                } else {
                    result += (limit - i);
                }
                i = limit;
            }
        }
        return result;
    }

    /**
     * Returns the length in code units of the pattern text up until the first argument.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @return The number of code units.
     */
    public static int getPrefixLength(String compiledPattern) {
        if (compiledPattern.length() == 1) {
            // Only the argument-limit char: no text at all.
            return 0;
        } else if (compiledPattern.charAt(0) == 0) {
            // No arguments: everything after the argument-limit char
            // and the segment-length char is text.
            return compiledPattern.length() - 2;
        } else if (compiledPattern.charAt(1) <= ARG_NUM_LIMIT) {
            // The pattern starts with an argument.
            return 0;
        } else {
            return compiledPattern.charAt(1) - ARG_NUM_LIMIT;
        }
    }

    /**
     * Special case for using FormattedStringBuilder with patterns with 0 or 1 argument.
     *
     * <p>With 1 argument, treat the current contents of the FormattedStringBuilder between
     * start and end as the argument {0}. Insert the extra strings from compiledPattern
     * to surround the argument in the output.
     *
     * <p>With 0 arguments, overwrite the entire contents of the FormattedStringBuilder
     * between start and end.
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param field Field to use when adding chars to the output.
     * @param start The start index of the argument already in the output string.
     * @param end The end index of the argument already in the output string.
     * @param output Destination for formatted output.
     * @return Net number of characters added to the formatted string.
     */
    public static int formatPrefixSuffix(
            String compiledPattern,
            Format.Field field,
            int start,
            int end,
            FormattedStringBuilder output) {
        int argLimit = getArgumentLimit(compiledPattern);
        if (argLimit == 0) {
            // No arguments in compiled pattern; overwrite the entire segment with our string.
            // The text normally starts at index 2, after the argument-limit char and the
            // segment-length char. An empty pattern compiles to just the argument-limit char,
            // so clamp the start to avoid passing an inverted range (2..1) to splice().
            int textStart = Math.min(2, compiledPattern.length());
            return output.splice(
                    start, end, compiledPattern, textStart, compiledPattern.length(), field);
        } else {
            assert argLimit == 1;
            int suffixOffset;
            int length = 0;
            if (compiledPattern.charAt(1) != '\u0000') {
                // The pattern starts with a text segment: insert it before the argument.
                int prefixLength = compiledPattern.charAt(1) - ARG_NUM_LIMIT;
                length = output.insert(start, compiledPattern, 2, 2 + prefixLength, field);
                // Skip the segment-length char, the prefix text, and the argument char.
                suffixOffset = 3 + prefixLength;
            } else {
                // The pattern starts with the argument; skip its char.
                suffixOffset = 2;
            }
            if (suffixOffset < compiledPattern.length()) {
                int suffixLength = compiledPattern.charAt(suffixOffset) - ARG_NUM_LIMIT;
                // The end index has shifted by the length of the inserted prefix.
                length += output.insert(end + length, compiledPattern, 1 + suffixOffset,
                        1 + suffixOffset + suffixLength, field);
            }
            return length;
        }
    }

    /**
     * Internal iterator interface for maximum efficiency.
     *
     * <p>The state packs the current index into the compiled pattern in the upper 32 bits
     * and the argument number found at that index in the lower 32 bits.
     *
     * <p>Usage boilerplate:
     *
     * <pre>
     * long state = 0;
     * while (true) {
     *     state = IterInternal.step(state, compiledPattern, output);
     *     if (state == IterInternal.DONE) {
     *         break;
     *     }
     *     int argIndex = IterInternal.getArgIndex(state);
     *     // Append the string corresponding to argIndex to output
     * }
     * </pre>
     */
    public static class IterInternal {
        /** State value returned by step() when the end of the pattern has been reached. */
        public static final long DONE = -1;

        /**
         * Appends the literal text that follows the position in state to output,
         * stopping at the next argument or at the end of the pattern.
         *
         * @param state 0 to start, otherwise the value returned by the previous call.
         * @param compiledPattern Compiled form of a pattern string.
         * @param output Receives the literal text.
         * @return The new state, or DONE if the end of the pattern was reached.
         * @throws ICUUncheckedIOException if output throws an IOException.
         */
        public static long step(long state, CharSequence compiledPattern, Appendable output) {
            int i = (int) (state >>> 32);
            assert i < compiledPattern.length();
            // Move past the argument-limit char (initially) or the previous argument char.
            i++;
            while (i < compiledPattern.length() && compiledPattern.charAt(i) > ARG_NUM_LIMIT) {
                // Text segment: its chars are at [i + 1, limit).
                int limit = i + compiledPattern.charAt(i) + 1 - ARG_NUM_LIMIT;
                try {
                    output.append(compiledPattern, i + 1, limit);
                } catch (IOException e) {
                    throw new ICUUncheckedIOException(e);
                }
                i = limit;
            }
            if (i == compiledPattern.length()) {
                return DONE;
            }
            return (((long) i) << 32) | compiledPattern.charAt(i);
        }

        /**
         * Extracts the argument number from a state returned by step().
         *
         * @param state A state other than DONE.
         * @return The argument number stored in the lower 32 bits of state.
         */
        public static int getArgIndex(long state) {
            return (int) state;
        }
    }

    /**
     * Shared formatting loop for formatAndAppend() and formatAndReplace().
     *
     * @param compiledPattern Compiled form of a pattern string.
     * @param values The argument values; indexed by the argument numbers in the pattern.
     * @param result Receives the formatted output by appending.
     * @param resultCopy Text to append where a non-initial argument value
     *                   is the same object as result.
     * @param forbidResultAsValue If true, throws when an argument value
     *                            is the same object as result.
     * @param offsets If not null, all entries are first set to -1; then offsets[n] is set
     *                to the offset in result where argument n was written.
     * @return result
     * @throws IllegalArgumentException if forbidResultAsValue is true and a value used
     *         by the pattern is the same object as result.
     */
    private static StringBuilder format(
            String compiledPattern, CharSequence[] values,
            StringBuilder result, String resultCopy, boolean forbidResultAsValue,
            int[] offsets) {
        int offsetsLength;
        if (offsets == null) {
            offsetsLength = 0;
        } else {
            offsetsLength = offsets.length;
            for (int i = 0; i < offsetsLength; i++) {
                offsets[i] = -1;
            }
        }
        for (int i = 1; i < compiledPattern.length();) {
            int n = compiledPattern.charAt(i++);
            if (n < ARG_NUM_LIMIT) {
                CharSequence value = values[n];
                if (value == result) {
                    if (forbidResultAsValue) {
                        throw new IllegalArgumentException("Value must not be same object as result");
                    }
                    if (i == 2) {
                        // We are appending to result which is also the first value object.
                        if (n < offsetsLength) {
                            offsets[n] = 0;
                        }
                    } else {
                        if (n < offsetsLength) {
                            offsets[n] = result.length();
                        }
                        result.append(resultCopy);
                    }
                } else {
                    if (n < offsetsLength) {
                        offsets[n] = result.length();
                    }
                    result.append(value);
                }
            } else {
                // Literal-text segment of length n - ARG_NUM_LIMIT.
                int limit = i + (n - ARG_NUM_LIMIT);
                result.append(compiledPattern, i, limit);
                i = limit;
            }
        }
        return result;
    }
}