com.ibm.icu.text.MessagePattern Maven / Gradle / Ivy
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2010-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* created on: 2010aug21
* created by: Markus W. Scherer
*/
package com.ibm.icu.text;
import java.util.ArrayList;
import java.util.Locale;
import com.ibm.icu.impl.ICUConfig;
import com.ibm.icu.impl.PatternProps;
import com.ibm.icu.util.Freezable;
import com.ibm.icu.util.ICUCloneNotSupportedException;
//Note: Minimize ICU dependencies, only use a very small part of the ICU core.
//In particular, do not depend on *Format classes.
/**
* Parses and represents ICU MessageFormat patterns.
* Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
* Used in the implementations of those classes as well as in tools
* for message validation, translation and format conversion.
*
* The parser handles all syntax relevant for identifying message arguments.
* This includes "complex" arguments whose style strings contain
* nested MessageFormat pattern substrings.
* For "simple" arguments (with no nested MessageFormat pattern substrings),
* the argument style is not parsed any further.
*
* The parser handles named and numbered message arguments and allows both in one message.
*
* Once a pattern has been parsed successfully, iterate through the parsed data
* with countParts(), getPart() and related methods.
*
* The data logically represents a parse tree, but is stored and accessed
* as a list of "parts" for fast and simple parsing and to minimize object allocations.
* Arguments and nested messages are best handled via recursion.
* For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns
* the index of the corresponding _LIMIT "part".
*
* List of "parts":
*
* message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
* argument = noneArg | simpleArg | complexArg
* complexArg = choiceArg | pluralArg | selectArg
*
* noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
* simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
* choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
* pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
* selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
*
* choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
* pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
* selectStyle = (ARG_SELECTOR message)+
*
*
* - Literal output text is not represented directly by "parts" but accessed
* between parts of a message, from one part's getLimit() to the next part's getIndex().
*
* - ARG_START.CHOICE stands for an ARG_START Part with ArgType CHOICE.
* - In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
* the less-than-or-equal-to sign (U+2264).
*
* - In the pluralStyle, the first, optional numeric Part has the "offset:" value.
* The optional numeric Part between each (ARG_SELECTOR, message) pair
* is the value of an explicit-number selector like "=2",
* otherwise the selector is a non-numeric identifier.
*
* - The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
*
*
* This class is not intended for public subclassing.
*
* @stable ICU 4.8
* @author Markus Scherer
*/
public final class MessagePattern implements Cloneable, Freezable {
/**
 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
 * The default is DOUBLE_OPTIONAL unless overridden via ICUConfig
 * (/com/ibm/icu/ICUConfig.properties).
 * <p>
 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
 * even when the pair is between two single, text-quoting apostrophes.
 * <p>
 * The following table shows examples of desired MessageFormat.format() output
 * with the pattern strings that yield that output.
 *
 * <table border="1">
 *   <tr>
 *     <th>Desired output</th>
 *     <th>DOUBLE_OPTIONAL</th>
 *     <th>DOUBLE_REQUIRED</th>
 *   </tr>
 *   <tr>
 *     <td>I see {many}</td>
 *     <td>I see '{many}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I said {'Wow!'}</td>
 *     <td>I said '{''Wow!''}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I don't know</td>
 *     <td>I don't know OR<br> I don''t know</td>
 *     <td>I don''t know</td>
 *   </tr>
 * </table>
 * @stable ICU 4.8
 */
public enum ApostropheMode {
/**
 * A literal apostrophe is represented by
 * either a single or a double apostrophe pattern character.
 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
 * if it immediately precedes a curly brace {},
 * or a pipe symbol | if inside a choice format,
 * or a pound symbol # if inside a plural format.
 * <p>
 * This is the default behavior starting with ICU 4.8.
 * @stable ICU 4.8
 */
DOUBLE_OPTIONAL,
/**
 * A literal apostrophe must be represented by
 * a double apostrophe pattern character.
 * A single apostrophe always starts quoted literal text.
 * <p>
 * This is the behavior of ICU 4.6 and earlier, and of {@link java.text.MessageFormat}.
 * @stable ICU 4.8
 */
DOUBLE_REQUIRED
}
/**
 * Creates an empty MessagePattern that uses the default ApostropheMode.
 * No pattern string is parsed by this constructor.
 * @stable ICU 4.8
 */
public MessagePattern() {
    this.aposMode = defaultAposMode;
}
/**
 * Creates an empty MessagePattern that uses the given ApostropheMode.
 * No pattern string is parsed by this constructor.
 * @param mode Explicit ApostropheMode.
 * @stable ICU 4.8
 */
public MessagePattern(ApostropheMode mode) {
    this.aposMode = mode;
}
/**
 * Creates a MessagePattern with the default ApostropheMode and immediately
 * parses the given MessageFormat pattern string.
 * @param pattern a MessageFormat pattern string
 * @throws IllegalArgumentException for syntax errors in the pattern string
 * @throws IndexOutOfBoundsException if certain limits are exceeded
 *         (e.g., argument number too high, argument name too long, etc.)
 * @throws NumberFormatException if a number could not be parsed
 * @stable ICU 4.8
 */
public MessagePattern(String pattern) {
    // The no-argument constructor selects the default ApostropheMode.
    this();
    parse(pattern);
}
/**
 * Parses a MessageFormat pattern string, replacing any previously parsed pattern.
 * @param pattern a MessageFormat pattern string
 * @return this
 * @throws IllegalArgumentException for syntax errors in the pattern string
 * @throws IndexOutOfBoundsException if certain limits are exceeded
 *         (e.g., argument number too high, argument name too long, etc.)
 * @throws NumberFormatException if a number could not be parsed
 * @stable ICU 4.8
 */
public MessagePattern parse(String pattern) {
    preParse(pattern);
    // The top-level message starts at pattern index 0, has no opening
    // delimiter (start length 0) and sits at nesting level 0.
    final int startIndex = 0;
    final int msgStartLength = 0;
    final int topLevel = 0;
    parseMessage(startIndex, msgStartLength, topLevel, ArgType.NONE);
    postParse();
    return this;
}
/**
 * Parses a ChoiceFormat pattern string, replacing any previously parsed pattern.
 * @param pattern a ChoiceFormat pattern string
 * @return this
 * @throws IllegalArgumentException for syntax errors in the pattern string
 * @throws IndexOutOfBoundsException if certain limits are exceeded
 *         (e.g., argument number too high, argument name too long, etc.)
 * @throws NumberFormatException if a number could not be parsed
 * @stable ICU 4.8
 */
public MessagePattern parseChoiceStyle(String pattern) {
    preParse(pattern);
    // The whole string is the choice style: start at index 0, nesting level 0.
    final int startIndex = 0;
    final int topLevel = 0;
    parseChoiceStyle(startIndex, topLevel);
    postParse();
    return this;
}
/**
 * Parses a PluralFormat pattern string, replacing any previously parsed pattern.
 * @param pattern a PluralFormat pattern string
 * @return this
 * @throws IllegalArgumentException for syntax errors in the pattern string
 * @throws IndexOutOfBoundsException if certain limits are exceeded
 *         (e.g., argument number too high, argument name too long, etc.)
 * @throws NumberFormatException if a number could not be parsed
 * @stable ICU 4.8
 */
public MessagePattern parsePluralStyle(String pattern) {
    preParse(pattern);
    // The whole string is the plural style: start at index 0, nesting level 0.
    final int startIndex = 0;
    final int topLevel = 0;
    parsePluralOrSelectStyle(ArgType.PLURAL, startIndex, topLevel);
    postParse();
    return this;
}
/**
 * Parses a SelectFormat pattern string, replacing any previously parsed pattern.
 * @param pattern a SelectFormat pattern string
 * @return this
 * @throws IllegalArgumentException for syntax errors in the pattern string
 * @throws IndexOutOfBoundsException if certain limits are exceeded
 *         (e.g., argument number too high, argument name too long, etc.)
 * @throws NumberFormatException if a number could not be parsed
 * @stable ICU 4.8
 */
public MessagePattern parseSelectStyle(String pattern) {
    preParse(pattern);
    // The whole string is the select style: start at index 0, nesting level 0.
    final int startIndex = 0;
    final int topLevel = 0;
    parsePluralOrSelectStyle(ArgType.SELECT, startIndex, topLevel);
    postParse();
    return this;
}
/**
 * Resets this MessagePattern to the empty state.
 * Afterwards countParts() returns 0 and getPatternString() returns null.
 * @throws UnsupportedOperationException if this object is frozen
 * @stable ICU 4.8
 */
public void clear() {
    // Performs the same state reset as preParse(), but stores no new pattern string.
    if (isFrozen()) {
        throw new UnsupportedOperationException("Attempt to clear() a frozen MessagePattern instance.");
    }
    this.msg = null;
    this.hasArgNumbers = false;
    this.hasArgNames = false;
    this.needsAutoQuoting = false;
    this.parts.clear();
    // The numeric-value list may not have been allocated.
    if (this.numericValues != null) {
        this.numericValues.clear();
    }
}
/**
 * Clears this MessagePattern (see clear()) and then switches to the given ApostropheMode.
 * countParts() will return 0.
 * @param mode The new ApostropheMode.
 * @stable ICU 4.8
 */
public void clearPatternAndSetApostropheMode(ApostropheMode mode) {
    // clear() goes first: it throws on a frozen instance before the mode is touched.
    clear();
    this.aposMode = mode;
}
/**
 * Compares this pattern with another object.
 * Two MessagePattern objects are equal when their ApostropheMode,
 * pattern string and list of parts are all equal.
 * @param other another object to compare with.
 * @return true if this object is equivalent to the other one.
 * @stable ICU 4.8
 */
@Override
public boolean equals(Object other) {
    if (other == this) {
        return true;
    }
    if (other == null || other.getClass() != getClass()) {
        return false;
    }
    final MessagePattern that = (MessagePattern) other;
    if (!aposMode.equals(that.aposMode)) {
        return false;
    }
    final boolean samePattern;
    if (msg == null) {
        samePattern = (that.msg == null);
    } else {
        samePattern = msg.equals(that.msg);
    }
    // numericValues is deliberately not compared: it need not be
    // when the pattern string and the parts are the same.
    return samePattern && parts.equals(that.parts);
}
/**
 * Computes a hash code from the ApostropheMode, the pattern string
 * (contributing 0 when it is null) and the list of parts.
 * @return the hash code.
 * @stable ICU 4.8
 */
@Override
public int hashCode() {
    int hash = aposMode.hashCode();
    final int patternHash = (msg == null) ? 0 : msg.hashCode();
    hash = hash * 37 + patternHash;
    hash = hash * 37 + parts.hashCode();
    return hash;
}
/**
 * Returns the ApostropheMode this instance was configured with.
 * @return this instance's ApostropheMode.
 * @stable ICU 4.8
 */
public ApostropheMode getApostropheMode() {
    return this.aposMode;
}
/**
 * Tells whether this pattern uses the apostrophe handling of
 * {@link java.text.MessageFormat}, that is, ApostropheMode.DOUBLE_REQUIRED.
 * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED
 * @internal
 */
/* package */ boolean jdkAposMode() {
    return ApostropheMode.DOUBLE_REQUIRED == this.aposMode;
}
/**
 * Returns the pattern string that was most recently handed to one of the parse methods.
 * @return the parsed pattern string (null if none was parsed).
 * @stable ICU 4.8
 */
public String getPatternString() {
    return this.msg;
}
/**
 * Reports whether the parsed pattern contains named arguments like {first_name}.
 * @return true if the parsed pattern has at least one named argument.
 * @stable ICU 4.8
 */
public boolean hasNamedArguments() {
    return this.hasArgNames;
}
/**
 * Reports whether the parsed pattern contains numbered arguments like {2}.
 * @return true if the parsed pattern has at least one numbered argument.
 * @stable ICU 4.8
 */
public boolean hasNumberedArguments() {
    return this.hasArgNumbers;
}
/**
 * Returns the pattern string, exactly as getPatternString() does.
 * Note that the result is null when no pattern has been parsed.
 * @return the parsed pattern string, or null.
 * @stable ICU 4.8
 */
@Override
public String toString() {
    return this.msg;
}
/**
 * Checks an argument name or argument number string and, for a number, returns its value.
 * An argument name must be a "pattern identifier", that is, it must contain
 * no Unicode Pattern_Syntax or Pattern_White_Space characters.
 * If it only contains ASCII digits, then it must be a small integer with no leading zero.
 * @param name Input string.
 * @return &gt;=0 if the name is a valid number,
 *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
 *         ARG_NAME_NOT_VALID (-2) if it is neither.
 * @stable ICU 4.8
 */
public static int validateArgumentName(String name) {
    if (PatternProps.isIdentifier(name)) {
        // An identifier: let the number parser decide between number, name and bad number.
        final int limit = name.length();
        return parseArgNumber(name, 0, limit);
    }
    return ARG_NAME_NOT_VALID;
}
/**
* Return value from {@link #validateArgumentName(String)} for when
* the string is a valid "pattern identifier" but not a number.
* @stable ICU 4.8
*/
public static final int ARG_NAME_NOT_NUMBER=-1;
/**
* Return value from {@link #validateArgumentName(String)} for when
* the string is invalid.
* It might not be a valid "pattern identifier",
* or it has only ASCII digits but there is a leading zero or the number is too large.
* @stable ICU 4.8
*/
public static final int ARG_NAME_NOT_VALID=-2;
/**
 * Returns a version of the parsed pattern string where each ASCII apostrophe
 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
 * <p>
 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
 * <p>
 * The insertion points are the INSERT_CHAR parts of the parsed pattern.
 * If there is nothing to insert, the original pattern string object is returned.
 * @return the deep-auto-quoted version of the parsed pattern string.
 * @see MessageFormat#autoQuoteApostrophe(String)
 * @stable ICU 4.8
 */
public String autoQuoteApostropheDeep() {
    if (!needsAutoQuoting) {
        return msg;
    }
    // Allocated lazily, on the first INSERT_CHAR part found.
    StringBuilder buffer = null;
    // Walk from the last part to the first so that earlier insertion indexes stay valid.
    for (int i = countParts() - 1; i >= 0; --i) {
        final Part part = getPart(i);
        if (part.getType() != Part.Type.INSERT_CHAR) {
            continue;
        }
        if (buffer == null) {
            buffer = new StringBuilder(msg.length()+10).append(msg);
        }
        final char insertion = (char) part.value;
        buffer.insert(part.index, insertion);
    }
    return (buffer == null) ? msg : buffer.toString();
}
/**
 * Returns how many "parts" parsing the pattern string has produced.
 * The result is 0 if no pattern has been parsed or clear() was called.
 * @return the number of pattern parts.
 * @stable ICU 4.8
 */
public int countParts() {
    return this.parts.size();
}
/**
 * Returns the pattern "part" at position i.
 * @param i The index of the Part data. (0..countParts()-1)
 * @return the i-th pattern "part".
 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
 * @stable ICU 4.8
 */
public Part getPart(int i) {
    return this.parts.get(i);
}
/**
 * Returns the Part.Type of the pattern "part" at position i.
 * Shortcut for getPart(i).getType().
 * @param i The index of the Part data. (0..countParts()-1)
 * @return The Part.Type of the i-th Part.
 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
 * @stable ICU 4.8
 */
public Part.Type getPartType(int i) {
    final Part part = parts.get(i);
    return part.type;
}
/**
 * Returns the pattern-string index of the given pattern "part".
 * Shortcut for getPart(partIndex).getIndex().
 * @param partIndex The index of the Part data. (0..countParts()-1)
 * @return The pattern index of this Part.
 * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range
 * @stable ICU 4.8
 */
public int getPatternIndex(int partIndex) {
    final Part part = parts.get(partIndex);
    return part.index;
}
/**
 * Returns the piece of the pattern string that the Part covers.
 * Shortcut for getPatternString().substring(part.getIndex(), part.getLimit()).
 * @param part a part of this MessagePattern.
 * @return the substring associated with part.
 * @stable ICU 4.8
 */
public String getSubstring(Part part) {
    final int begin = part.index;
    final int end = begin + part.length;
    return msg.substring(begin, end);
}
/**
 * Tests whether the part's substring of the pattern equals the string s,
 * without creating the substring.
 * @param part a part of this MessagePattern.
 * @param s a string.
 * @return true if getSubstring(part).equals(s).
 * @stable ICU 4.8
 */
public boolean partSubstringMatches(Part part, String s) {
    final int partLength = part.length;
    // Different lengths can never match; this also keeps the region comparison exact.
    if (partLength != s.length()) {
        return false;
    }
    return msg.regionMatches(part.index, s, 0, partLength);
}
/**
 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE part.
 * An ARG_INT part carries the number in its value field;
 * an ARG_DOUBLE part carries an index into the list of numeric values.
 * @param part a part of this MessagePattern.
 * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part.
 * @stable ICU 4.8
 */
public double getNumericValue(Part part) {
    final Part.Type kind = part.type;
    if (kind == Part.Type.ARG_DOUBLE) {
        return numericValues.get(part.value);
    }
    if (kind == Part.Type.ARG_INT) {
        return part.value;
    }
    return NO_NUMERIC_VALUE;
}
/**
* Special value that is returned by getNumericValue(Part) when no
* numeric value is defined for a part.
* @see #getNumericValue
* @stable ICU 4.8
*/
public static final double NO_NUMERIC_VALUE=-123456789;
/**
 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
 * The offset is present exactly when the part at pluralStart is a numeric part.
 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
 * @return the "offset:" value.
 * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range
 * @stable ICU 4.8
 */
public double getPluralOffset(int pluralStart) {
    final Part first = parts.get(pluralStart);
    return first.type.hasNumericValue() ? getNumericValue(first) : 0;
}
/**
* Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
* @param start The index of some Part data (0..countParts()-1);
* this Part should be of Type ARG_START or MSG_START.
* @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
* or start itself if getPartType(msgStart)!=ARG|MSG_START.
* @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range
* @stable ICU 4.8
*/
public int getLimitPartIndex(int start) {
int limit=parts.get(start).limitPartIndex;
if(limit
* There is always a later MSG_LIMIT part.
* @stable ICU 4.8
*/
MSG_START,
/**
* End of a message pattern (main or nested).
* The length is 0 for the top-level message and
* the last sub-message of a choice argument,
* otherwise 1 for the '}' or (in a choice argument style) the '|'.
* The value indicates the nesting level, starting with 0 for the main message.
* @stable ICU 4.8
*/
MSG_LIMIT,
/**
* Indicates a substring of the pattern string which is to be skipped when formatting.
* For example, an apostrophe that begins or ends quoted text
* would be indicated with such a part.
* The value is undefined and currently always 0.
* @stable ICU 4.8
*/
SKIP_SYNTAX,
/**
* Indicates that a syntax character needs to be inserted for auto-quoting.
* The length is 0.
* The value is the character code of the insertion character. (U+0027=APOSTROPHE)
* @stable ICU 4.8
*/
INSERT_CHAR,
/**
* Indicates a syntactic (non-escaped) # symbol in a plural variant.
* When formatting, replace this part's substring with the
* (value-offset) for the plural argument value.
* The value is undefined and currently always 0.
* @stable ICU 4.8
*/
REPLACE_NUMBER,
/**
* Start of an argument.
* The length is 1 for the '{'.
* The value is the ordinal value of the ArgType. Use getArgType().
*
* This part is followed by either an ARG_NUMBER or ARG_NAME,
* followed by optional argument sub-parts (see ArgType constants)
* and finally an ARG_LIMIT part.
* @stable ICU 4.8
*/
ARG_START,
/**
* End of an argument.
* The length is 1 for the '}'.
* The value is the ordinal value of the ArgType. Use getArgType().
* @stable ICU 4.8
*/
ARG_LIMIT,
/**
* The argument number, provided by the value.
* @stable ICU 4.8
*/
ARG_NUMBER,
/**
* The argument name.
* The value is undefined and currently always 0.
* @stable ICU 4.8
*/
ARG_NAME,
/**
* The argument type.
* The value is undefined and currently always 0.
* @stable ICU 4.8
*/
ARG_TYPE,
/**
* The argument style text.
* The value is undefined and currently always 0.
* @stable ICU 4.8
*/
ARG_STYLE,
/**
* A selector substring in a "complex" argument style.
* The value is undefined and currently always 0.
* @stable ICU 4.8
*/
ARG_SELECTOR,
/**
* An integer value, for example the offset or an explicit selector value
* in a PluralFormat style.
* The part value is the integer value.
* @stable ICU 4.8
*/
ARG_INT,
/**
* A numeric value, for example the offset or an explicit selector value
* in a PluralFormat style.
* The part value is an index into an internal array of numeric values;
* use getNumericValue().
* @stable ICU 4.8
*/
ARG_DOUBLE;
/**
 * Tells whether a part of this type carries a numeric value,
 * which is the case for ARG_INT and ARG_DOUBLE only.
 * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}.
 * @return true if this part has a numeric value.
 * @stable ICU 4.8
 */
public boolean hasNumericValue() {
    if (this == ARG_DOUBLE) {
        return true;
    }
    return this == ARG_INT;
}
}
/**
 * Formats this part as TYPE(value)@index.
 * For ARG_START and ARG_LIMIT parts the value is shown as the ArgType name,
 * for all other parts as the decimal part value.
 * @return a string representation of this part.
 * @stable ICU 4.8
 */
@Override
public String toString() {
    final String detail;
    if (type == Type.ARG_START || type == Type.ARG_LIMIT) {
        detail = getArgType().name();
    } else {
        detail = Integer.toString(value);
    }
    return type.name() + "(" + detail + ")@" + index;
}
/**
 * Compares this part with another object.
 * Two parts are equal when type, index, length, value and limit part index all match.
 * @param other another object to compare with.
 * @return true if this object is equivalent to the other one.
 * @stable ICU 4.8
 */
@Override
public boolean equals(Object other) {
    if (other == this) {
        return true;
    }
    if (other == null || other.getClass() != getClass()) {
        return false;
    }
    final Part that = (Part) other;
    if (!type.equals(that.type)) {
        return false;
    }
    if (index != that.index || length != that.length) {
        return false;
    }
    if (value != that.value) {
        return false;
    }
    return limitPartIndex == that.limitPartIndex;
}
/**
 * Computes a hash code from the type, index, length and value of this part.
 * @return the hash code.
 * @stable ICU 4.8
 */
@Override
public int hashCode() {
    int hash = type.hashCode();
    hash = hash * 37 + index;
    hash = hash * 37 + length;
    hash = hash * 37 + value;
    return hash;
}
// Largest part length the parser accepts; the length field below is a char,
// so 0xffff is the largest value it can hold.
private static final int MAX_LENGTH=0xffff;
// Largest part value (and nesting level) the parser accepts; the value field below is a short.
private static final int MAX_VALUE=Short.MAX_VALUE;
// Some fields are not final because they are modified during pattern parsing.
// After pattern parsing, the parts are effectively immutable.
// The kind of this part.
private final Type type;
// Index into the pattern string where this part starts.
private final int index;
// Number of pattern characters covered by this part.
private final char length;
// Type-dependent payload; see the documentation of the Part.Type constants.
private short value;
// Read by MessagePattern.getLimitPartIndex(int) to find the matching _LIMIT part.
private int limitPartIndex;
}
/**
 * Argument type constants.
 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
 * <p>
 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
 * with a nesting level one greater than the surrounding message.
 * <p>
 * The declaration order matters: the ordinal of a constant is what
 * ARG_START and ARG_LIMIT parts store as their value.
 * @stable ICU 4.8
 */
public enum ArgType {
    /**
     * No type was specified for the argument.
     * @stable ICU 4.8
     */
    NONE,
    /**
     * A "simple" type, named by the ARG_TYPE part.
     * An ARG_STYLE part might follow that.
     * @stable ICU 4.8
     */
    SIMPLE,
    /**
     * A ChoiceFormat argument with one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    CHOICE,
    /**
     * A cardinal-number PluralFormat argument with an optional ARG_INT or ARG_DOUBLE offset
     * (e.g., offset:1)
     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * If the selector has an explicit value (e.g., =2), then
     * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
     * Otherwise the message immediately follows the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    PLURAL,
    /**
     * A SelectFormat argument with one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    SELECT,
    /**
     * An ordinal-number PluralFormat argument
     * with the same style parts sequence and semantics as {@link ArgType#PLURAL}.
     * @stable ICU 50
     */
    SELECTORDINAL;

    /**
     * Tells whether this argument type uses the plural style part sequence and semantics.
     * @return true for {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL},
     *         false for every other type.
     * @stable ICU 50
     */
    public boolean hasPluralStyle() {
        if (this == SELECTORDINAL) {
            return true;
        }
        return this == PLURAL;
    }
}
/**
 * Creates and returns a copy of this object.
 * A frozen object is returned as is; otherwise the result comes from cloneAsThawed().
 * @return a copy of this object (or itself if frozen).
 * @stable ICU 4.8
 */
@Override
public Object clone() {
    return isFrozen() ? this : cloneAsThawed();
}
/**
 * Creates and returns an unfrozen copy of this object.
 * The list of parts and, if present, the list of numeric values are cloned
 * so that the copy has its own lists.
 * @return a copy of this object.
 * @stable ICU 4.8
 */
@Override
@SuppressWarnings("unchecked")
public MessagePattern cloneAsThawed() {
    MessagePattern copy;
    try {
        copy = (MessagePattern) super.clone();
    } catch (CloneNotSupportedException e) {
        throw new ICUCloneNotSupportedException(e);
    }
    // super.clone() copies only the list references; give the copy lists of its own.
    copy.parts = (ArrayList) this.parts.clone();
    if (this.numericValues != null) {
        copy.numericValues = (ArrayList) this.numericValues.clone();
    }
    // The copy is always modifiable, even when this object is frozen.
    copy.frozen = false;
    return copy;
}
/**
 * Freezes this object.
 * Once frozen, clear() and the parse methods throw UnsupportedOperationException.
 * @return this
 * @stable ICU 4.8
 */
@Override
public MessagePattern freeze() {
    this.frozen = true;
    return this;
}
/**
 * Reports whether freeze() has been called on this object.
 * @return true if this object is frozen.
 * @stable ICU 4.8
 */
@Override
public boolean isFrozen() {
    return this.frozen;
}
// Prepares for parsing: rejects frozen instances, stores the new pattern string
// and discards all state left over from a previous parse.
private void preParse(String pattern) {
    if (isFrozen()) {
        throw new UnsupportedOperationException("Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance.");
    }
    this.msg = pattern;
    this.hasArgNumbers = false;
    this.hasArgNames = false;
    this.needsAutoQuoting = false;
    this.parts.clear();
    // The numeric-value list may not have been allocated.
    if (this.numericValues != null) {
        this.numericValues.clear();
    }
}
// Hook that the public parse methods call after the pattern has been parsed.
private void postParse() {
// Nothing to be done currently.
}
private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) {
if(nestingLevel>Part.MAX_VALUE) {
throw new IndexOutOfBoundsException();
}
int msgStart=parts.size();
addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel);
index+=msgStartLength;
while(index=0) {
if((index+1)0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) {
// Finish the message before the terminator.
// In a choice style, report the "}" substring only for the following ARG_LIMIT,
// not for this MSG_LIMIT.
int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1;
addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel);
if(parentType==ArgType.CHOICE) {
// Let the choice style parser see the '}' or '|'.
return index-1;
} else {
// continue parsing after the '}'
return index;
}
} // else: c is part of literal text
}
if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
throw new IllegalArgumentException(
"Unmatched '{' braces in message "+prefix());
}
addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel);
return index;
}
private int parseArg(int index, int argStartLength, int nestingLevel) {
int argStart=parts.size();
ArgType argType=ArgType.NONE;
addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal());
int nameIndex=index=skipWhiteSpace(index+argStartLength);
if(index==msg.length()) {
throw new IllegalArgumentException(
"Unmatched '{' braces in message "+prefix());
}
// parse argument name or number
index=skipIdentifier(index);
int number=parseArgNumber(nameIndex, index);
if(number>=0) {
int length=index-nameIndex;
if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) {
throw new IndexOutOfBoundsException(
"Argument number too large: "+prefix(nameIndex));
}
hasArgNumbers=true;
addPart(Part.Type.ARG_NUMBER, nameIndex, length, number);
} else if(number==ARG_NAME_NOT_NUMBER) {
int length=index-nameIndex;
if(length>Part.MAX_LENGTH) {
throw new IndexOutOfBoundsException(
"Argument name too long: "+prefix(nameIndex));
}
hasArgNames=true;
addPart(Part.Type.ARG_NAME, nameIndex, length, 0);
} else { // number<-1 (ARG_NAME_NOT_VALID)
throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
}
index=skipWhiteSpace(index);
if(index==msg.length()) {
throw new IllegalArgumentException(
"Unmatched '{' braces in message "+prefix());
}
char c=msg.charAt(index);
if(c=='}') {
// all done
} else if(c!=',') {
throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
} else /* ',' */ {
// parse argument type: case-sensitive a-zA-Z
int typeIndex=index=skipWhiteSpace(index+1);
while(indexPart.MAX_LENGTH) {
throw new IndexOutOfBoundsException(
"Argument type name too long: "+prefix(nameIndex));
}
argType=ArgType.SIMPLE;
if(length==6) {
// case-insensitive comparisons for complex-type names
if(isChoice(typeIndex)) {
argType=ArgType.CHOICE;
} else if(isPlural(typeIndex)) {
argType=ArgType.PLURAL;
} else if(isSelect(typeIndex)) {
argType=ArgType.SELECT;
}
} else if(length==13) {
if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
argType=ArgType.SELECTORDINAL;
}
}
// change the ARG_START type from NONE to argType
parts.get(argStart).value=(short)argType.ordinal();
if(argType==ArgType.SIMPLE) {
addPart(Part.Type.ARG_TYPE, typeIndex, length, 0);
}
// look for an argument style (pattern)
if(c=='}') {
if(argType!=ArgType.SIMPLE) {
throw new IllegalArgumentException(
"No style field for complex argument: "+prefix(nameIndex));
}
} else /* ',' */ {
++index;
if(argType==ArgType.SIMPLE) {
index=parseSimpleStyle(index);
} else if(argType==ArgType.CHOICE) {
index=parseChoiceStyle(index, nestingLevel);
} else {
index=parsePluralOrSelectStyle(argType, index, nestingLevel);
}
}
}
// Argument parsing stopped on the '}'.
addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal());
return index+1;
}
private int parseSimpleStyle(int index) {
int start=index;
int nestedBraces=0;
while(index0) {
--nestedBraces;
} else {
int length=--index-start;
if(length>Part.MAX_LENGTH) {
throw new IndexOutOfBoundsException(
"Argument style text too long: "+prefix(start));
}
addPart(Part.Type.ARG_STYLE, start, length, 0);
return index;
}
} // c is part of literal text
}
throw new IllegalArgumentException(
"Unmatched '{' braces in message "+prefix());
}
// Parses a choice argument style starting at index: a sequence of
// |-separated (number, separator, message) triples.
// Returns the index of the terminating '}', or msg.length() if the pattern ended.
private int parseChoiceStyle(int index, int nestingLevel) {
    final int styleStart = index;
    index = skipWhiteSpace(index);
    if (index == msg.length() || msg.charAt(index) == '}') {
        throw new IllegalArgumentException("Missing choice argument pattern in "+prefix());
    }
    while (true) {
        // 1. The number.
        final int numberStart = index;
        index = skipDouble(index);
        final int numberLength = index - numberStart;
        if (numberLength == 0) {
            throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(styleStart));
        }
        if (numberLength > Part.MAX_LENGTH) {
            throw new IndexOutOfBoundsException("Choice number too long: "+prefix(numberStart));
        }
        parseDouble(numberStart, index, true); // adds ARG_INT or ARG_DOUBLE

        // 2. The separator.
        index = skipWhiteSpace(index);
        if (index == msg.length()) {
            throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(styleStart));
        }
        final char separator = msg.charAt(index);
        if (separator != '#' && separator != '<' && separator != '\u2264') { // U+2264 is <=
            throw new IllegalArgumentException(
                "Expected choice separator (#<\u2264) instead of '"+separator+
                "' in choice pattern "+prefix(styleStart));
        }
        addPart(Part.Type.ARG_SELECTOR, index, 1, 0);

        // 3. The message fragment, which begins right after the separator.
        // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
        index = parseMessage(index + 1, 0, nestingLevel + 1, ArgType.CHOICE);
        if (index == msg.length()) {
            return index;
        }
        if (msg.charAt(index) == '}') {
            if (!inMessageFormatPattern(nestingLevel)) {
                throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(styleStart));
            }
            return index;
        }
        // Otherwise the terminator is '|': step over it and parse the next triple.
        index = skipWhiteSpace(index + 1);
    }
}
private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) {
int start=index;
boolean isEmpty=true;
boolean hasOther=false;
for(;;) {
// First, collect the selector looking for a small set of terminators.
// It would be a little faster to consider the syntax of each possible
// token right here, but that makes the code too complicated.
index=skipWhiteSpace(index);
boolean eos=index==msg.length();
if(eos || msg.charAt(index)=='}') {
if(eos==inMessageFormatPattern(nestingLevel)) {
throw new IllegalArgumentException(
"Bad "+
argType.toString().toLowerCase(Locale.ENGLISH)+
" pattern syntax: "+prefix(start));
}
if(!hasOther) {
throw new IllegalArgumentException(
"Missing 'other' keyword in "+
argType.toString().toLowerCase(Locale.ENGLISH)+
" pattern in "+prefix());
}
return index;
}
int selectorIndex=index;
if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') {
// explicit-value plural selector: =double
index=skipDouble(index+1);
int length=index-selectorIndex;
if(length==1) {
throw new IllegalArgumentException(
"Bad "+
argType.toString().toLowerCase(Locale.ENGLISH)+
" pattern syntax: "+prefix(start));
}
if(length>Part.MAX_LENGTH) {
throw new IndexOutOfBoundsException(
"Argument selector too long: "+prefix(selectorIndex));
}
addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
parseDouble(selectorIndex+1, index, false); // adds ARG_INT or ARG_DOUBLE
} else {
index=skipIdentifier(index);
int length=index-selectorIndex;
if(length==0) {
throw new IllegalArgumentException(
"Bad "+
argType.toString().toLowerCase(Locale.ENGLISH)+
" pattern syntax: "+prefix(start));
}
// Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
if( argType.hasPluralStyle() && length==6 && indexPart.MAX_LENGTH) {
throw new IndexOutOfBoundsException(
"Plural offset value too long: "+prefix(valueIndex));
}
parseDouble(valueIndex, index, false); // adds ARG_INT or ARG_DOUBLE
isEmpty=false;
continue; // no message fragment after the offset
} else {
// normal selector word
if(length>Part.MAX_LENGTH) {
throw new IndexOutOfBoundsException(
"Argument selector too long: "+prefix(selectorIndex));
}
addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
if(msg.regionMatches(selectorIndex, "other", 0, length)) {
hasOther=true;
}
}
}
// parse the message fragment following the selector
index=skipWhiteSpace(index);
if(index==msg.length() || msg.charAt(index)!='{') {
throw new IllegalArgumentException(
"No message fragment after "+
argType.toString().toLowerCase(Locale.ENGLISH)+
" selector: "+prefix(selectorIndex));
}
index=parseMessage(index, 1, nestingLevel+1, argType);
isEmpty=false;
}
}
/**
* Validates and parses an argument name or argument number string.
* This internal method assumes that the input substring is a "pattern identifier".
* @return >=0 if the name is a valid number,
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
* ARG_NAME_NOT_VALID (-2) if it is neither.
* @see #validateArgumentName(String)
*/
private static int parseArgNumber(CharSequence s, int start, int limit) {
// If the identifier contains only ASCII digits, then it is an argument _number_
// and must not have leading zeros (except "0" itself).
// Otherwise it is an argument _name_.
if(start>=limit) {
return ARG_NAME_NOT_VALID;
}
int number;
// Defer numeric errors until we know there are only digits.
boolean badNumber;
char c=s.charAt(start++);
if(c=='0') {
if(start==limit) {
return 0;
} else {
number=0;
badNumber=true; // leading zero
}
} else if('1'<=c && c<='9') {
number=c-'0';
badNumber=false;
} else {
return ARG_NAME_NOT_NUMBER;
}
while(start=Integer.MAX_VALUE/10) {
badNumber=true; // overflow
}
number=number*10+(c-'0');
} else {
return ARG_NAME_NOT_NUMBER;
}
}
// There are only ASCII digits.
if(badNumber) {
return ARG_NAME_NOT_VALID;
} else {
return number;
}
}
/**
 * Convenience overload: runs the static parseArgNumber() on a range of
 * this object's pattern string.
 * @param start start index into the pattern string
 * @param limit limit index into the pattern string
 * @return the result of the static parseArgNumber() for msg[start, limit[
 */
private int parseArgNumber(int start, int limit) {
    final CharSequence pattern=msg;
    return parseArgNumber(pattern, start, limit);
}
/**
* Parses a number from the specified message substring.
* @param start start index into the message string
* @param limit limit index into the message string, must be start(Part.MAX_VALUE+isNegative)) {
break; // not a small-enough integer
}
if(index==limit) {
addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value);
return;
}
c=msg.charAt(index++);
}
// Let Double.parseDouble() throw a NumberFormatException.
double numericValue=Double.parseDouble(msg.substring(start, limit));
addArgDoublePart(numericValue, start, limit-start);
return;
}
throw new NumberFormatException(
"Bad syntax for numeric value: "+msg.substring(start, limit));
}
/**
 * Copies s[start, limit[ into sb while halving the apostrophes, as JDK patterns do:
 * a single apostrophe is dropped, and a pair of adjacent apostrophes yields one.
 * @param s the source string
 * @param start index of the first char to copy
 * @param limit index just beyond the last char to copy
 * @param sb receives the reduced text
 * @internal
 */
/* package */ static void appendReducedApostrophes(String s, int start, int limit,
                                                   StringBuilder sb) {
    int pos=start;
    // Index at which another apostrophe would complete a doubled pair; -1 if none pending.
    int pairedAt=-1;
    int apos=s.indexOf('\'', pos);
    while(0<=apos && apos<limit) {
        if(apos!=pairedAt) {
            // Copy the text up to this apostrophe and drop the apostrophe itself.
            sb.append(s, pos, apos);
            pos=apos+1;
            pairedAt=pos;
        } else {
            // Second apostrophe of an adjacent pair: emit exactly one.
            sb.append('\'');
            pos++;
            pairedAt=-1;
        }
        apos=s.indexOf('\'', pos);
    }
    // No more apostrophes before limit: copy the remainder.
    sb.append(s, pos, limit);
}
/**
 * Applies PatternProps.skipWhiteSpace() to this object's pattern string.
 * @param index index into the pattern string at which to start
 * @return the index returned by PatternProps.skipWhiteSpace()
 */
private int skipWhiteSpace(int index) {
    final int next=PatternProps.skipWhiteSpace(msg, index);
    return next;
}
/**
 * Applies PatternProps.skipIdentifier() to this object's pattern string.
 * @param index index into the pattern string at which to start
 * @return the index returned by PatternProps.skipIdentifier()
 */
private int skipIdentifier(int index) {
    final int next=PatternProps.skipIdentifier(msg, index);
    return next;
}
/**
* Skips a sequence of characters that could occur in a double value.
* Does not fully parse or validate the value.
*/
private int skipDouble(int index) {
while(index'9' && c!='e' && c!='E' && c!=0x221e)) {
break;
}
++index;
}
return index;
}
/**
 * Tests whether c is an ASCII letter, a-z or A-Z.
 * @param c the code point to test
 * @return true if c is an ASCII letter
 */
private static boolean isArgTypeChar(int c) {
    // Reject everything outside the span from 'A' to 'z' first ...
    if(c<'A' || 'z'<c) {
        return false;
    }
    // ... then exclude the punctuation between 'Z' and 'a'.
    return c<='Z' || 'a'<=c;
}
/**
 * Tests whether the six pattern chars starting at index spell "choice",
 * with each letter in either ASCII case.
 * Performs no bounds check: the chars are read one at a time until the first mismatch.
 * @param index index of the first char to compare in the pattern string
 * @return true if the chars match
 */
private boolean isChoice(int index) {
    final String lower="choice";
    final String upper="CHOICE";
    for(int i=0; i<lower.length(); ++i) {
        char c=msg.charAt(index+i);
        if(c!=lower.charAt(i) && c!=upper.charAt(i)) {
            return false;
        }
    }
    return true;
}
/**
 * Tests whether the six pattern chars starting at index spell "plural",
 * with each letter in either ASCII case.
 * Performs no bounds check: the chars are read one at a time until the first mismatch.
 * @param index index of the first char to compare in the pattern string
 * @return true if the chars match
 */
private boolean isPlural(int index) {
    final String lower="plural";
    final String upper="PLURAL";
    for(int i=0; i<lower.length(); ++i) {
        char c=msg.charAt(index+i);
        if(c!=lower.charAt(i) && c!=upper.charAt(i)) {
            return false;
        }
    }
    return true;
}
/**
 * Tests whether the six pattern chars starting at index spell "select",
 * with each letter in either ASCII case.
 * Performs no bounds check: the chars are read one at a time until the first mismatch.
 * @param index index of the first char to compare in the pattern string
 * @return true if the chars match
 */
private boolean isSelect(int index) {
    final String lower="select";
    final String upper="SELECT";
    for(int i=0; i<lower.length(); ++i) {
        char c=msg.charAt(index+i);
        if(c!=lower.charAt(i) && c!=upper.charAt(i)) {
            return false;
        }
    }
    return true;
}
/**
 * Tests whether the seven pattern chars starting at index spell "ordinal",
 * with each letter in either ASCII case.
 * Performs no bounds check: the chars are read one at a time until the first mismatch.
 * @param index index of the first char to compare in the pattern string
 * @return true if the chars match
 */
private boolean isOrdinal(int index) {
    final String lower="ordinal";
    final String upper="ORDINAL";
    for(int i=0; i<lower.length(); ++i) {
        char c=msg.charAt(index+i);
        if(c!=lower.charAt(i) && c!=upper.charAt(i)) {
            return false;
        }
    }
    return true;
}
/**
 * @param nestingLevel the current nesting level of the parser
 * @return true if we are inside a MessageFormat (sub-)pattern,
 *         as opposed to inside a top-level choice/plural/select pattern.
 */
private boolean inMessageFormatPattern(int nestingLevel) {
    if(nestingLevel>0) {
        // Any nested level is a MessageFormat sub-pattern.
        return true;
    }
    // At the top level, the first part tells what kind of pattern is being parsed.
    return parts.get(0).type==Part.Type.MSG_START;
}
/**
 * @param nestingLevel the current nesting level of the parser
 * @param parentType the argument type of the enclosing complex argument
 * @return true if we are in a MessageFormat sub-pattern
 *         of a top-level ChoiceFormat pattern.
 */
private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) {
    if(nestingLevel!=1 || parentType!=ArgType.CHOICE) {
        return false;
    }
    // The first part is only consulted once the cheaper checks have passed.
    return parts.get(0).type!=Part.Type.MSG_START;
}
/**
 * Creates a new Part from the given fields and appends it to the parts list.
 * @param type the part type
 * @param index the pattern string index of the part
 * @param length the length of the part's pattern substring
 * @param value the part's value
 */
private void addPart(Part.Type type, int index, int length, int value) {
    final Part part=new Part(type, index, length, value);
    parts.add(part);
}
/**
 * Appends a new part and records its index as the limitPartIndex
 * of the part at index start.
 * @param start the parts-list index of the part that the new part closes
 * @param type the type of the new part
 * @param index the pattern string index of the new part
 * @param length the length of the new part's pattern substring
 * @param value the new part's value
 */
private void addLimitPart(int start, Part.Type type, int index, int length, int value) {
    // The new part is appended at the current end of the list,
    // so its index is the list size before the append.
    parts.get(start).limitPartIndex=parts.size();
    parts.add(new Part(type, index, length, value));
}
/**
 * Stores numericValue in the numericValues list (allocating the list on first use)
 * and appends an ARG_DOUBLE part whose value is the index of the stored number.
 * @param numericValue the number to store
 * @param start the pattern string index of the number
 * @param length the length of the number's pattern substring
 * @throws IndexOutOfBoundsException if the index of the new number would exceed Part.MAX_VALUE
 */
private void addArgDoublePart(double numericValue, int start, int length) {
    int numericIndex;
    if(numericValues==null) {
        numericValues=new ArrayList<Double>();
        numericIndex=0;
    } else {
        numericIndex=numericValues.size();
        // The index is stored in the part's value field.
        if(numericIndex>Part.MAX_VALUE) {
            throw new IndexOutOfBoundsException("Too many numeric values");
        }
    }
    numericValues.add(numericValue);
    addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex);
}
/** Maximum number of pattern chars (including a trailing " ...") quoted in a prefix. */
private static final int MAX_PREFIX_LENGTH=24;
/**
 * Builds a double-quoted excerpt of s.substring(start) for Exception messages.
 * If start is not 0, the excerpt is preceded by "[at pattern index start] ".
 * Text longer than MAX_PREFIX_LENGTH is cut short and ends with " ...".
 * @param s the pattern string
 * @param start start index in s
 * @return s.substring(start) or a prefix of that, in double quotes
 */
private static String prefix(String s, int start) {
    StringBuilder quoted=new StringBuilder(MAX_PREFIX_LENGTH+20);
    if(start!=0) {
        quoted.append("[at pattern index ").append(start).append("] \"");
    } else {
        quoted.append("\"");
    }
    if(s.length()-start>MAX_PREFIX_LENGTH) {
        // Too long: keep room for the 4 chars of " ...".
        int end=start+MAX_PREFIX_LENGTH-4;
        if(Character.isHighSurrogate(s.charAt(end-1))) {
            // do not end the excerpt with an unpaired lead surrogate
            end--;
        }
        quoted.append(s, start, end).append(" ...");
    } else {
        quoted.append(s.substring(start));
    }
    quoted.append("\"");
    return quoted.toString();
}
/**
 * Returns a quoted excerpt of s from its beginning. Used for Exception messages.
 * @param s the pattern string
 * @return the result of prefix(s, 0)
 */
private static String prefix(String s) {
    final int patternStart=0;
    return prefix(s, patternStart);
}
/**
 * Returns a quoted excerpt of this object's pattern string from the given index.
 * Used for Exception messages.
 * @param start start index in the pattern string
 * @return the result of prefix(msg, start)
 */
private String prefix(int start) {
    final String pattern=msg;
    return prefix(pattern, start);
}
/**
 * Returns a quoted excerpt of this object's pattern string from its beginning.
 * Used for Exception messages.
 * @return the excerpt for pattern index 0
 */
private String prefix() {
    return prefix(0);
}
/** The apostrophe mode used by this object. */
private ApostropheMode aposMode;
/** The pattern string that the parsing methods read. */
private String msg;
/** The parsed parts, in pattern order. */
private ArrayList<Part> parts=new ArrayList<Part>();
/** Values referenced by ARG_DOUBLE parts; allocated when the first one is added. */
private ArrayList<Double> numericValues;
private boolean hasArgNames;
private boolean hasArgNumbers;
private boolean needsAutoQuoting;
private volatile boolean frozen;

/**
 * The default apostrophe mode, read from the ICU configuration property
 * "com.ibm.icu.text.MessagePattern.ApostropheMode" (fallback: DOUBLE_OPTIONAL).
 */
private static final ApostropheMode defaultAposMode=
    ApostropheMode.valueOf(
        ICUConfig.get("com.ibm.icu.text.MessagePattern.ApostropheMode", "DOUBLE_OPTIONAL"));

/** Cached result of ArgType.values(). */
private static final ArgType[] argTypes=ArgType.values();
}