All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.message2.MFParser Maven / Gradle / Ivy

// © 2022 and later: Unicode, Inc. and others.
// License & terms of use: https://www.unicode.org/copyright.html

package com.ibm.icu.message2;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * This class parses a {@code MessageFormat 2} syntax into a data model {@link MFDataModel.Message}.
 *
 * 

It is used by {@link MessageFormatter}, but it might be handy for various tools.

* * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ @Deprecated public class MFParser { private static final int EOF = -1; private final InputSource input; MFParser(String text) { this.input = new InputSource(text); } /** * Parses a {@code MessageFormat 2} syntax into a {@link MFDataModel.Message}. * *

It is used by {@link MessageFormatter}, but it might be handy for various tools.

* @param input the text to parse * @return the parsed {@code MFDataModel.Message} * @throws MFParseException if errors are detected * * @internal ICU 75 technology preview * @deprecated This API is for technology preview only. */ @Deprecated public static MFDataModel.Message parse(String input) throws MFParseException { return new MFParser(input).parseImpl(); } // Parser proper private MFDataModel.Message parseImpl() throws MFParseException { MFDataModel.Message result; int cp = input.peekChar(); if (cp == '.') { // declarations or .match result = getComplexMessage(); } else if (cp == '{') { // `{` or `{{` cp = input.readCodePoint(); cp = input.peekChar(); if (cp == '{') { // `{{`, complex body without declarations input.backup(1); // let complexBody deal with the wrapping {{ and }} MFDataModel.Pattern pattern = getQuotedPattern(); result = new MFDataModel.PatternMessage(new ArrayList<>(), pattern); } else { // placeholder input.backup(1); // We want the '{' present, to detect the part as placeholder. MFDataModel.Pattern pattern = getPattern(); result = new MFDataModel.PatternMessage(new ArrayList<>(), pattern); } } else { MFDataModel.Pattern pattern = getPattern(); result = new MFDataModel.PatternMessage(new ArrayList<>(), pattern); } skipOptionalWhitespaces(); checkCondition(input.atEnd(), "Content detected after the end of the message."); new MFDataModelValidator(result).validate(); return result; } // abnf: simple-message = [simple-start pattern] // abnf: simple-start = simple-start-char / text-escape / placeholder // abnf: pattern = *(text-char / text-escape / placeholder) private MFDataModel.Pattern getPattern() throws MFParseException { MFDataModel.Pattern pattern = new MFDataModel.Pattern(); while (true) { MFDataModel.PatternPart part = getPatternPart(); if (part == null) { break; } pattern.parts.add(part); } // checkCondition(!pattern.parts.isEmpty(), "Empty pattern"); return pattern; } private MFDataModel.PatternPart getPatternPart() throws MFParseException { int cp = input.peekChar(); switch (cp) { case EOF: return null; case '}': // This is the end, otherwise it would be escaped return null; case '{': MFDataModel.Expression ph = getPlaceholder(); return ph; default: String plainText = getText(); MFDataModel.StringPart sp = new MFDataModel.StringPart(plainText); return sp; } } private String getText() { StringBuilder result = new StringBuilder(); while (true) { int cp = input.readCodePoint(); switch (cp) { case EOF: return result.toString(); case '\\': cp = input.readCodePoint(); if (cp == '\\' || cp == '{' || cp == '|' | cp == '}') { result.appendCodePoint(cp); } else { // TODO: Error, treat invalid escape? result.appendCodePoint('\\'); result.appendCodePoint(cp); } break; case '.': case '@': case '|': result.appendCodePoint(cp); break; default: if (StringUtils.isContentChar(cp) || StringUtils.isWhitespace(cp)) { result.appendCodePoint(cp); } else { input.backup(1); return result.toString(); } } } } // abnf: placeholder = expression / markup // abnf: expression = literal-expression // abnf: / variable-expression // abnf: / annotation-expression // abnf: literal-expression = "{" [s] literal [s annotation] *(s attribute) [s] "}" // abnf: variable-expression = "{" [s] variable [s annotation] *(s attribute) [s] "}" // abnf: annotation-expression = "{" [s] annotation *(s attribute) [s] "}" // abnf: markup = "{" [s] "#" identifier *(s option) *(s attribute) [s] ["/"] "}" ; open and standalone // abnf: / "{" [s] "/" identifier *(s option) *(s attribute) [s] "}" ; close private MFDataModel.Expression getPlaceholder() throws MFParseException { int cp = input.peekChar(); if (cp != '{') { return null; } input.readCodePoint(); // consume the '{' skipOptionalWhitespaces(); cp = input.peekChar(); MFDataModel.Expression result; if (cp == '#' || cp == '/') { result = getMarkup(); } else if (cp == '$') { result = getVariableExpression(); } else if (StringUtils.isFunctionSigil(cp) || StringUtils.isPrivateAnnotationSigil(cp) || StringUtils.isReservedAnnotationSigil(cp)) { result = getAnnotationExpression(); } else { result = getLiteralExpression(); } skipOptionalWhitespaces(); cp = input.readCodePoint(); // consume the '}' checkCondition(cp == '}', "Unclosed placeholder"); return result; } private MFDataModel.Annotation getAnnotation() throws MFParseException { int position = input.getPosition(); skipOptionalWhitespaces(); int cp = input.peekChar(); switch (cp) { case '}': return null; case ':': // annotation, function // abnf: function = ":" identifier *(s option) input.readCodePoint(); // Consume the sigil String identifier = getIdentifier(); checkCondition(identifier != null, "Annotation / function name missing"); Map options = getOptions(); return new MFDataModel.FunctionAnnotation(identifier, options); default: // reserved && private if (StringUtils.isReservedAnnotationSigil(cp) || StringUtils.isPrivateAnnotationSigil(cp)) { cp = input.readCodePoint(); // The sigil is part of the body. // Safe to cast to char, the code point is in BMP identifier = (char) cp + getIdentifier(); String body = getReservedBody(); return new MFDataModel.UnsupportedAnnotation(identifier + body); } } input.gotoPosition(position); return null; } private MFDataModel.Annotation getMarkupAnnotation() throws MFParseException { skipOptionalWhitespaces(); int cp = input.peekChar(); switch (cp) { case '}': return null; case '#': case '/': // abnf: markup = "{" [s] "#" identifier *(s option) *(s attribute) [s] ["/"] "}" ; open and standalone // abnf: / "{" [s] "/" identifier *(s option) *(s attribute) [s] "}" ; close input.readCodePoint(); // Consume the sigil String identifier = getIdentifier(); checkCondition(identifier != null, "Annotation / function name missing"); Map options = getOptions(); return new MFDataModel.FunctionAnnotation(identifier, options); default: // reserved, private, function, something else, return null; } } // abnf: literal-expression = "{" [s] literal [s annotation] *(s attribute) [s] "}" private MFDataModel.Expression getLiteralExpression() throws MFParseException { MFDataModel.Literal literal = getLiteral(); checkCondition(literal != null, "Literal expression expected."); MFDataModel.Annotation annotation = null; int wsCount = skipWhitespaces(); if (wsCount > 0) { // we might have an annotation annotation = getAnnotation(); if (annotation == null) { // We had some spaces, but no annotation. // So we put (some) back for the possible attributes. input.backup(1); } } List attributes = getAttributes(); // Literal without a function, for example {|hello|} or {123} return new MFDataModel.LiteralExpression(literal, annotation, attributes); } // abnf: variable-expression = "{" [s] variable [s annotation] *(s attribute) [s] "}" private MFDataModel.VariableExpression getVariableExpression() throws MFParseException { MFDataModel.VariableRef variableRef = getVariableRef(); MFDataModel.Annotation annotation = getAnnotation(); List attributes = getAttributes(); // Variable without a function, for example {$foo} return new MFDataModel.VariableExpression(variableRef, annotation, attributes); } // abnf: annotation-expression = "{" [s] annotation *(s attribute) [s] "}" private MFDataModel.Expression getAnnotationExpression() throws MFParseException { MFDataModel.Annotation annotation = getAnnotation(); List attributes = getAttributes(); if (annotation instanceof MFDataModel.FunctionAnnotation) { return new MFDataModel.FunctionExpression( (MFDataModel.FunctionAnnotation) annotation, attributes); } else if (annotation instanceof MFDataModel.UnsupportedAnnotation) { return new MFDataModel.UnsupportedExpression( (MFDataModel.UnsupportedAnnotation) annotation, attributes); } else { error("Unexpected annotation : " + annotation); } return null; } // abnf: markup = "{" [s] "#" identifier *(s option) *(s attribute) [s] ["/"] "}" ; open and standalone // abnf: / "{" [s] "/" identifier *(s option) *(s attribute) [s] "}" ; close private MFDataModel.Markup getMarkup() throws MFParseException { int cp = input.peekChar(); // consume the '{' checkCondition(cp == '#' || cp == '/', "Should not happen. Expecting a markup."); MFDataModel.Markup.Kind kind = cp == '/' ? MFDataModel.Markup.Kind.CLOSE : MFDataModel.Markup.Kind.OPEN; MFDataModel.Annotation annotation = getMarkupAnnotation(); List attributes = getAttributes(); cp = input.peekChar(); if (cp == '/') { kind = MFDataModel.Markup.Kind.STANDALONE; input.readCodePoint(); } if (annotation instanceof MFDataModel.FunctionAnnotation) { MFDataModel.FunctionAnnotation fa = (MFDataModel.FunctionAnnotation) annotation; return new MFDataModel.Markup(kind, fa.name, fa.options, attributes); } return null; } private List getAttributes() throws MFParseException { List result = new ArrayList<>(); while (true) { MFDataModel.Attribute attribute = getAttribute(); if (attribute == null) { break; } result.add(attribute); } return result; } // abnf: attribute = "@" identifier [[s] "=" [s] (literal / variable)] private MFDataModel.Attribute getAttribute() throws MFParseException { int position = input.getPosition(); if (skipWhitespaces() == 0) { input.gotoPosition(position); return null; } int cp = input.peekChar(); if (cp == '@') { input.readCodePoint(); // consume the '@' String id = getIdentifier(); int wsCount = skipWhitespaces(); cp = input.peekChar(); MFDataModel.LiteralOrVariableRef literalOrVariable = null; if (cp == '=') { input.readCodePoint(); skipOptionalWhitespaces(); literalOrVariable = getLiteralOrVariableRef(); checkCondition(literalOrVariable != null, "Attributes must have a value after `=`"); } else { // was not equal, attribute without a value, put the "spaces" back. input.backup(wsCount); } return new MFDataModel.Attribute(id, literalOrVariable); } else { input.gotoPosition(position); } return null; } // abnf: reserved-body = *([s] 1*(reserved-char / reserved-escape / quoted)) // abnf: reserved-escape = backslash ( backslash / "{" / "|" / "}" ) private String getReservedBody() throws MFParseException { int spaceCount = skipWhitespaces(); StringBuilder result = new StringBuilder(); while (true) { int cp = input.readCodePoint(); if (StringUtils.isReservedChar(cp)) { result.appendCodePoint(cp); } else if (cp == '\\') { cp = input.readCodePoint(); checkCondition( cp == '{' || cp == '|' || cp == '}', "Invalid escape sequence. Only \\{, \\| and \\} are valid here."); result.append(cp); } else if (cp == '|') { input.backup(1); MFDataModel.Literal quoted = getQuotedLiteral(); result.append(quoted.value); } else if (cp == EOF) { return result.toString(); } else { if (result.length() == 0) { input.backup(spaceCount + 1); return ""; } else { input.backup(1); return result.toString(); } } } } // abnf: identifier = [namespace ":"] name // abnf: namespace = name // abnf: name = name-start *name-char private String getIdentifier() throws MFParseException { String namespace = getName(); if (namespace == null) { return null; } int cp = input.readCodePoint(); if (cp == ':') { // the previous name was namespace String name = getName(); checkCondition(name != null, "Expected name after namespace '" + namespace + "'"); return namespace + ":" + name; } else { input.backup(1); } return namespace; } // abnf helper: *(s option) private Map getOptions() throws MFParseException { Map options = new LinkedHashMap<>(); while (true) { MFDataModel.Option option = getOption(); if (option == null) { break; } if (options.containsKey(option.name)) { error("Duplicated option '" + option.name + "'"); } options.put(option.name, option); } return options; } // abnf: option = identifier [s] "=" [s] (literal / variable) private MFDataModel.Option getOption() throws MFParseException { int position = input.getPosition(); skipOptionalWhitespaces(); String identifier = getIdentifier(); if (identifier == null) { input.gotoPosition(position); return null; } skipOptionalWhitespaces(); int cp = input.readCodePoint(); checkCondition(cp == '=', "Expected '='"); // skipOptionalWhitespaces(); MFDataModel.LiteralOrVariableRef litOrVar = getLiteralOrVariableRef(); if (litOrVar == null) { error("Options must have a value. An empty string should be quoted."); } return new MFDataModel.Option(identifier, litOrVar); } private MFDataModel.LiteralOrVariableRef getLiteralOrVariableRef() throws MFParseException { int cp = input.peekChar(); if (cp == '$') { return getVariableRef(); } return getLiteral(); } // abnf: literal = quoted / unquoted private MFDataModel.Literal getLiteral() throws MFParseException { int cp = input.readCodePoint(); switch (cp) { case '|': // quoted // abnf: quoted = "|" *(quoted-char / quoted-escape) "|" input.backup(1); MFDataModel.Literal ql = getQuotedLiteral(); return ql; default: // unquoted input.backup(1); MFDataModel.Literal unql = getUnQuotedLiteral(); return unql; } } private MFDataModel.VariableRef getVariableRef() throws MFParseException { int cp = input.readCodePoint(); if (cp != '$') { checkCondition(cp == '$', "We can't get here"); } // abnf: variable = "$" name String name = getName(); checkCondition(name != null, "Invalid variable reference following $"); return new MFDataModel.VariableRef(name); } private MFDataModel.Literal getQuotedLiteral() throws MFParseException { StringBuilder result = new StringBuilder(); int cp = input.readCodePoint(); checkCondition(cp == '|', "expected starting '|'"); while (true) { cp = input.readCodePoint(); if (cp == EOF) { break; } else if (StringUtils.isQuotedChar(cp)) { result.appendCodePoint(cp); } else if (cp == '\\') { cp = input.readCodePoint(); checkCondition(cp == '|', "Invalid escape sequence, only \"\\|\" is valid here"); result.appendCodePoint('|'); } else { break; } } checkCondition(cp == '|', "expected ending '|'"); return new MFDataModel.Literal(result.toString()); } private MFDataModel.Literal getUnQuotedLiteral() throws MFParseException { String name = getName(); if (name != null) { return new MFDataModel.Literal(name); } return getNumberLiteral(); } // abnf: ; number-literal matches JSON number (https://www.rfc-editor.org/rfc/rfc8259#section-6) // abnf: number-literal = ["-"] (%x30 / (%x31-39 *DIGIT)) ["." 1*DIGIT] [%i"e" ["-" / "+"] 1*DIGIT] private static final Pattern RE_NUMBER_LITERAL = Pattern.compile("^-?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][+\\-]?[0-9]+)?"); private MFDataModel.Literal getNumberLiteral() { String numberString = peekWithRegExp(RE_NUMBER_LITERAL); if (numberString != null) { return new MFDataModel.Literal(numberString); } return null; } private void skipMandatoryWhitespaces() throws MFParseException { int count = skipWhitespaces(); checkCondition(count > 0, "Space expected"); } private void skipOptionalWhitespaces() { skipWhitespaces(); } private int skipWhitespaces() { int skipCount = 0; while (true) { int cp = input.readCodePoint(); if (cp == EOF) { return skipCount; } if (!StringUtils.isWhitespace(cp)) { input.backup(1); return skipCount; } skipCount++; } } private MFDataModel.Message getComplexMessage() throws MFParseException { List declarations = new ArrayList<>(); boolean foundMatch = false; while (true) { MFDataModel.Declaration declaration = getDeclaration(); if (declaration == null) { break; } if (declaration instanceof MatchDeclaration) { foundMatch = true; break; } declarations.add(declaration); } if (foundMatch) { return getMatch(declarations); } else { // Expect {{...}} or end of message skipOptionalWhitespaces(); int cp = input.peekChar(); if (cp == EOF) { // Only declarations, no pattern return new MFDataModel.PatternMessage(declarations, null); } else { MFDataModel.Pattern pattern = getQuotedPattern(); return new MFDataModel.PatternMessage(declarations, pattern); } } } // abnf: matcher = match-statement 1*([s] variant) // abnf: match-statement = match 1*([s] selector) // abnf: selector = expression // abnf: variant = key *(s key) [s] quoted-pattern // abnf: key = literal / "*" // abnf: match = %s".match" private MFDataModel.SelectMessage getMatch(List declarations) throws MFParseException { // ".match" was already consumed by the caller // Look for selectors List expressions = new ArrayList<>(); while (true) { skipMandatoryWhitespaces(); MFDataModel.Expression expression = getPlaceholder(); if (expression == null) { break; } checkCondition( !(expression instanceof MFDataModel.Markup), "Cannot do selection on markup"); expressions.add(expression); } checkCondition(!expressions.isEmpty(), "There should be at least one selector expression."); // At this point we need to look for variants, which are key - value List variants = new ArrayList<>(); while (true) { MFDataModel.Variant variant = getVariant(); if (variant == null) { break; } variants.add(variant); } return new MFDataModel.SelectMessage(declarations, expressions, variants); } // abnf: variant = key *(s key) [s] quoted-pattern // abnf: key = literal / "*" private MFDataModel.Variant getVariant() throws MFParseException { List keys = new ArrayList<>(); // abnf variant = key *(s key) [s] quoted-pattern while (true) { // Space is required between keys MFDataModel.LiteralOrCatchallKey key = getKey(!keys.isEmpty()); if (key == null) { break; } keys.add(key); } skipOptionalWhitespaces(); if (input.atEnd()) { checkCondition( keys.isEmpty(), "After selector keys it is mandatory to have a pattern."); return null; } MFDataModel.Pattern pattern = getQuotedPattern(); return new MFDataModel.Variant(keys, pattern); } private MFDataModel.LiteralOrCatchallKey getKey(boolean requireSpaces) throws MFParseException { if (requireSpaces) { skipMandatoryWhitespaces(); } else { skipOptionalWhitespaces(); } int cp = input.peekChar(); if (cp == '*') { input.readCodePoint(); // consume the '*' return new MFDataModel.CatchallKey(); } if (cp == EOF) { return null; } return getLiteral(); } private static class MatchDeclaration implements MFDataModel.Declaration { // Provides a common type that extends MFDataModel.Declaration but for match. // There is no such thing in the data model. } // abnf: input-declaration = input [s] variable-expression // abnf: local-declaration = local s variable [s] "=" [s] expression // abnf: reserved-statement = reserved-keyword [s reserved-body] 1*([s] expression) // abnf: reserved-keyword = "." name private MFDataModel.Declaration getDeclaration() throws MFParseException { int position = input.getPosition(); skipOptionalWhitespaces(); int cp = input.readCodePoint(); if (cp != '.') { input.gotoPosition(position); return null; } String declName = getName(); checkCondition(declName != null, "Expected a declaration after the '.'"); MFDataModel.Expression expression; switch (declName) { case "input": skipMandatoryWhitespaces(); expression = getPlaceholder(); String inputVarName = null; if (expression instanceof MFDataModel.VariableExpression) { inputVarName = ((MFDataModel.VariableExpression) expression).arg.name; } if (expression instanceof MFDataModel.VariableExpression) { return new MFDataModel.InputDeclaration( inputVarName, (MFDataModel.VariableExpression) expression); } break; case "local": // abnf: local-declaration = local s variable [s] "=" [s] expression skipMandatoryWhitespaces(); MFDataModel.LiteralOrVariableRef varName = getVariableRef(); skipOptionalWhitespaces(); cp = input.readCodePoint(); checkCondition(cp == '=', declName); skipOptionalWhitespaces(); expression = getPlaceholder(); if (varName instanceof MFDataModel.VariableRef) { return new MFDataModel.LocalDeclaration( ((MFDataModel.VariableRef) varName).name, expression); } break; case "match": return new MatchDeclaration(); default: // abnf: reserved-statement = reserved-keyword [s reserved-body] 1*([s] expression) skipOptionalWhitespaces(); String body = getReservedBody(); List expressions = new ArrayList<>(); while (true) { skipOptionalWhitespaces(); expression = getPlaceholder(); // This also covers != null if (expression instanceof MFDataModel.VariableExpression) { expressions.add(expression); } else { break; } } return new MFDataModel.UnsupportedStatement(declName, body, expressions); } return null; } // quoted-pattern = "{{" pattern "}}" private MFDataModel.Pattern getQuotedPattern() throws MFParseException { // {{ ... }} int cp = input.readCodePoint(); checkCondition(cp == '{', "Expected { for a complex body"); cp = input.readCodePoint(); checkCondition(cp == '{', "Expected second { for a complex body"); MFDataModel.Pattern pattern = getPattern(); cp = input.readCodePoint(); checkCondition(cp == '}', "Expected } to end a complex body"); cp = input.readCodePoint(); checkCondition(cp == '}', "Expected second } to end a complex body"); return pattern; } private String getName() throws MFParseException { StringBuilder result = new StringBuilder(); int cp = input.readCodePoint(); checkCondition(cp != EOF, "Expected name or namespace."); if (!StringUtils.isNameStart(cp)) { input.backup(1); return null; } result.appendCodePoint(cp); while (true) { cp = input.readCodePoint(); if (StringUtils.isNameChar(cp)) { result.appendCodePoint(cp); } else if (cp == EOF) { break; } else { input.backup(1); break; } } return result.toString(); } private void checkCondition(boolean condition, String message) throws MFParseException { if (!condition) { error(message); } } private void error(String message) throws MFParseException { StringBuilder finalMsg = new StringBuilder(); if (input == null) { finalMsg.append("Parse error: "); finalMsg.append(message); } else { int position = input.getPosition(); finalMsg.append("Parse error [" + input.getPosition() + "]: "); finalMsg.append(message); finalMsg.append("\n"); if (position != EOF) { finalMsg.append(input.buffer.substring(0, position)); finalMsg.append("^^^"); finalMsg.append(input.buffer.substring(position)); } else { finalMsg.append(input.buffer); finalMsg.append("^^^"); } } throw new MFParseException(finalMsg.toString(), input.getPosition()); } private String peekWithRegExp(Pattern pattern) { StringView sv = new StringView(input.buffer, input.getPosition()); Matcher m = pattern.matcher(sv); boolean found = m.find(); if (found) { input.skip(m.group().length()); return m.group(); } return null; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy