com.squarespace.cldrengine.message.MessagePatternParser Maven / Gradle / Ivy
The newest version!
package com.squarespace.cldrengine.message;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import com.squarespace.cldrengine.message.MessageMatcher.State;
/**
* Hand-implemented parser for ICU message format. Designed to be compact and fast vs. other implementations. The parser
* produces an instruction tree which can be cached for repeated use, and is intended to be evaluated by a separate
* engine.
*
* Note: The 'choice' formatter is not implemented since it is deprecated.
*
* See ICU docs for details on syntax:
* https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/text/MessageFormat.html
*
* Rationale
*
* The decision to hand-implement the parser was made for 2 reasons: 1. Avoiding extra runtime dependencies (parser
* generators, e.g. pegjs, etc.) 2. Providing control over memory allocation, garbage generation, and other aspects
* that impact parser performance.
*
* A major consideration is size of the resulting parser code. The OpenJS project 'messageformat-parser' which is
* generated by Peg.js is 30kB minified. It also requires other dependencies for the plural calculations, where this is
* already supported in our library via @phensley/plurals
*
* See: https://unpkg.com/messageformat-parser/parser.js
*/
public class MessagePatternParser {
// Shared instruction instance built with the NOOP operation type.
private static final MessageCode NOOP = new MessageCode(MessageOpType.NOOP);
// Opening delimiter of a tag; triggers tag processing in outer().
private static final char LEFT = '{';
// Closing delimiter of a tag.
private static final char RIGHT = '}';
// When this immediately follows LEFT, the tag is "hidden" and emitted as literal text.
private static final char MINUS = '-';
// Quote character: doubled it yields a single apostrophe, otherwise it starts an escaped run of text.
private static final char APOS = '\'';
// The '#' character. NOTE(review): not referenced in the visible part of the file; presumably the
// argument-substitution placeholder handled by textarg() -- confirm.
private static final char POUND = '#';
// The raw message pattern being parsed.
private final String str;
// Cached length of str.
private final int len;
// Matcher constructed over the formatter names and the same pattern string.
private final MessageMatcher matcher;
// When true, apostrophes are copied to the output verbatim instead of being treated as escapes.
private final boolean disableEscapes;
/**
 * Creates a parser bound to a single message pattern.
 *
 * @param formatters collection handed to the {@link MessageMatcher} built for this pattern
 * @param str the message pattern to parse; must not be null since its length is read here
 * @param disableEscapes when true, apostrophes in the pattern are kept as literal characters
 */
public MessagePatternParser(Collection formatters, String str, boolean disableEscapes) {
  // Plain configuration first
  this.disableEscapes = disableEscapes;

  // Pattern text and its cached length (length is read before the matcher is built)
  this.str = str;
  this.len = this.str.length();

  // Matcher operates over the same pattern text
  this.matcher = new MessageMatcher(formatters, this.str);
}
/**
 * Parses the whole pattern, from index 0 up to its length, at the outermost scope
 * (no argument substitution is supplied).
 *
 * @return the instruction produced by {@link #outer(MessageMatcher.State, Object)} for the full range
 */
public MessageCode parse() {
  return outer(new State(0, len), null);
}
/**
 * Parses the range {@code [r.s, r.e)} of the pattern as a mix of plain text, apostrophe-escaped
 * text and brace-delimited tags, collecting one instruction node per section.
 *
 * <p>Plain characters are buffered and emitted through {@code textarg}; a '{' flushes the buffer
 * and hands the tag interior to {@link #inner(MessageMatcher.State)}; an apostrophe either
 * collapses a doubled quote or copies the quoted run into the buffer (unless escapes are
 * disabled, in which case it is copied as-is).
 *
 * @param r the range to scan; {@code r.s} is advanced in place as input is consumed
 * @param argsub value passed through to {@code textarg} for every text node; {@code null} at the
 *        outermost scope (see {@link #parse()})
 * @return the result of flattening the collected nodes
 */
public MessageCode outer(MessageMatcher.State r, Object argsub) {
  // Accumulate parsed instruction nodes
  List<MessageCode> n = new ArrayList<>();
  // Accumulate plain text characters
  StringBuilder buf = new StringBuilder();

  while (r.s < r.e) {
    char c = str.charAt(r.s);
    // Dispatch on characters which mark the start of a special section
    switch (c) {
      case LEFT: {
        // Push non-empty buffer; reset it in place rather than allocating a new builder
        if (buf.length() > 0) {
          n.add(textarg(buf.toString(), argsub));
          buf.setLength(0);
        }

        int sn = r.s + 1;
        boolean hidden = sn < len && str.charAt(sn) == MINUS;
        int k = seek(r.s, r.e);
        if (k == -1) {
          // seek() reported failure: emit the remainder of the range as text
          n.add(textarg(str.substring(r.s, r.e), argsub));
          r.s = r.e;
        } else if (hidden) {
          // Tag is hidden from processor, emit as text with the '-' marker removed
          n.add(new MessageTextCode(LEFT + str.substring(r.s + 2, k + 1)));
          // Skip over hidden tag
          r.s = k;
        } else {
          // Process tag interior
          MessageCode child = inner(new MessageMatcher.State(r.s + 1, k));
          if (child == null) {
            // If we're not in the outermost scope, push the non-empty interior as text
            if (argsub != null && r.s + 1 != k) {
              n.add(textarg(str.substring(r.s + 1, k), argsub));
            }
          } else {
            n.add(child);
          }
          // Skip over processed tag
          r.s = k;
        }
        break;
      }

      case APOS: {
        if (disableEscapes) {
          buf.append(c);
        } else {
          int k = r.s + 1;
          if (k < len && c == str.charAt(k)) {
            // Convert double apostrophe to single
            buf.append(c);
            r.s++;
          } else {
            // Skip over apostrophe
            r.s++;
            // Find the closing apostrophe. The search runs over the whole pattern, so a match
            // past the end of this range is treated like "not found": the escaped run must not
            // pull in text that belongs to an enclosing scope.
            k = str.indexOf(c, r.s);
            if (k == -1 || k > r.e) {
              k = r.e;
            }
            // Append the escaped run to the plain-text buffer
            buf.append(str, r.s, k);
            // Skip over escaped text
            r.s = k;
          }
        }
        break;
      }

      default:
        // Append plain character to output buffer
        buf.append(c);
        break;
    }
    // Step past the character (or closing delimiter) just handled
    r.s++;
  }

  // Push any trailing characters
  if (buf.length() > 0) {
    n.add(textarg(buf.toString(), argsub));
  }

  // Flatten blocks
  return flatten(n);
}
public MessageCode inner(MessageMatcher.State r) {
MessageMatcher m = this.matcher;
// Skip any optional leading spaces
m.spaces(r);
// See if we have any arguments. we must have at least one or we fail this tag.
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy