All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.impl.SimpleFormatterImpl Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

There is a newer version: 75.1
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2014-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.impl;

import java.io.IOException;
import java.text.Format;

import com.ibm.icu.util.ICUUncheckedIOException;

/**
 * Formats simple patterns like "{1} was born in {0}".
 * Internal version of {@link com.ibm.icu.text.SimpleFormatter}
 * with only static methods, to avoid wrapper objects.
 *
 * 

This class "compiles" pattern strings into a binary format * and implements formatting etc. based on that. * *

Format: * Index 0: One more than the highest argument number. * Followed by zero or more arguments or literal-text segments. * *

An argument is stored as its number, less than ARG_NUM_LIMIT. * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT, * followed by that many chars. */ public final class SimpleFormatterImpl { /** * Argument numbers must be smaller than this limit. * Text segment lengths are offset by this much. * This is currently the only unused char value in compiled patterns, * except it is the maximum value of the first unit (max arg +1). */ private static final int ARG_NUM_LIMIT = 0x100; private static final char LEN1_CHAR = (char)(ARG_NUM_LIMIT + 1); private static final char LEN2_CHAR = (char)(ARG_NUM_LIMIT + 2); private static final char LEN3_CHAR = (char)(ARG_NUM_LIMIT + 3); /** * Initial and maximum char/UChar value set for a text segment. * Segment length char values are from ARG_NUM_LIMIT+1 to this value here. * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing. */ private static final char SEGMENT_LENGTH_ARGUMENT_CHAR = (char)0xffff; /** * Maximum length of a text segment. Longer segments are split into shorter ones. */ private static final int MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_ARGUMENT_CHAR - ARG_NUM_LIMIT; /** "Intern" some common patterns. */ private static final String[][] COMMON_PATTERNS = { { "{0} {1}", "\u0002\u0000" + LEN1_CHAR + " \u0001" }, { "{0} ({1})", "\u0002\u0000" + LEN2_CHAR + " (\u0001" + LEN1_CHAR + ')' }, { "{0}, {1}", "\u0002\u0000" + LEN2_CHAR + ", \u0001" }, { "{0} – {1}", "\u0002\u0000" + LEN3_CHAR + " – \u0001" }, // en dash }; /** Use only static methods. */ private SimpleFormatterImpl() {} /** * Creates a compiled form of the pattern string, for use with appropriate static methods. * The number of arguments checked against the given limits is the * highest argument number plus one, not the number of occurrences of arguments. * * @param pattern The pattern string. * @param sb A StringBuilder instance which may or may not be used. * @param min The pattern must have at least this many arguments. * @param max The pattern must have at most this many arguments. * @return The compiled-pattern string. * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments. */ public static String compileToStringMinMaxArguments( CharSequence pattern, StringBuilder sb, int min, int max) { // Return some precompiled common two-argument patterns. if (min <= 2 && 2 <= max) { for (String[] pair : COMMON_PATTERNS) { if (pair[0].contentEquals(pattern)) { assert pair[1].charAt(0) == 2; return pair[1]; } } } // Parse consistent with MessagePattern, but // - support only simple numbered arguments // - build a simple binary structure into the result string int patternLength = pattern.length(); sb.ensureCapacity(patternLength); // Reserve the first char for the number of arguments. sb.setLength(1); int textLength = 0; int maxArg = -1; boolean inQuote = false; for (int i = 0; i < patternLength;) { char c = pattern.charAt(i++); if (c == '\'') { if (i < patternLength && (c = pattern.charAt(i)) == '\'') { // double apostrophe, skip the second one ++i; } else if (inQuote) { // skip the quote-ending apostrophe inQuote = false; continue; } else if (c == '{' || c == '}') { // Skip the quote-starting apostrophe, find the end of the quoted literal text. ++i; inQuote = true; } else { // The apostrophe is part of literal text. c = '\''; } } else if (!inQuote && c == '{') { if (textLength > 0) { sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength)); textLength = 0; } int argNumber; if ((i + 1) < patternLength && 0 <= (argNumber = pattern.charAt(i) - '0') && argNumber <= 9 && pattern.charAt(i + 1) == '}') { i += 2; } else { // Multi-digit argument number (no leading zero) or syntax error. // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index) // around the number, but this class does not. int argStart = i - 1; argNumber = -1; if (i < patternLength && '1' <= (c = pattern.charAt(i++)) && c <= '9') { argNumber = c - '0'; while (i < patternLength && '0' <= (c = pattern.charAt(i++)) && c <= '9') { argNumber = argNumber * 10 + (c - '0'); if (argNumber >= ARG_NUM_LIMIT) { break; } } } if (argNumber < 0 || c != '}') { throw new IllegalArgumentException( "Argument syntax error in pattern \"" + pattern + "\" at index " + argStart + ": " + pattern.subSequence(argStart, i)); } } if (argNumber > maxArg) { maxArg = argNumber; } sb.append((char)argNumber); continue; } // else: c is part of literal text // Append c and track the literal-text segment length. if (textLength == 0) { // Reserve a char for the length of a new text segment, preset the maximum length. sb.append(SEGMENT_LENGTH_ARGUMENT_CHAR); } sb.append(c); if (++textLength == MAX_SEGMENT_LENGTH) { textLength = 0; } } if (textLength > 0) { sb.setCharAt(sb.length() - textLength - 1, (char)(ARG_NUM_LIMIT + textLength)); } int argCount = maxArg + 1; if (argCount < min) { throw new IllegalArgumentException( "Fewer than minimum " + min + " arguments in pattern \"" + pattern + "\""); } if (argCount > max) { throw new IllegalArgumentException( "More than maximum " + max + " arguments in pattern \"" + pattern + "\""); } sb.setCharAt(0, (char)argCount); return sb.toString(); } /** * @param compiledPattern Compiled form of a pattern string. * @return The max argument number + 1. */ public static int getArgumentLimit(String compiledPattern) { return compiledPattern.charAt(0); } /** * Formats the given values. * * @param compiledPattern Compiled form of a pattern string. */ public static String formatCompiledPattern(String compiledPattern, CharSequence... values) { return formatAndAppend(compiledPattern, new StringBuilder(), null, values).toString(); } /** * Formats the not-compiled pattern with the given values. * Equivalent to compileToStringMinMaxArguments() followed by formatCompiledPattern(). * The number of arguments checked against the given limits is the * highest argument number plus one, not the number of occurrences of arguments. * * @param pattern Not-compiled form of a pattern string. * @param min The pattern must have at least this many arguments. * @param max The pattern must have at most this many arguments. * @return The compiled-pattern string. * @throws IllegalArgumentException for bad argument syntax and too few or too many arguments. */ public static String formatRawPattern(String pattern, int min, int max, CharSequence... values) { StringBuilder sb = new StringBuilder(); String compiledPattern = compileToStringMinMaxArguments(pattern, sb, min, max); sb.setLength(0); return formatAndAppend(compiledPattern, sb, null, values).toString(); } /** * Formats the given values, appending to the appendTo builder. * * @param compiledPattern Compiled form of a pattern string. * @param appendTo Gets the formatted pattern and values appended. * @param offsets offsets[i] receives the offset of where * values[i] replaced pattern argument {i}. * Can be null, or can be shorter or longer than values. * If there is no {i} in the pattern, then offsets[i] is set to -1. * @param values The argument values. * An argument value must not be the same object as appendTo. * values.length must be at least getArgumentLimit(). * Can be null if getArgumentLimit()==0. * @return appendTo */ public static StringBuilder formatAndAppend( String compiledPattern, StringBuilder appendTo, int[] offsets, CharSequence... values) { int valuesLength = values != null ? values.length : 0; if (valuesLength < getArgumentLimit(compiledPattern)) { throw new IllegalArgumentException("Too few values."); } return format(compiledPattern, values, appendTo, null, true, offsets); } /** * Formats the given values, replacing the contents of the result builder. * May optimize by actually appending to the result if it is the same object * as the value corresponding to the initial argument in the pattern. * * @param compiledPattern Compiled form of a pattern string. * @param result Gets its contents replaced by the formatted pattern and values. * @param offsets offsets[i] receives the offset of where * values[i] replaced pattern argument {i}. * Can be null, or can be shorter or longer than values. * If there is no {i} in the pattern, then offsets[i] is set to -1. * @param values The argument values. * An argument value may be the same object as result. * values.length must be at least getArgumentLimit(). * @return result */ public static StringBuilder formatAndReplace( String compiledPattern, StringBuilder result, int[] offsets, CharSequence... values) { int valuesLength = values != null ? values.length : 0; if (valuesLength < getArgumentLimit(compiledPattern)) { throw new IllegalArgumentException("Too few values."); } // If the pattern starts with an argument whose value is the same object // as the result, then we keep the result contents and append to it. // Otherwise we replace its contents. int firstArg = -1; // If any non-initial argument value is the same object as the result, // then we first copy its contents and use that instead while formatting. String resultCopy = null; if (getArgumentLimit(compiledPattern) > 0) { for (int i = 1; i < compiledPattern.length();) { int n = compiledPattern.charAt(i++); if (n < ARG_NUM_LIMIT) { if (values[n] == result) { if (i == 2) { firstArg = n; } else if (resultCopy == null) { resultCopy = result.toString(); } } } else { i += n - ARG_NUM_LIMIT; } } } if (firstArg < 0) { result.setLength(0); } return format(compiledPattern, values, result, resultCopy, false, offsets); } /** * Returns the pattern text with none of the arguments. * Like formatting with all-empty string values. * * @param compiledPattern Compiled form of a pattern string. */ public static String getTextWithNoArguments(String compiledPattern) { int capacity = compiledPattern.length() - 1 - getArgumentLimit(compiledPattern); StringBuilder sb = new StringBuilder(capacity); for (int i = 1; i < compiledPattern.length();) { int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT; if (segmentLength > 0) { int limit = i + segmentLength; sb.append(compiledPattern, i, limit); i = limit; } } return sb.toString(); } /** * Returns the length of the pattern text with none of the arguments. * @param compiledPattern Compiled form of a pattern string. * @param codePoints true to count code points; false to count code units. * @return The number of code points or code units. */ public static int getLength(String compiledPattern, boolean codePoints) { int result = 0; for (int i = 1; i < compiledPattern.length();) { int segmentLength = compiledPattern.charAt(i++) - ARG_NUM_LIMIT; if (segmentLength > 0) { int limit = i + segmentLength; if (codePoints) { result += Character.codePointCount(compiledPattern, i, limit); } else { result += (limit - i); } i = limit; } } return result; } /** * Returns the length in code units of the pattern text up until the first argument. * @param compiledPattern Compiled form of a pattern string. * @return The number of code units. */ public static int getPrefixLength(String compiledPattern) { if (compiledPattern.length() == 1) { return 0; } else if (compiledPattern.charAt(0) == 0) { return compiledPattern.length() - 2; } else if (compiledPattern.charAt(1) <= ARG_NUM_LIMIT) { return 0; } else { return compiledPattern.charAt(1) - ARG_NUM_LIMIT; } } /** * Special case for using FormattedStringBuilder with patterns with 0 or 1 argument. * * With 1 argument, treat the current contents of the FormattedStringBuilder between * start and end as the argument {0}. Insert the extra strings from compiledPattern * to surround the argument in the output. * * With 0 arguments, overwrite the entire contents of the FormattedStringBuilder * between start and end. * * @param compiledPattern Compiled form of a pattern string. * @param field Field to use when adding chars to the output. * @param start The start index of the argument already in the output string. * @param end The end index of the argument already in the output string. * @param output Destination for formatted output. * @return Net number of characters added to the formatted string. */ public static int formatPrefixSuffix( String compiledPattern, Format.Field field, int start, int end, FormattedStringBuilder output) { int argLimit = getArgumentLimit(compiledPattern); if (argLimit == 0) { // No arguments in compiled pattern; overwrite the entire segment with our string. return output.splice(start, end, compiledPattern, 2, compiledPattern.length(), field); } else { assert argLimit == 1; int suffixOffset; int length = 0; if (compiledPattern.charAt(1) != '\u0000') { int prefixLength = compiledPattern.charAt(1) - ARG_NUM_LIMIT; length = output.insert(start, compiledPattern, 2, 2 + prefixLength, field); suffixOffset = 3 + prefixLength; } else { suffixOffset = 2; } if (suffixOffset < compiledPattern.length()) { int suffixLength = compiledPattern.charAt(suffixOffset) - ARG_NUM_LIMIT; length += output.insert(end + length, compiledPattern, 1 + suffixOffset, 1 + suffixOffset + suffixLength, field); } return length; } } /** Internal iterator interface for maximum efficiency. * * Usage boilerplate: * *

     * long state = 0;
     * while (true) {
     *     state = IterInternal.step(state, compiledPattern, output);
     *     if (state == IterInternal.DONE) {
     *         break;
     *     }
     *     int argIndex = IterInternal.getArgIndex(state);
     *     // Append the string corresponding to argIndex to output
     * }
     * 
* */ public static class IterInternal { public static final long DONE = -1; public static long step(long state, CharSequence compiledPattern, Appendable output) { int i = (int) (state >>> 32); assert i < compiledPattern.length(); i++; while (i < compiledPattern.length() && compiledPattern.charAt(i) > ARG_NUM_LIMIT) { int limit = i + compiledPattern.charAt(i) + 1 - ARG_NUM_LIMIT; try { output.append(compiledPattern, i + 1, limit); } catch (IOException e) { throw new ICUUncheckedIOException(e); } i = limit; } if (i == compiledPattern.length()) { return DONE; } return (((long) i) << 32) | compiledPattern.charAt(i); } public static int getArgIndex(long state) { return (int) state; } } private static StringBuilder format( String compiledPattern, CharSequence[] values, StringBuilder result, String resultCopy, boolean forbidResultAsValue, int[] offsets) { int offsetsLength; if (offsets == null) { offsetsLength = 0; } else { offsetsLength = offsets.length; for (int i = 0; i < offsetsLength; i++) { offsets[i] = -1; } } for (int i = 1; i < compiledPattern.length();) { int n = compiledPattern.charAt(i++); if (n < ARG_NUM_LIMIT) { CharSequence value = values[n]; if (value == result) { if (forbidResultAsValue) { throw new IllegalArgumentException("Value must not be same object as result"); } if (i == 2) { // We are appending to result which is also the first value object. if (n < offsetsLength) { offsets[n] = 0; } } else { if (n < offsetsLength) { offsets[n] = result.length(); } result.append(resultCopy); } } else { if (n < offsetsLength) { offsets[n] = result.length(); } result.append(value); } } else { int limit = i + (n - ARG_NUM_LIMIT); result.append(compiledPattern, i, limit); i = limit; } } return result; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy