All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.impl.number.parse.NumberParserImpl Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

There is a newer version: 76.1
Show newest version
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.impl.number.parse;

import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.AffixUtils;
import com.ibm.icu.impl.number.CustomSymbolCurrency;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.DecimalFormatProperties.ParseMode;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.impl.number.PatternStringParser.ParsedPatternInfo;
import com.ibm.icu.impl.number.PropertiesAffixPatternProvider;
import com.ibm.icu.impl.number.RoundingUtils;
import com.ibm.icu.number.NumberFormatter.GroupingStrategy;
import com.ibm.icu.number.Scale;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.CurrencyAmount;
import com.ibm.icu.util.ULocale;

/**
 * Primary number parsing implementation class.
 *
 * @author sffc
 *
 */
public class NumberParserImpl {

    /**
     * Creates a parser with most default options. Used for testing, not production.
     */
    public static NumberParserImpl createSimpleParser(ULocale locale, String pattern, int parseFlags) {

        NumberParserImpl parser = new NumberParserImpl(parseFlags);
        Currency currency = Currency.getInstance("USD");
        DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
        IgnorablesMatcher ignorables = IgnorablesMatcher.getInstance(parseFlags);

        AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
        factory.currency = currency;
        factory.symbols = symbols;
        factory.ignorables = ignorables;
        factory.locale = locale;
        factory.parseFlags = parseFlags;

        ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
        AffixMatcher.createMatchers(patternInfo, parser, factory, ignorables, parseFlags);

        Grouper grouper = Grouper.forStrategy(GroupingStrategy.AUTO).withLocaleData(locale, patternInfo);

        parser.addMatcher(ignorables);
        parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
        parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
        parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
        parser.addMatcher(PercentMatcher.getInstance(symbols));
        parser.addMatcher(PermilleMatcher.getInstance(symbols));
        parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
        parser.addMatcher(InfinityMatcher.getInstance(symbols));
        parser.addMatcher(PaddingMatcher.getInstance("@"));
        parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
        parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols, parseFlags));
        parser.addMatcher(new RequireNumberValidator());

        parser.freeze();
        return parser;
    }

    /**
     * Parses the string without returning a NumberParserImpl. Used for testing, not production.
     */
    public static Number parseStatic(
            String input,
            ParsePosition ppos,
            DecimalFormatProperties properties,
            DecimalFormatSymbols symbols) {
        NumberParserImpl parser = createParserFromProperties(properties, symbols, false);
        ParsedNumber result = new ParsedNumber();
        parser.parse(input, true, result);
        if (result.success()) {
            ppos.setIndex(result.charEnd);
            return result.getNumber();
        } else {
            ppos.setErrorIndex(result.charEnd);
            return null;
        }
    }

    /**
     * Parses the string without returning a NumberParserImpl. Used for testing, not production.
     */
    public static CurrencyAmount parseStaticCurrency(
            String input,
            ParsePosition ppos,
            DecimalFormatProperties properties,
            DecimalFormatSymbols symbols) {
        NumberParserImpl parser = createParserFromProperties(properties, symbols, true);
        ParsedNumber result = new ParsedNumber();
        parser.parse(input, true, result);
        if (result.success()) {
            ppos.setIndex(result.charEnd);
            assert result.currencyCode != null;
            return new CurrencyAmount(result.getNumber(), Currency.getInstance(result.currencyCode));
        } else {
            ppos.setErrorIndex(result.charEnd);
            return null;
        }
    }

    public static NumberParserImpl createDefaultParserForLocale(ULocale loc) {
        DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(loc);
        DecimalFormatProperties properties = PatternStringParser.parseToProperties("0");
        return createParserFromProperties(properties, symbols, false);
    }

    /**
     * Creates a parser from the given DecimalFormatProperties. This is the endpoint used by
     * DecimalFormat in production code.
     *
     * @param properties
     *            The property bag.
     * @param symbols
     *            The locale's symbols.
     * @param parseCurrency
     *            True to force a currency match and use monetary separators; false otherwise.
     * @return An immutable parser object.
     */
    public static NumberParserImpl createParserFromProperties(
            DecimalFormatProperties properties,
            DecimalFormatSymbols symbols,
            boolean parseCurrency) {

        ULocale locale = symbols.getULocale();
        AffixPatternProvider affixProvider = PropertiesAffixPatternProvider.forProperties(properties);
        Currency currency = CustomSymbolCurrency.resolve(properties.getCurrency(), locale, symbols);
        ParseMode parseMode = properties.getParseMode();
        if (parseMode == null) {
            parseMode = ParseMode.LENIENT;
        }
        Grouper grouper = Grouper.forProperties(properties);
        int parseFlags = 0;
        if (!properties.getParseCaseSensitive()) {
            parseFlags |= ParsingUtils.PARSE_FLAG_IGNORE_CASE;
        }
        if (properties.getParseIntegerOnly()) {
            parseFlags |= ParsingUtils.PARSE_FLAG_INTEGER_ONLY;
        }
        if (properties.getParseToBigDecimal()) {
            parseFlags |= ParsingUtils.PARSE_FLAG_FORCE_BIG_DECIMAL;
        }
        if (properties.getSignAlwaysShown()) {
            parseFlags |= ParsingUtils.PARSE_FLAG_PLUS_SIGN_ALLOWED;
        }
        if (parseMode == ParseMode.JAVA_COMPATIBILITY) {
            parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS;
            parseFlags |= ParsingUtils.PARSE_FLAG_USE_FULL_AFFIXES;
            parseFlags |= ParsingUtils.PARSE_FLAG_EXACT_AFFIX;
            parseFlags |= ParsingUtils.PARSE_FLAG_JAVA_COMPATIBILITY_IGNORABLES;
        } else if (parseMode == ParseMode.STRICT) {
            parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
            parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS;
            parseFlags |= ParsingUtils.PARSE_FLAG_USE_FULL_AFFIXES;
            parseFlags |= ParsingUtils.PARSE_FLAG_EXACT_AFFIX;
            parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_IGNORABLES;
        } else {
            parseFlags |= ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
        }
        if (grouper.getPrimary() <= 0) {
            parseFlags |= ParsingUtils.PARSE_FLAG_GROUPING_DISABLED;
        }
        if (parseCurrency || affixProvider.hasCurrencySign()) {
            parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS;
        }
        if (!parseCurrency) {
            parseFlags |= ParsingUtils.PARSE_FLAG_NO_FOREIGN_CURRENCIES;
        }

        NumberParserImpl parser = new NumberParserImpl(parseFlags);
        IgnorablesMatcher ignorables = IgnorablesMatcher.getInstance(parseFlags);

        AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
        factory.currency = currency;
        factory.symbols = symbols;
        factory.ignorables = ignorables;
        factory.locale = locale;
        factory.parseFlags = parseFlags;

        //////////////////////
        /// AFFIX MATCHERS ///
        //////////////////////

        // Set up a pattern modifier with mostly defaults to generate AffixMatchers.
        AffixMatcher.createMatchers(affixProvider, parser, factory, ignorables, parseFlags);

        ////////////////////////
        /// CURRENCY MATCHER ///
        ////////////////////////

        if (parseCurrency || affixProvider.hasCurrencySign()) {
            parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols, parseFlags));
        }

        ///////////////
        /// PERCENT ///
        ///////////////

        // ICU-TC meeting, April 11, 2018: accept percent/permille only if it is in the pattern,
        // and to maintain regressive behavior, divide by 100 even if no percent sign is present.
        if (parseMode == ParseMode.LENIENT && affixProvider.containsSymbolType(AffixUtils.TYPE_PERCENT)) {
            parser.addMatcher(PercentMatcher.getInstance(symbols));
        }
        if (parseMode == ParseMode.LENIENT && affixProvider.containsSymbolType(AffixUtils.TYPE_PERMILLE)) {
            parser.addMatcher(PermilleMatcher.getInstance(symbols));
        }

        ///////////////////////////////
        /// OTHER STANDARD MATCHERS ///
        ///////////////////////////////

        if (parseMode == ParseMode.LENIENT) {
            parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
            parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
        }
        parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
        parser.addMatcher(InfinityMatcher.getInstance(symbols));
        String padString = properties.getPadString();
        if (padString != null && !ignorables.getSet().contains(padString)) {
            parser.addMatcher(PaddingMatcher.getInstance(padString));
        }
        parser.addMatcher(ignorables);
        parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
        // NOTE: parseNoExponent doesn't disable scientific parsing if we have a scientific formatter
        if (!properties.getParseNoExponent() || properties.getMinimumExponentDigits() > 0) {
            parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
        }

        //////////////////
        /// VALIDATORS ///
        //////////////////

        parser.addMatcher(new RequireNumberValidator());
        if (parseMode != ParseMode.LENIENT) {
            parser.addMatcher(new RequireAffixValidator());
        }
        if (parseCurrency) {
            parser.addMatcher(new RequireCurrencyValidator());
        }
        if (properties.getDecimalPatternMatchRequired()) {
            boolean patternHasDecimalSeparator = properties.getDecimalSeparatorAlwaysShown()
                    || properties.getMaximumFractionDigits() != 0;
            parser.addMatcher(RequireDecimalSeparatorValidator.getInstance(patternHasDecimalSeparator));
        }
        // The multiplier takes care of scaling percentages.
        Scale multiplier = RoundingUtils.scaleFromProperties(properties);
        if (multiplier != null) {
            parser.addMatcher(new MultiplierParseHandler(multiplier));
        }

        parser.freeze();
        return parser;
    }

    private final int parseFlags;
    private final List matchers;
    private boolean frozen;

    /**
     * Creates a new, empty parser.
     *
     * @param parseFlags
     *            The parser settings defined in the PARSE_FLAG_* fields.
     */
    public NumberParserImpl(int parseFlags) {
        matchers = new ArrayList<>();
        this.parseFlags = parseFlags;
        frozen = false;
    }

    public void addMatcher(NumberParseMatcher matcher) {
        assert !frozen;
        this.matchers.add(matcher);
    }

    public void addMatchers(Collection matchers) {
        assert !frozen;
        this.matchers.addAll(matchers);
    }

    public void freeze() {
        frozen = true;
    }

    public int getParseFlags() {
        return parseFlags;
    }

    public void parse(String input, boolean greedy, ParsedNumber result) {
        parse(input, 0, greedy, result);
    }

    /**
     * Primary entrypoint to parsing code path.
     *
     * @param input
     *            The string to parse. This is a String, not CharSequence, to enforce assumptions about
     *            immutability (CharSequences are not guaranteed to be immutable).
     * @param start
     *            The index into the string at which to start parsing.
     * @param greedy
     *            Whether to use the faster but potentially less accurate greedy code path.
     * @param result
     *            Output variable to store results.
     */
    public void parse(String input, int start, boolean greedy, ParsedNumber result) {
        assert frozen;
        assert start >= 0 && start < input.length();
        StringSegment segment = new StringSegment(input,
                0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE));
        segment.adjustOffset(start);
        if (greedy) {
            parseGreedy(segment, result);
        } else if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION)) {
            // Start at 1 so that recursionLevels never gets to 0
            parseLongestRecursive(segment, result, 1);
        } else {
            // Arbitrary recursion safety limit: 100 levels.
            parseLongestRecursive(segment, result, -100);
        }
        for (NumberParseMatcher matcher : matchers) {
            matcher.postProcess(result);
        }
        result.postProcess();
    }

    private void parseGreedy(StringSegment segment, ParsedNumber result) {
        // Note: this method is not recursive in order to avoid stack overflow.
        for (int i = 0; i < matchers.size();) {
            // Base Case
            if (segment.length() == 0) {
                return;
            }
            NumberParseMatcher matcher = matchers.get(i);
            if (!matcher.smokeTest(segment)) {
                // Matcher failed smoke test: try the next one
                i++;
                continue;
            }
            int initialOffset = segment.getOffset();
            matcher.match(segment, result);
            if (segment.getOffset() != initialOffset) {
                // Greedy heuristic: accept the match and loop back
                i = 0;
                continue;
            } else {
                // Matcher did not match: try the next one
                i++;
                continue;
            }
        }

        // NOTE: If we get here, the greedy parse completed without consuming the entire string.
    }

    private void parseLongestRecursive(StringSegment segment, ParsedNumber result, int recursionLevels) {
        // Base Case
        if (segment.length() == 0) {
            return;
        }

        // Safety against stack overflow
        if (recursionLevels == 0) {
            return;
        }

        // TODO: Give a nice way for the matcher to reset the ParsedNumber?
        ParsedNumber initial = new ParsedNumber();
        initial.copyFrom(result);
        ParsedNumber candidate = new ParsedNumber();

        int initialOffset = segment.getOffset();
        for (int i = 0; i < matchers.size(); i++) {
            NumberParseMatcher matcher = matchers.get(i);
            if (!matcher.smokeTest(segment)) {
                continue;
            }

            // In a non-greedy parse, we attempt all possible matches and pick the best.
            for (int charsToConsume = 0; charsToConsume < segment.length();) {
                charsToConsume += Character.charCount(segment.codePointAt(charsToConsume));

                // Run the matcher on a segment of the current length.
                candidate.copyFrom(initial);
                segment.setLength(charsToConsume);
                boolean maybeMore = matcher.match(segment, candidate);
                segment.resetLength();

                // If the entire segment was consumed, recurse.
                if (segment.getOffset() - initialOffset == charsToConsume) {
                    parseLongestRecursive(segment, candidate, recursionLevels + 1);
                    if (candidate.isBetterThan(result)) {
                        result.copyFrom(candidate);
                    }
                }

                // Since the segment can be re-used, reset the offset.
                // This does not have an effect if the matcher did not consume any chars.
                segment.setOffset(initialOffset);

                // Unless the matcher wants to see the next char, continue to the next matcher.
                if (!maybeMore) {
                    break;
                }
            }
        }
    }

    @Override
    public String toString() {
        return "";
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy