All downloads are free. The search and download functionality uses the official Maven repository.

com.ibm.icu.impl.text.RbnfScannerProviderImpl Maven / Gradle / Ivy

Go to download

International Components for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support.

There is a newer version: 76.1
Show newest version
/*
*******************************************************************************
* Copyright (C) 2009-2014, International Business Machines Corporation and    *
* others. All Rights Reserved.                                                *
*******************************************************************************
*/

package com.ibm.icu.impl.text;

import java.util.HashMap;
import java.util.Map;

import com.ibm.icu.impl.ICUDebug;
import com.ibm.icu.text.CollationElementIterator;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RbnfLenientScanner;
import com.ibm.icu.text.RbnfLenientScannerProvider;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;

/**
 * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat
 * implementation behind setLenientParseMode, which is based on Collator.
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
    private static final boolean DEBUG = ICUDebug.enabled("rbnf");
    private Map cache;

    /**
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public RbnfScannerProviderImpl() {
        cache = new HashMap();
    }

    /**
     * Returns a collation-based scanner.
     *
     * Only primary differences are treated as significant.  This means that case
     * differences, accent differences, alternate spellings of the same letter
     * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
     * matching the text.  In many cases, numerals will be accepted in place of words
     * or phrases as well.
     *
     * For example, all of the following will correctly parse as 255 in English in
     * lenient-parse mode:
     * 
"two hundred fifty-five" *
"two hundred fifty five" *
"TWO HUNDRED FIFTY-FIVE" *
"twohundredfiftyfive" *
"2 hundred fifty-5" * * The Collator used is determined by the locale that was * passed to this object on construction. The description passed to this object * on construction may supply additional collation rules that are appended to the * end of the default collator for the locale, enabling additional equivalences * (such as adding more ignorable characters or permitting spelled-out version of * symbols; see the demo program for examples). * * It's important to emphasize that even strict parsing is relatively lenient: it * will accept some text that it won't produce as output. In English, for example, * it will correctly parse "two hundred zero" and "fifteen hundred". * * @internal * @deprecated This API is ICU internal only. */ @Deprecated public RbnfLenientScanner get(ULocale locale, String extras) { RbnfLenientScanner result = null; String key = locale.toString() + "/" + extras; synchronized(cache) { result = cache.get(key); if (result != null) { return result; } } result = createScanner(locale, extras); synchronized(cache) { cache.put(key, result); } return result; } /** * @internal * @deprecated This API is ICU internal only. 
*/ @Deprecated protected RbnfLenientScanner createScanner(ULocale locale, String extras) { RuleBasedCollator collator = null; try { // create a default collator based on the locale, // then pull out that collator's rules, append any additional // rules specified in the description, and create a _new_ // collator based on the combination of those rules collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale()); if (extras != null) { String rules = collator.getRules() + extras; collator = new RuleBasedCollator(rules); } collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); } catch (Exception e) { // If we get here, it means we have a malformed set of // collation rules, which hopefully won't happen ///CLOVER:OFF if (DEBUG){ // debug hook e.printStackTrace(); System.out.println("++++"); } collator = null; ///CLOVER:ON } return new RbnfLenientScannerImpl(collator); } private static class RbnfLenientScannerImpl implements RbnfLenientScanner { private final RuleBasedCollator collator; private RbnfLenientScannerImpl(RuleBasedCollator rbc) { this.collator = rbc; } public boolean allIgnorable(String s) { CollationElementIterator iter = collator.getCollationElementIterator(s); int o = iter.next(); while (o != CollationElementIterator.NULLORDER && CollationElementIterator.primaryOrder(o) == 0) { o = iter.next(); } return o == CollationElementIterator.NULLORDER; } public int[] findText(String str, String key, int startingAt) { int p = startingAt; int keyLen = 0; // basically just isolate smaller and smaller substrings of // the target string (each running to the end of the string, // and with the first one running from startingAt to the end) // and then use prefixLength() to see if the search key is at // the beginning of each substring. This is excruciatingly // slow, but it will locate the key and tell use how long the // matching text was. 
while (p < str.length() && keyLen == 0) { keyLen = prefixLength(str.substring(p), key); if (keyLen != 0) { return new int[] { p, keyLen }; } ++p; } // if we make it to here, we didn't find it. Return -1 for the // location. The length should be ignored, but set it to 0, // which should be "safe" return new int[] { -1, 0 }; } ///CLOVER:OFF // The following method contains the same signature as findText // and has never been used by anything once. @SuppressWarnings("unused") public int[] findText2(String str, String key, int startingAt) { CollationElementIterator strIter = collator.getCollationElementIterator(str); CollationElementIterator keyIter = collator.getCollationElementIterator(key); int keyStart = -1; strIter.setOffset(startingAt); int oStr = strIter.next(); int oKey = keyIter.next(); while (oKey != CollationElementIterator.NULLORDER) { while (oStr != CollationElementIterator.NULLORDER && CollationElementIterator.primaryOrder(oStr) == 0) oStr = strIter.next(); while (oKey != CollationElementIterator.NULLORDER && CollationElementIterator.primaryOrder(oKey) == 0) oKey = keyIter.next(); if (oStr == CollationElementIterator.NULLORDER) { return new int[] { -1, 0 }; } if (oKey == CollationElementIterator.NULLORDER) { break; } if (CollationElementIterator.primaryOrder(oStr) == CollationElementIterator.primaryOrder(oKey)) { keyStart = strIter.getOffset(); oStr = strIter.next(); oKey = keyIter.next(); } else { if (keyStart != -1) { keyStart = -1; keyIter.reset(); } else { oStr = strIter.next(); } } } if (oKey == CollationElementIterator.NULLORDER) { return new int[] { keyStart, strIter.getOffset() - keyStart }; } return new int[] { -1, 0 }; } ///CLOVER:ON public int prefixLength(String str, String prefix) { // Create two collation element iterators, one over the target string // and another over the prefix. // // Previous code was matching "fifty-" against " fifty" and leaving // the number " fifty-7" to parse as 43 (50 - 7). 
// Also it seems that if we consume the entire prefix, that's ok even // if we've consumed the entire string, so I switched the logic to // reflect this. CollationElementIterator strIter = collator.getCollationElementIterator(str); CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix); // match collation elements between the strings int oStr = strIter.next(); int oPrefix = prefixIter.next(); while (oPrefix != CollationElementIterator.NULLORDER) { // skip over ignorable characters in the target string while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr != CollationElementIterator.NULLORDER) { oStr = strIter.next(); } // skip over ignorable characters in the prefix while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix != CollationElementIterator.NULLORDER) { oPrefix = prefixIter.next(); } // if skipping over ignorables brought to the end of // the prefix, we DID match: drop out of the loop if (oPrefix == CollationElementIterator.NULLORDER) { break; } // if skipping over ignorables brought us to the end // of the target string, we didn't match and return 0 if (oStr == CollationElementIterator.NULLORDER) { return 0; } // match collation elements from the two strings // (considering only primary differences). If we // get a mismatch, dump out and return 0 if (CollationElementIterator.primaryOrder(oStr) != CollationElementIterator.primaryOrder(oPrefix)) { return 0; } // otherwise, advance to the next character in each string // and loop (we drop out of the loop when we exhaust // collation elements in the prefix) oStr = strIter.next(); oPrefix = prefixIter.next(); } int result = strIter.getOffset(); if (oStr != CollationElementIterator.NULLORDER) { --result; } return result; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy