com.ibm.icu.text.SourceTargetUtility Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support
There is a newer version: 76.1
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2010-2011, Google, International Business Machines            *
 * Corporation and others. All Rights Reserved.                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

import java.util.HashSet;
import java.util.Set;

import com.ibm.icu.lang.CharSequences;

/**
 * Simple internal utility class for helping with getSource/TargetSet
 */
class SourceTargetUtility {
    final Transform transform;
    final UnicodeSet sourceCache;
    final Set sourceStrings;
    static final UnicodeSet NON_STARTERS = new UnicodeSet("[:^ccc=0:]").freeze();
    static Normalizer2 NFC = Normalizer2.getNFCInstance();
    //static final UnicodeSet TRAILING_COMBINING = new UnicodeSet();

    public SourceTargetUtility(Transform transform) {
        this(transform, null);
    }

    public SourceTargetUtility(Transform transform, Normalizer2 normalizer) {
        this.transform = transform;
        if (normalizer != null) {
//            synchronized (SourceTargetUtility.class) {
//                if (NFC == null) {
//                    NFC = Normalizer2.getInstance(null, "nfc", Mode.COMPOSE);
//                    for (int i = 0; i <= 0x10FFFF; ++i) {
//                        String d = NFC.getDecomposition(i);
//                        if (d == null) {
//                            continue;
//                        }
//                        String s = NFC.normalize(d);
//                        if (!CharSequences.equals(i, s)) {
//                            continue;
//                        }
//                        // composes
//                        boolean first = false;
//                        for (int trailing : CharSequences.codePoints(d)) {
//                            if (first) {
//                                first = false;
//                            } else {
//                                TRAILING_COMBINING.add(trailing);
//                            }
//                        }
//                    }
//                }
//            }
            sourceCache = new UnicodeSet("[:^ccc=0:]");
        } else {
            sourceCache = new UnicodeSet();
        }
        sourceStrings = new HashSet();
        for (int i = 0; i <= 0x10FFFF; ++i) {
            String s = transform.transform(UTF16.valueOf(i));
            boolean added = false;
            if (!CharSequences.equals(i, s)) {
                sourceCache.add(i);
                added = true;
            }
            if (normalizer == null) {
                continue;
            }
            String d = NFC.getDecomposition(i);
            if (d == null) {
                continue;
            }
            s = transform.transform(d);
            if (!d.equals(s)) {
                sourceStrings.add(d);
            }
            if (added) {
                continue;
            }
            if (!normalizer.isInert(i)) {
                sourceCache.add(i);
                continue;
            }
            // see if any of the non-starters change s; if so, add i
//            for (String ns : TRAILING_COMBINING) {
//                String s2 = transform.transform(s + ns);
//                if (!s2.startsWith(s)) {
//                    sourceCache.add(i);
//                    break;
//                }
//            }

            // int endOfFirst = CharSequences.onCharacterBoundary(d, 1) ? 1 : 2;
            // if (endOfFirst >= d.length()) {
            // continue;
            // }
            // // now add all initial substrings
            // for (int j = 1; j < d.length(); ++j) {
            // if (!CharSequences.onCharacterBoundary(d, j)) {
            // continue;
            // }
            // String dd = d.substring(0,j);
            // s = transform.transform(dd);
            // if (!dd.equals(s)) {
            // sourceStrings.add(dd);
            // }
            // }
        }
        sourceCache.freeze();
    }

    public void addSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet,
            UnicodeSet targetSet) {
        UnicodeSet myFilter = transliterator.getFilterAsUnicodeSet(inputFilter);
        UnicodeSet affectedCharacters = new UnicodeSet(sourceCache).retainAll(myFilter);
        sourceSet.addAll(affectedCharacters);
        for (String s : affectedCharacters) {
            targetSet.addAll(transform.transform(s));
        }
        for (String s : sourceStrings) {
            if (myFilter.containsAll(s)) {
                String t = transform.transform(s);
                if (!s.equals(t)) {
                    targetSet.addAll(t);
                    sourceSet.addAll(s);
                }
            }
        }
    }
}