All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.text.UnicodeFilter Maven / Gradle / Ivy

There is a newer version: 2.12.15
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 1996-2016, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.text;

/**
 * <code>UnicodeFilter</code> defines a protocol for selecting a subset of
 * Unicode characters.  Membership is tested one character at a time through
 * {@link #contains(int)}, which takes the character as an <code>int</code>.
 *
 * <p>Currently, filters are used in conjunction with classes like
 * {@link com.ibm.icu.text.Transliterator} so that only the selected
 * characters are processed by a transformation.
 *
 * @stable ICU 2.0
 */
@SuppressWarnings("javadoc")    // com.ibm.icu.text.Transliterator is in another project
public abstract class UnicodeFilter implements UnicodeMatcher {

    /**
     * Tests whether a character belongs to the selected subset.
     *
     * <p>A character that is to be filtered out is one for which this
     * method returns <code>false</code>.
     *
     * @param c the character to test
     * @return <code>true</code> if <code>c</code> is in the selected subset,
     *         <code>false</code> if it is to be filtered
     * @stable ICU 2.0
     */
    public abstract boolean contains(int c);

    /**
     * Default implementation of UnicodeMatcher::matches() for Unicode
     * filters.  Tests the single character that
     * <code>text.char32At(offset[0])</code> returns against
     * {@link #contains(int)}.
     *
     * <p>The direction is taken from the relation between
     * <code>offset[0]</code> and <code>limit</code>:
     * <ul>
     * <li><code>offset[0] &lt; limit</code>: on a match,
     *     <code>offset[0]</code> is advanced by the
     *     <code>UTF16.getCharCount</code> of the matched character.</li>
     * <li><code>offset[0] &gt; limit</code>: on a match,
     *     <code>offset[0]</code> is moved back past the preceding
     *     character.</li>
     * <li><code>offset[0] == limit</code>: nothing is read from
     *     <code>text</code>; the result depends only on
     *     <code>incremental</code>.</li>
     * </ul>
     *
     * @param text the text to read the character from
     * @param offset one-element in/out array; <code>offset[0]</code> is the
     *        position to test and is updated only when a match is found
     * @param limit the boundary that <code>offset[0]</code> is compared
     *        against
     * @param incremental if <code>true</code> and <code>offset[0]</code>
     *        equals <code>limit</code>, <code>U_PARTIAL_MATCH</code> is
     *        returned instead of <code>U_MISMATCH</code>
     * @return <code>U_MATCH</code>, <code>U_PARTIAL_MATCH</code> or
     *         <code>U_MISMATCH</code>
     * @stable ICU 2.0
     */
    @Override
    public int matches(Replaceable text,
                       int[] offset,
                       int limit,
                       boolean incremental) {
        final int position = offset[0];

        if (position < limit) {
            // Forward direction: consume the whole character on a match.
            final int codePoint = text.char32At(position);
            if (!contains(codePoint)) {
                return U_MISMATCH;
            }
            offset[0] += UTF16.getCharCount(codePoint);
            return U_MATCH;
        }

        if (position > limit) {
            // Reverse direction.
            if (!contains(text.char32At(position))) {
                return U_MISMATCH;
            }
            // Step back by one.  If the character now under the offset
            // occupies two code units (a surrogate pair), step back one
            // more so the offset stays on the lead surrogate.
            offset[0]--;
            if (offset[0] >= 0) {
                final int precedingWidth =
                        UTF16.getCharCount(text.char32At(offset[0]));
                offset[0] -= precedingWidth - 1;
            }
            return U_MATCH;
        }

        // position == limit: there is nothing to examine.
        return incremental ? U_PARTIAL_MATCH : U_MISMATCH;
    }

    // TODO Remove this once the JDK doc tooling implements MemberDoc.isSynthetic
    /**
     * (This constructor should not need to be declared; it exists only to
     * keep CheckTags happy.  Java inserts a synthetic constructor and
     * CheckTags can't tell that it's synthetic.)
     *
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    protected UnicodeFilter() {
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy