All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ibm.icu.impl.breakiter.BytesDictionaryMatcher Maven / Gradle / Ivy

Go to download

International Component for Unicode for Java (ICU4J) is a mature, widely used Java library providing Unicode and Globalization support

There is a newer version: 76.1
Show newest version
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2014, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl.breakiter;

import java.text.CharacterIterator;

import com.ibm.icu.impl.Assert;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.BytesTrie.Result;

class BytesDictionaryMatcher extends DictionaryMatcher {
    private final byte[] characters;
    private final int transform;

    public BytesDictionaryMatcher(byte[] chars, int transform) {
        characters = chars;
        Assert.assrt((transform & DictionaryData.TRANSFORM_TYPE_MASK) == DictionaryData.TRANSFORM_TYPE_OFFSET);
        // while there is only one transform type so far, save the entire transform constant so that
        // if we add any others, we need only change code in transform() and the assert above rather
        // than adding a "transform type" variable
        this.transform = transform;
    }

    private int transform(int c) {
        if (c == 0x200D) {
            return 0xFF;
        } else if (c == 0x200C) {
            return 0xFE;
        }

        int delta = c - (transform & DictionaryData.TRANSFORM_OFFSET_MASK);
        if (delta < 0 || 0xFD < delta) {
            return -1;
        }
        return delta;
    }

    @Override
    public int matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values) {
        UCharacterIterator text = UCharacterIterator.getInstance(text_);
        BytesTrie bt = new BytesTrie(characters, 0);
        int c = text.nextCodePoint();
        if (c == UCharacterIterator.DONE) {
            return 0;
        }
        Result result = bt.first(transform(c));
        // TODO: should numChars count Character.charCount() ?
        int numChars = 1;
        int count = 0;
        for (;;) {
            if (result.hasValue()) {
                if (count < limit) {
                    if (values != null) {
                        values[count] = bt.getValue();
                    }
                    lengths[count] = numChars;
                    count++;
                }
                if (result == Result.FINAL_VALUE) {
                    break;
                }
            } else if (result == Result.NO_MATCH) {
                break;
            }

            if (numChars >= maxLength) {
                break;
            }

            c = text.nextCodePoint();
            if (c == UCharacterIterator.DONE) {
                break;
            }
            ++numChars;
            result = bt.next(transform(c));
        }
        count_[0] = count;
        return numChars;
    }

    @Override
    public int getType() {
        return DictionaryData.TRIE_TYPE_BYTES;
    }
}






© 2015 - 2024 Weber Informatics LLC | Privacy Policy