All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.wildfly.common.iteration.Utf8DecodingIterator Maven / Gradle / Ivy

/*
 * JBoss, Home of Professional Open Source.
 * Copyright 2017 Red Hat, Inc., and individual contributors
 * as indicated by the @author tags.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.wildfly.common.iteration;

import java.util.NoSuchElementException;

/**
 */
class Utf8DecodingIterator extends CodePointIterator {
    private final ByteIterator iter;
    private long offset = 0;

    Utf8DecodingIterator(final ByteIterator iter) {
        this.iter = iter;
    }

    public boolean hasNext() {
        return iter.hasNext();
    }

    public boolean hasPrevious() {
        return offset > 0;
    }

    private void seekToNext() {
        int b;
        while (iter.hasNext()) {
            b = iter.next();
            if ((b & 0b11_000000) != 0b10_000000) {
                // back up one spot
                iter.previous();
                return;
            }
        }
    }

    private void seekToPrev() {
        int b;
        while (iter.hasPrevious()) {
            b = iter.previous();
            if ((b & 0b11_000000) != 0b10_000000) {
                return;
            }
        }
    }

    public int next() {
        if (! iter.hasNext()) throw new NoSuchElementException();
        offset++;
        // >= 1 byte
        int a = iter.next();
        if ((a & 0b1_0000000) == 0b0_0000000) {
            // one byte
            return a;
        }
        if ((a & 0b11_000000) == 0b10_000000) {
            // first byte is invalid; return � instead
            seekToNext();
            return '�';
        }
        // >= 2 bytes
        if (! iter.hasNext()) {
            // truncated
            return '�';
        }
        int b = iter.next();
        if ((b & 0b11_000000) != 0b10_000000) {
            // second byte is invalid; return � instead
            seekToNext();
            return '�';
        }
        if ((a & 0b111_00000) == 0b110_00000) {
            // two bytes
            return (a & 0b000_11111) << 6 | b & 0b00_111111;
        }
        // >= 3 bytes
        if (! iter.hasNext()) {
            // truncated
            return '�';
        }
        int c = iter.next();
        if ((c & 0b11_000000) != 0b10_000000) {
            // third byte is invalid; return � instead
            seekToNext();
            return '�';
        }
        if ((a & 0b1111_0000) == 0b1110_0000) {
            // three bytes
            return (a & 0b0000_1111) << 12 | (b & 0b00_111111) << 6 | c & 0b00_111111;
        }
        // >= 4 bytes
        if (! iter.hasNext()) {
            // truncated
            return '�';
        }
        int d = iter.next();
        if ((d & 0b11_000000) != 0b10_000000) {
            // fourth byte is invalid; return � instead
            seekToNext();
            return '�';
        }
        if ((a & 0b11111_000) == 0b11110_000) {
            // four bytes
            return (a & 0b00000_111) << 18 | (b & 0b00_111111) << 12 | (c & 0b00_111111) << 6 | d & 0b00_111111;
        }
        // only invalid possibilities are left; return � instead
        seekToNext();
        return '�';
    }

    public int peekNext() throws NoSuchElementException {
        if (! iter.hasNext()) throw new NoSuchElementException();
        int a = iter.peekNext();
        if ((a & 0b1_0000000) == 0b0_0000000) {
            // one byte
            return a;
        }
        if ((a & 0b11_000000) == 0b10_000000) {
            // first byte is invalid; return � instead
            return '�';
        }
        // >= 2 bytes
        iter.next();
        if (! iter.hasNext()) {
            iter.previous();
            // truncated
            return '�';
        }
        int b = iter.peekNext();
        if ((b & 0b11_000000) != 0b10_000000) {
            // second byte is invalid; return � instead
            iter.previous();
            return '�';
        }
        if ((a & 0b111_00000) == 0b110_00000) {
            // two bytes
            iter.previous();
            return (a & 0b000_11111) << 6 | b & 0b00_111111;
        }
        // >= 3 bytes
        iter.next();
        if (! iter.hasNext()) {
            // truncated
            iter.previous();
            iter.previous();
            return '�';
        }
        int c = iter.peekNext();
        if ((c & 0b11_000000) != 0b10_000000) {
            // third byte is invalid; return � instead
            iter.previous();
            iter.previous();
            return '�';
        }
        if ((a & 0b1111_0000) == 0b1110_0000) {
            // three bytes
            iter.previous();
            iter.previous();
            return (a & 0b0000_1111) << 12 | (b & 0b00_111111) << 6 | c & 0b00_111111;
        }
        // >= 4 bytes
        iter.next();
        if (! iter.hasNext()) {
            // truncated
            iter.previous();
            iter.previous();
            iter.previous();
            return '�';
        }
        int d = iter.peekNext();
        if ((d & 0b11_000000) != 0b10_000000) {
            // fourth byte is invalid; return � instead
            iter.previous();
            iter.previous();
            iter.previous();
            return '�';
        }
        if ((a & 0b11111_000) == 0b11110_000) {
            // four bytes
            iter.previous();
            iter.previous();
            iter.previous();
            return (a & 0b00000_111) << 18 | (b & 0b00_111111) << 12 | (c & 0b00_111111) << 6 | d & 0b00_111111;
        }
        // only invalid possibilities are left; return � instead
        iter.previous();
        iter.previous();
        iter.previous();
        return '�';
    }

    public int previous() {
        // read backwards
        if (! iter.hasPrevious()) throw new NoSuchElementException();
        offset--;
        // >= 1 byte
        int a = iter.previous();
        if ((a & 0b1_0000000) == 0b0_0000000) {
            // one byte
            return a;
        }
        if ((a & 0b11_000000) != 0b10_000000) {
            // last byte is invalid; return � instead
            seekToPrev();
            return '�';
        }
        int cp = a & 0b00_111111;
        // >= 2 bytes
        a = iter.previous();
        if ((a & 0b111_00000) == 0b110_00000) {
            // two bytes
            return (a & 0b000_11111) << 6 | cp;
        }
        if ((a & 0b11_000000) != 0b10_000000) {
            // second-to-last byte is invalid; return � instead
            seekToPrev();
            return '�';
        }
        cp |= (a & 0b00_111111) << 6;
        // >= 3 bytes
        a = iter.previous();
        if ((a & 0b1111_0000) == 0b1110_0000) {
            // three bytes
            return (a & 0b0000_1111) << 12 | cp;
        }
        if ((a & 0b11_000000) != 0b10_000000) {
            // third-to-last byte is invalid; return � instead
            seekToPrev();
            return '�';
        }
        cp |= (a & 0b00_111111) << 12;
        // >= 4 bytes
        a = iter.previous();
        if ((a & 0b11111_000) == 0b11110_000) {
            // four bytes
            return (a & 0b00000_111) << 18 | cp;
        }
        // only invalid possibilities are left; return � instead
        seekToPrev();
        return '�';
    }

    public int peekPrevious() throws NoSuchElementException {
        // read backwards
        if (! iter.hasPrevious()) throw new NoSuchElementException();
        // >= 1 byte
        int a = iter.peekPrevious();
        if ((a & 0b1_0000000) == 0b0_0000000) {
            // one byte
            return a;
        }
        if ((a & 0b11_000000) != 0b10_000000) {
            // last byte is invalid; return � instead
            return '�';
        }
        int cp = a & 0b00_111111;
        // >= 2 bytes
        iter.previous();
        a = iter.peekPrevious();
        if ((a & 0b111_00000) == 0b110_00000) {
            // two bytes
            iter.next();
            return (a & 0b000_11111) << 6 | cp;
        }
        if ((a & 0b11_000000) != 0b10_000000) {
            // second-to-last byte is invalid; return � instead
            iter.next();
            return '�';
        }
        cp |= (a & 0b00_111111) << 6;
        // >= 3 bytes
        iter.previous();
        a = iter.peekPrevious();
        if ((a & 0b1111_0000) == 0b1110_0000) {
            // three bytes
            iter.next();
            iter.next();
            return (a & 0b0000_1111) << 12 | cp;
        }
        if ((a & 0b11_000000) != 0b10_000000) {
            // third-to-last byte is invalid; return � instead
            iter.next();
            iter.next();
            return '�';
        }
        cp |= (a & 0b00_111111) << 12;
        // >= 4 bytes
        iter.previous();
        a = iter.peekPrevious();
        if ((a & 0b11111_000) == 0b11110_000) {
            // four bytes
            iter.next();
            iter.next();
            iter.next();
            return (a & 0b00000_111) << 18 | cp;
        }
        // only invalid possibilities are left; return � instead
        iter.next();
        iter.next();
        iter.next();
        return '�';
    }

    public long getIndex() {
        return offset;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy