
com.oracle.truffle.api.strings.TruffleStringIterator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of truffle-api Show documentation
Show all versions of truffle-api Show documentation
Truffle is a multi-language framework for executing dynamic languages
that achieves high performance when combined with Graal.
/*
* Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to any
* person obtaining a copy of this software, associated documentation and/or
* data (collectively the "Software"), free of charge and under any and all
* copyright rights in the Software, and any and all patent rights owned or
* freely licensable by each licensor hereunder covering either (i) the
* unmodified Software as contributed to or provided by such licensor, or (ii)
* the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
* one is included with the Software each a "Larger Work" to which the Software
* is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy, create
* derivative works of, display, perform, and distribute the Software and make,
* use, sell, offer for sale, import, export, have made, and have sold the
* Software and the Larger Work(s), and to sublicense the foregoing rights on
* either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or at a
* minimum a reference to the UPL must be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oracle.truffle.api.strings;
import static com.oracle.truffle.api.strings.TStringGuards.isBuiltin;
import static com.oracle.truffle.api.strings.TStringGuards.isDefaultVariant;
import static com.oracle.truffle.api.strings.TStringGuards.isReturnNegative;
import com.oracle.truffle.api.CompilerAsserts;
import com.oracle.truffle.api.CompilerDirectives;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.Cached.Shared;
import com.oracle.truffle.api.dsl.NeverDefault;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.nodes.Node;
import com.oracle.truffle.api.profiles.InlinedConditionProfile;
import com.oracle.truffle.api.strings.TruffleString.Encoding;
// Checkstyle: stop
/**
* An iterator object that allows iterating over a {@link TruffleString}'s codepoints, without
* having to re-calculate codepoint indices on every access.
*
* Usage Example:
*
*
*
* {@code
* abstract static class SomeNode extends Node {
*
* @Specialization
* static void someSpecialization(
* TruffleString string,
* @Cached TruffleString.CreateCodePointIteratorNode createCodePointIteratorNode,
* @Cached TruffleStringIterator.NextNode nextNode,
* @Cached TruffleString.CodePointLengthNode codePointLengthNode,
* @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) {
*
* // iterating over a string's code points using TruffleStringIterator
* TruffleStringIterator iterator = createCodePointIteratorNode.execute(string, Encoding.UTF_8);
* while (iterator.hasNext()) {
* System.out.printf("%x%n", nextNode.execute(iterator));
* }
*
* // uncached variant:
* TruffleStringIterator iterator2 = string.createCodePointIteratorUncached(Encoding.UTF_8);
* while (iterator2.hasNext()) {
* System.out.printf("%x%n", iterator2.nextUncached());
* }
*
* // suboptimal variant: using CodePointAtIndexNode in a loop
* int codePointLength = codePointLengthNode.execute(string, Encoding.UTF_8);
* for (int i = 0; i < codePointLength; i++) {
* // performance problem: codePointAtIndexNode may have to calculate the byte index
* // corresponding to codepoint index i for every loop iteration
* System.out.printf("%x%n", codePointAtIndexNode.execute(string, i, Encoding.UTF_8));
* }
* }
* }
* }
*
*
*
* @since 22.1
*/
// Checkstyle: resume
public final class TruffleStringIterator {
// The string being iterated; its length() is the upper bound for rawIndex (see hasNext()).
final AbstractTruffleString a;
// Storage object that is handed to the TStringOps / RawReadValueNode read helpers together with 'a'.
final Object arrayA;
// Code range of 'a', narrowed to a byte; the constructor asserts it via TSCodeRange.isCodeRange.
final byte codeRangeA;
// Encoding used for decoding; its naturalStride converts rawIndex into a byte index.
final Encoding encoding;
// Error handling mode; NextNode / PreviousNode translate it into a DecodingErrorHandler.
final TruffleString.ErrorHandling errorHandling;
// Current position, relative to the start of the string and counted in raw values
// (getByteIndex() shifts it by the encoding's natural stride).
private int rawIndex;
/**
 * Creates an iterator over {@code a} that starts at position {@code rawIndex}.
 *
 * @param a the string to iterate
 * @param arrayA the storage object passed to the raw read helpers
 * @param codeRangeA code range of {@code a}; must satisfy {@code TSCodeRange.isCodeRange}
 * @param encoding the encoding used to decode {@code a}
 * @param errorHandling the error handling mode selected by the creator
 * @param rawIndex the initial iterator position
 */
TruffleStringIterator(AbstractTruffleString a, Object arrayA, int codeRangeA, Encoding encoding, TruffleString.ErrorHandling errorHandling, int rawIndex) {
    assert TSCodeRange.isCodeRange(codeRangeA);
    // mutable cursor
    this.rawIndex = rawIndex;
    // decoding configuration
    this.errorHandling = errorHandling;
    this.encoding = encoding;
    this.codeRangeA = (byte) codeRangeA;
    // string and its storage
    this.arrayA = arrayA;
    this.a = a;
}
/**
 * Returns {@code true} if there are more codepoints remaining, i.e. if the current position
 * lies before the end of the string.
 *
 * @since 22.1
 */
public boolean hasNext() {
    final int end = a.length();
    return end > rawIndex;
}
/**
 * Returns {@code true} if there are more codepoints remaining in reverse direction, i.e. if
 * the current position lies after the beginning of the string.
 *
 * @since 22.1
 */
public boolean hasPrevious() {
    return 0 < rawIndex;
}
/**
 * Returns the next codepoint's byte index, where "byte index" refers to the codepoint's first
 * byte in forward mode, while in backward mode it refers to the first byte after the
 * codepoint. The value is the raw position scaled by the encoding's natural stride.
 *
 * @since 22.3
 */
public int getByteIndex() {
    final int stride = encoding.naturalStride;
    return rawIndex << stride;
}
/**
 * Forward-iteration shorthand: reports the invalid sequence that starts at {@code startIndex}
 * to {@code errorHandler}.
 */
private int applyErrorHandler(DecodingErrorHandler errorHandler, int startIndex) {
    return this.applyErrorHandler(errorHandler, startIndex, true);
}
/**
 * Backward-iteration shorthand: reports the invalid sequence that ends at {@code startIndex}
 * to {@code errorHandler}.
 */
private int applyErrorHandlerReverse(DecodingErrorHandler errorHandler, int startIndex) {
    return this.applyErrorHandler(errorHandler, startIndex, false);
}
/**
 * Produces the value to return for an invalid sequence.
 *
 * Return-negative handlers yield {@code -1} and other built-in handlers yield
 * {@code Encodings.invalidCodepoint()}; in both cases the iterator position is left as the
 * caller set it. A custom handler is invoked with the byte index of {@code startIndex} and an
 * estimated length derived from the distance between {@code startIndex} and the current
 * position; the iterator is then repositioned according to the length the handler reports.
 *
 * @param errorHandler handler to consult; must be a partial-evaluation constant
 * @param startIndex raw position at which the current decoding step started
 * @param forward {@code true} for forward iteration, {@code false} for backward iteration
 * @return the replacement codepoint, or {@code -1} for return-negative handlers
 */
private int applyErrorHandler(DecodingErrorHandler errorHandler, int startIndex, boolean forward) {
    CompilerAsserts.partialEvaluationConstant(errorHandler);
    CompilerAsserts.partialEvaluationConstant(forward);
    if (isReturnNegative(errorHandler)) {
        return -1;
    }
    if (isBuiltin(errorHandler)) {
        return Encodings.invalidCodepoint();
    }
    // Remember how far decoding got before rewinding to the start of the sequence.
    final int reachedByteIndex = getByteIndex();
    rawIndex = startIndex;
    final int originByteIndex = getByteIndex();
    final int estimatedByteLength;
    if (forward) {
        estimatedByteLength = reachedByteIndex - originByteIndex;
    } else {
        estimatedByteLength = originByteIndex - reachedByteIndex;
    }
    final DecodingErrorHandler.Result outcome = errorHandler.apply(a, originByteIndex, estimatedByteLength);
    errorHandlerSkipBytes(outcome.byteLength(), forward);
    return outcome.codepoint();
}
/**
 * Moves the iterator by the number of bytes a custom error handler reported as consumed.
 *
 * The byte length is converted to raw values using the encoding's natural stride. The position
 * is updated before the bounds check, so it has already been moved when an error is raised.
 *
 * @param byteLength number of bytes consumed by the handler
 * @param forward direction of the move
 * @throws RuntimeException (via {@code InternalErrors.illegalState}) if the handler consumed
 *             less than one raw value or moved the iterator outside of the string
 */
void errorHandlerSkipBytes(int byteLength, boolean forward) {
    final int rawLength = byteLength >> encoding.naturalStride;
    if (rawLength == 0) {
        CompilerDirectives.transferToInterpreterAndInvalidate();
        throw InternalErrors.illegalState("custom error handler consumed less than one char / int value");
    }
    final boolean outsideString;
    if (forward) {
        rawIndex += rawLength;
        // unsigned comparison also rejects a position that became negative
        outsideString = Integer.compareUnsigned(rawIndex, a.length()) > 0;
    } else {
        rawIndex -= rawLength;
        outsideString = rawIndex < 0;
    }
    if (outsideString) {
        CompilerDirectives.transferToInterpreterAndInvalidate();
        throw InternalErrors.illegalState("custom error handler consumed more bytes than string length");
    }
}
abstract static class InternalNextNode extends AbstractInternalNode {
/**
 * Decodes the next codepoint using {@code DecodingErrorHandler.DEFAULT}.
 */
final int execute(Node node, TruffleStringIterator it) {
    return this.execute(node, it, DecodingErrorHandler.DEFAULT);
}
/**
 * Decodes the next codepoint and advances the iterator.
 *
 * @param errorHandler handler for invalid sequences; must be a partial-evaluation constant
 * @throws RuntimeException (via {@code InternalErrors.illegalState}) if the iterator is
 *             already at the end of the string
 */
final int execute(Node node, TruffleStringIterator it, DecodingErrorHandler errorHandler) {
    if (it.hasNext()) {
        CompilerAsserts.partialEvaluationConstant(errorHandler);
        return executeInternal(node, it, errorHandler);
    }
    throw InternalErrors.illegalState("end of string has been reached already");
}
// Dispatch entry point for the @Specialization methods of this node (presumably implemented by
// the DSL-generated subclass). Callers use execute(...), which checks hasNext() first.
abstract int executeInternal(Node node, TruffleStringIterator it, DecodingErrorHandler errorHandler);
/**
 * Default-variant handlers on UTF-32 or fixed-width strings: the raw value at the current
 * position is returned as-is and the iterator advances by one.
 */
@Specialization(guards = {"isUTF32(it.encoding) || isFixedWidth(it.codeRangeA)", "isDefaultVariant(errorHandler)"})
static int fixed(Node node, TruffleStringIterator it, @SuppressWarnings("unused") DecodingErrorHandler errorHandler,
                @Shared("readRaw") @Cached TStringOpsNodes.RawReadValueNode readNode) {
    final int rawValue = readAndInc(node, it, readNode);
    return rawValue;
}
/**
 * Non-default handlers on strings whose code range is at most "valid fixed width": no raw
 * value can be invalid, so it is returned as-is and the iterator advances by one.
 *
 * The {@code !isDefaultVariant(errorHandler)} guard keeps this specialization disjoint from
 * {@code fixed}, which already serves all default-variant handlers on fixed-width strings, and
 * mirrors the guards of the corresponding specialization in {@code InternalPreviousNode}.
 */
@Specialization(guards = {"isUpToValidFixedWidth(it.codeRangeA)", "!isDefaultVariant(errorHandler)"})
static int fixedValid(Node node, TruffleStringIterator it, @SuppressWarnings("unused") DecodingErrorHandler errorHandler,
                @Shared("readRaw") @Cached TStringOpsNodes.RawReadValueNode readNode) {
    return readAndInc(node, it, readNode);
}
/**
 * Non-default handlers on broken ASCII strings: values below {@code 0x80} are returned
 * directly, anything else is reported to the error handler for the value just consumed.
 */
@Specialization(guards = {"isAscii(it.encoding)", "isBroken(it.codeRangeA)", "!isDefaultVariant(errorHandler)"})
static int brokenAscii(Node node, TruffleStringIterator it, DecodingErrorHandler errorHandler,
                @Shared("readRaw") @Cached TStringOpsNodes.RawReadValueNode readNode) {
    final int rawValue = readAndInc(node, it, readNode);
    if (rawValue >= 0x80) {
        // the offending value sits one position behind the already advanced iterator
        return it.applyErrorHandler(errorHandler, it.rawIndex - 1);
    }
    return rawValue;
}
/**
 * Non-default handlers on broken UTF-32 strings: valid Unicode codepoints are returned
 * directly, anything else is reported to the error handler for the value just consumed.
 *
 * {@code errorHandler} is used on the error path, so it carries no
 * {@code @SuppressWarnings("unused")} annotation.
 */
@Specialization(guards = {"isUTF32(it.encoding)", "isBroken(it.codeRangeA)", "!isDefaultVariant(errorHandler)"})
static int brokenUTF32(Node node, TruffleStringIterator it, DecodingErrorHandler errorHandler,
                @Shared("readRaw") @Cached TStringOpsNodes.RawReadValueNode readNode) {
    int codepoint = readAndInc(node, it, readNode);
    if (Encodings.isValidUnicodeCodepoint(codepoint)) {
        return codepoint;
    } else {
        // the offending value sits one position behind the already advanced iterator
        return it.applyErrorHandler(errorHandler, it.rawIndex - 1);
    }
}
// Decodes the next codepoint of a UTF-8 string whose code range is valid. No validation is
// performed beyond assertions; the error handler is never consulted.
@SuppressWarnings("fallthrough")
@Specialization(guards = {"isUTF8(it.encoding)", "isValid(it.codeRangeA)"})
static int utf8Valid(TruffleStringIterator it, @SuppressWarnings("unused") DecodingErrorHandler errorHandler) {
int b = it.readAndIncS0();
if (b < 0x80) {
// single-byte (ASCII) codepoint
return b;
}
// Number of leading one-bits of the lead byte: shifting it into the top byte and inverting
// turns those ones into leading zeros.
int nBytes = Integer.numberOfLeadingZeros(~(b << 24));
// Strip the length prefix from the lead byte, keeping only its payload bits.
int codepoint = b & (0xff >>> nBytes);
assert 1 < nBytes && nBytes < 5 : nBytes;
assert it.rawIndex + nBytes - 1 <= it.a.length();
// Checkstyle: stop
// Intentional fall-through: a 4-byte sequence executes three reads, a 3-byte sequence two,
// and everything else one. Each read contributes the low six bits of the byte.
switch (nBytes) {
case 4:
assert it.curIsUtf8ContinuationByte();
codepoint = codepoint << 6 | (it.readAndIncS0() & 0x3f);
case 3:
assert it.curIsUtf8ContinuationByte();
codepoint = codepoint << 6 | (it.readAndIncS0() & 0x3f);
default:
assert it.curIsUtf8ContinuationByte();
codepoint = codepoint << 6 | (it.readAndIncS0() & 0x3f);
}
// Checkstyle: resume
return codepoint;
}
// Decodes the next codepoint of a UTF-8 string whose code range is broken, validating the
// sequence with a table-driven state machine selected by the error handler.
@Specialization(guards = {"isUTF8(it.encoding)", "isBroken(it.codeRangeA)"})
static int utf8Broken(TruffleStringIterator it, DecodingErrorHandler errorHandler) {
// position of the lead byte; needed to rewind on errors
int startIndex = it.rawIndex;
int b = it.readAndIncS0();
if (b < 0x80) {
// single-byte (ASCII) codepoint
return b;
}
int nBytes = Encodings.utf8CodePointLength(b);
// payload bits of the lead byte
int codepoint = b & (0xff >>> nBytes);
/*
* Copyright (c) 2008-2010 Bjoern Hoehrmann See
* http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
*/
// Table layout as used below: entries [0, 256) map a byte to its type, entries from 256 on
// map (state + type) to the next state.
byte[] stateMachine = Encodings.getUTF8DecodingStateMachine(errorHandler);
int type = stateMachine[b];
int state = stateMachine[256 + type];
if (state != Encodings.UTF8_REJECT) {
// Never read past the end of the string nor past the length announced by the lead byte.
int maxIndex = Math.min(it.a.length(), it.rawIndex - 1 + nBytes);
while (it.rawIndex < maxIndex) {
// peek only: a rejected byte is not consumed
b = it.readFwdS0();
type = stateMachine[b];
state = stateMachine[256 + state + type];
if (state == Encodings.UTF8_REJECT) {
break;
}
codepoint = (b & 0x3f) | (codepoint << 6);
it.rawIndex++;
}
}
if (state == Encodings.UTF8_ACCEPT) {
return codepoint;
} else if (isDefaultVariant(errorHandler)) {
// DEFAULT consumes exactly one byte per invalid sequence; other default variants keep
// the position reached by the loop above.
if (errorHandler == DecodingErrorHandler.DEFAULT) {
it.rawIndex = startIndex + 1;
}
return Encodings.invalidCodepoint();
} else {
// RETURN_NEGATIVE consumes exactly one byte; other handlers keep the position reached
// above, which applyErrorHandler uses for its length estimate.
if (errorHandler == DecodingErrorHandler.RETURN_NEGATIVE) {
it.rawIndex = startIndex + 1;
}
return it.applyErrorHandler(errorHandler, startIndex);
}
}
/**
 * Decodes the next codepoint of a UTF-16 string whose code range is valid: a high surrogate is
 * combined with the following char, any other char is returned directly.
 */
@Specialization(guards = {"isUTF16(it.encoding)", "isValid(it.codeRangeA)"})
static int utf16Valid(TruffleStringIterator it, @SuppressWarnings("unused") DecodingErrorHandler errorHandler) {
    final char lead = (char) it.readAndIncS1();
    if (!Encodings.isUTF16HighSurrogate(lead)) {
        return lead;
    }
    // a valid string guarantees that a low surrogate follows
    assert it.hasNext();
    assert Encodings.isUTF16LowSurrogate(it.readFwdS1());
    final char trail = (char) it.readAndIncS1();
    return Character.toCodePoint(lead, trail);
}
/**
 * Decodes the next codepoint of a UTF-16 string whose code range is broken.
 *
 * A high surrogate followed by a low surrogate is combined into one codepoint. For
 * return-negative and custom handlers, any other surrogate is reported to the error handler;
 * for the remaining built-in handlers a lone surrogate is returned unchanged.
 */
@Specialization(guards = {"isUTF16(it.encoding)", "isBroken(it.codeRangeA)"})
static int utf16Broken(TruffleStringIterator it, DecodingErrorHandler errorHandler) {
    final char first = (char) it.readAndIncS1();
    final boolean reportsErrors = isReturnNegative(errorHandler) || !isBuiltin(errorHandler);
    assert reportsErrors || isDefaultVariant(errorHandler);
    if (reportsErrors && !Encodings.isUTF16Surrogate(first)) {
        return first;
    }
    if (Encodings.isUTF16HighSurrogate(first) && it.hasNext()) {
        final char second = (char) it.readFwdS1();
        if (Encodings.isUTF16LowSurrogate(second)) {
            // consume the low surrogate only when it completes the pair
            it.rawIndex++;
            return Character.toCodePoint(first, second);
        }
    }
    if (reportsErrors) {
        // lone surrogate: the offending char sits one position behind the iterator
        return it.applyErrorHandler(errorHandler, it.rawIndex - 1);
    }
    return first;
}
/**
 * Decodes the next codepoint of a string in an encoding handled through JCodings.
 *
 * If JCodings reports a length below one, the iterator skips one byte (or, for lengths below
 * {@code -1}, to the end of the string) and the error handler is applied. The error handler is
 * also applied when the decoded codepoint is not valid for the encoding.
 */
@Specialization(guards = {"isUnsupportedEncoding(it.encoding)"})
static int unsupported(TruffleStringIterator it, DecodingErrorHandler errorHandler) {
    assert it.hasNext();
    final byte[] bytes = JCodings.asByteArray(it.arrayA);
    final int startIndex = it.rawIndex;
    final int p = it.a.byteArrayOffset() + it.rawIndex;
    final int end = it.a.byteArrayOffset() + it.a.length();
    final JCodings.Encoding jCoding = JCodings.getInstance().get(it.encoding);
    final int length = JCodings.getInstance().getCodePointLength(jCoding, bytes, p, end);
    if (length < 1) {
        if (length < -1) {
            // broken multibyte codepoint at end of string
            it.rawIndex = it.a.length();
        } else {
            it.rawIndex++;
        }
        return it.applyErrorHandler(errorHandler, startIndex);
    }
    it.rawIndex += length;
    final int codepoint = JCodings.getInstance().readCodePoint(jCoding, bytes, p, end, errorHandler);
    if (!JCodings.getInstance().isValidCodePoint(jCoding, codepoint)) {
        return it.applyErrorHandler(errorHandler, startIndex);
    }
    return codepoint;
}
}
/**
 * Node that returns the next codepoint in the string and advances the iterator.
 *
 * @since 22.1
 */
public abstract static class NextNode extends AbstractPublicNode {

    NextNode() {
    }

    /**
     * Returns the next codepoint in the string.
     *
     * @since 22.1
     */
    public abstract int execute(TruffleStringIterator it);

    @Specialization
    final int doDefault(TruffleStringIterator it,
                    @Cached InternalNextNode nextNode,
                    @Cached InlinedConditionProfile errorHandlerProfile) {
        final boolean bestEffort = it.errorHandling == TruffleString.ErrorHandling.BEST_EFFORT;
        // Two separate call sites with literal handlers keep the error handler a
        // partial-evaluation constant.
        if (errorHandlerProfile.profile(this, bestEffort)) {
            return nextNode.execute(this, it, DecodingErrorHandler.DEFAULT);
        }
        return nextNode.execute(this, it, DecodingErrorHandler.RETURN_NEGATIVE);
    }

    /**
     * Create a new {@link NextNode}.
     *
     * @since 22.1
     */
    @NeverDefault
    public static NextNode create() {
        return TruffleStringIteratorFactory.NextNodeGen.create();
    }

    /**
     * Get the uncached version of {@link NextNode}.
     *
     * @since 22.1
     */
    public static NextNode getUncached() {
        return TruffleStringIteratorFactory.NextNodeGen.getUncached();
    }
}
/**
 * Shorthand for calling the uncached version of {@link NextNode} on this iterator.
 *
 * @since 22.1
 */
@TruffleBoundary
public int nextUncached() {
    final NextNode uncached = NextNode.getUncached();
    return uncached.execute(this);
}
/**
 * Node that returns the previous codepoint in the string and moves the iterator backwards.
 *
 * @since 22.1
 */
public abstract static class PreviousNode extends AbstractPublicNode {

    PreviousNode() {
    }

    /**
     * Returns the previous codepoint in the string.
     *
     * @since 22.1
     */
    public abstract int execute(TruffleStringIterator it);

    @Specialization
    final int doDefault(TruffleStringIterator it,
                    @Cached InternalPreviousNode previousNode,
                    @Cached InlinedConditionProfile errorHandlerProfile) {
        final boolean bestEffort = it.errorHandling == TruffleString.ErrorHandling.BEST_EFFORT;
        // Two separate call sites with literal handlers keep the error handler a
        // partial-evaluation constant.
        if (errorHandlerProfile.profile(this, bestEffort)) {
            return previousNode.execute(this, it, DecodingErrorHandler.DEFAULT);
        }
        return previousNode.execute(this, it, DecodingErrorHandler.RETURN_NEGATIVE);
    }

    /**
     * Create a new {@link PreviousNode}.
     *
     * @since 22.1
     */
    @NeverDefault
    public static PreviousNode create() {
        return TruffleStringIteratorFactory.PreviousNodeGen.create();
    }

    /**
     * Get the uncached version of {@link PreviousNode}.
     *
     * @since 22.1
     */
    public static PreviousNode getUncached() {
        return TruffleStringIteratorFactory.PreviousNodeGen.getUncached();
    }
}
abstract static class InternalPreviousNode extends AbstractInternalNode {
// Package-private no-op constructor.
InternalPreviousNode() {
}
/**
 * Decodes the previous codepoint and moves the iterator backwards.
 *
 * @param errorHandler handler for invalid sequences
 * @throws RuntimeException (via {@code InternalErrors.illegalState}) if the iterator is
 *             already at the beginning of the string
 */
public final int execute(Node node, TruffleStringIterator it, DecodingErrorHandler errorHandler) {
    if (it.hasPrevious()) {
        return executeInternal(node, it, errorHandler);
    }
    throw InternalErrors.illegalState("beginning of string has been reached already");
}
// Dispatch entry point for the @Specialization methods of this node (presumably implemented by
// the DSL-generated subclass). Callers use execute(...), which checks hasPrevious() first.
abstract int executeInternal(Node node, TruffleStringIterator it, DecodingErrorHandler errorHandler);
/**
 * Default-variant handlers on fixed-width strings: the raw value before the current position
 * is returned as-is and the iterator moves back by one.
 */
@Specialization(guards = {"isFixedWidth(it.codeRangeA)", "isDefaultVariant(errorHandler)"})
static int fixed(Node node, TruffleStringIterator it, @SuppressWarnings("unused") DecodingErrorHandler errorHandler,
                @Shared("readRaw") @Cached TStringOpsNodes.RawReadValueNode readNode) {
    final int rawValue = readAndDec(node, it, readNode);
    return rawValue;
}
/**
 * Non-default handlers on strings whose code range is at most "valid fixed width": the raw
 * value before the current position is returned as-is and the iterator moves back by one.
 */
@Specialization(guards = {"isUpToValidFixedWidth(it.codeRangeA)", "!isDefaultVariant(errorHandler)"})
static int fixedValid(Node node, TruffleStringIterator it, @SuppressWarnings("unused") DecodingErrorHandler errorHandler,
                @Shared("readRaw") @Cached TStringOpsNodes.RawReadValueNode readNode) {
    final int rawValue = readAndDec(node, it, readNode);
    return rawValue;
}
/**
 * Non-default handlers on broken ASCII strings, backward direction: values below {@code 0x80}
 * are returned directly, anything else is reported to the error handler.
 */
@Specialization(guards = {"isAscii(it.encoding)", "isBroken(it.codeRangeA)", "!isDefaultVariant(errorHandler)"})
static int brokenAscii(Node node, TruffleStringIterator it, DecodingErrorHandler errorHandler,
                @Shared("readRaw") @Cached TStringOpsNodes.RawReadValueNode readNode) {
    final int rawValue = readAndDec(node, it, readNode);
    if (rawValue >= 0x80) {
        // the step started one position after the already decremented iterator
        return it.applyErrorHandlerReverse(errorHandler, it.rawIndex + 1);
    }
    return rawValue;
}
/**
 * Non-default handlers on broken UTF-32 strings, backward direction: valid Unicode codepoints
 * are returned directly, anything else is reported to the error handler.
 */
@Specialization(guards = {"isUTF32(it.encoding)", "isBroken(it.codeRangeA)", "!isDefaultVariant(errorHandler)"})
static int brokenUTF32(Node node, TruffleStringIterator it, DecodingErrorHandler errorHandler,
                @Shared("readRaw") @Cached TStringOpsNodes.RawReadValueNode readNode) {
    final int rawValue = readAndDec(node, it, readNode);
    if (!Encodings.isValidUnicodeCodepoint(rawValue)) {
        // the step started one position after the already decremented iterator
        return it.applyErrorHandlerReverse(errorHandler, it.rawIndex + 1);
    }
    return rawValue;
}
// Decodes the previous codepoint of a UTF-8 string whose code range is valid by reading
// backwards from the last byte of the sequence to its lead byte. No validation is performed
// beyond assertions; the error handler is never consulted.
@Specialization(guards = {"isUTF8(it.encoding)", "isValid(it.codeRangeA)"})
static int utf8Valid(TruffleStringIterator it, @SuppressWarnings("unused") DecodingErrorHandler errorHandler) {
int b = it.readAndDecS0();
if (b < 0x80) {
// single-byte (ASCII) codepoint
return b;
}
// Otherwise the byte just read is the last continuation byte of a multi-byte sequence.
assert Encodings.isUTF8ContinuationByte(b);
int codepoint = b & 0x3f;
// Read up to three more bytes. Continuation bytes contribute six bits each at increasing
// shifts; the loop stops at the first non-continuation byte, or unconditionally at the third
// additional byte (j == 3), which is then taken to be the lead byte.
for (int j = 1; j < 4; j++) {
b = it.readAndDecS0();
if (j < 3 && Encodings.isUTF8ContinuationByte(b)) {
codepoint |= (b & 0x3f) << (6 * j);
} else {
break;
}
}
// b now holds the lead byte: its number of leading one-bits is the sequence length.
int nBytes = Integer.numberOfLeadingZeros(~(b << 24));
assert 1 < nBytes && nBytes < 5 : nBytes;
// Place the lead byte's payload bits above the bits collected from the continuation bytes.
return codepoint | (b & (0xff >>> nBytes)) << (6 * (nBytes - 1));
}
/**
 * Decodes the previous codepoint of a UTF-8 string whose code range is broken, walking
 * backwards with the reverse decoding state machine selected by the error handler.
 *
 * On success the decoded codepoint is returned and the iterator points at the sequence's lead
 * byte. On failure, default-variant handlers yield {@code Encodings.invalidCodepoint()} and
 * all other handlers are consulted through {@code applyErrorHandlerReverse}, so that the
 * estimated length is computed and the iterator is moved in backward direction.
 */
@Specialization(guards = {"isUTF8(it.encoding)", "isBroken(it.codeRangeA)"})
static int utf8Broken(TruffleStringIterator it, DecodingErrorHandler errorHandler) {
    // position the step started from; needed to rewind on errors
    int startIndex = it.rawIndex;
    int b = it.readAndDecS0();
    if (b < 0x80) {
        // single-byte (ASCII) codepoint
        return b;
    }
    int codepoint = b & 0x3f;
    // Table layout as used below: entries [0, 256) map a byte to its type, entries from 256
    // on map (state + type) to the next state.
    byte[] stateMachine = Encodings.getUTF8DecodingStateMachineReverse(errorHandler);
    int type = stateMachine[b];
    int state = stateMachine[256 + type];
    int shift = 6;
    assert state != Encodings.UTF8_ACCEPT;
    if (state > Encodings.UTF8_REVERSE_INCOMPLETE_SEQ) {
        while (it.rawIndex > 0) {
            b = it.readAndDecS0();
            type = stateMachine[b];
            state = stateMachine[256 + state + type];
            if (state <= Encodings.UTF8_REVERSE_INCOMPLETE_SEQ) {
                // breaks on ACCEPT, REJECT and INCOMPLETE_SEQ
                break;
            }
            codepoint |= (b & 0x3f) << shift;
            shift += 6;
        }
    }
    if (state == Encodings.UTF8_ACCEPT) {
        // b is the lead byte: add its payload bits above the continuation bits
        return (((0xff >> type) & b) << shift) | codepoint;
    } else if (isDefaultVariant(errorHandler)) {
        // Consume exactly one byte unless a non-DEFAULT variant saw an incomplete sequence.
        if (errorHandler == DecodingErrorHandler.DEFAULT || state != Encodings.UTF8_REVERSE_INCOMPLETE_SEQ) {
            it.rawIndex = startIndex - 1;
        }
        return Encodings.invalidCodepoint();
    } else {
        if (errorHandler == DecodingErrorHandler.RETURN_NEGATIVE) {
            it.rawIndex = startIndex - 1;
        }
        // Backward iteration: the reverse variant must be used. The forward variant would
        // hand a negative estimated length to a custom handler and afterwards advance the
        // iterator instead of moving it back.
        return it.applyErrorHandlerReverse(errorHandler, startIndex);
    }
}
/**
 * Decodes the previous codepoint of a UTF-16 string whose code range is valid: a low surrogate
 * is combined with the char preceding it, any other char is returned directly.
 */
@Specialization(guards = {"isUTF16(it.encoding)", "isValid(it.codeRangeA)"})
static int utf16Valid(TruffleStringIterator it, @SuppressWarnings("unused") DecodingErrorHandler errorHandler) {
    final char trail = (char) it.readAndDecS1();
    if (!Encodings.isUTF16LowSurrogate(trail)) {
        return trail;
    }
    // a valid string guarantees that a high surrogate precedes
    assert Encodings.isUTF16HighSurrogate((char) it.readBckS1());
    final char lead = (char) it.readAndDecS1();
    return Character.toCodePoint(lead, trail);
}
/**
 * Decodes the previous codepoint of a UTF-16 string whose code range is broken.
 *
 * A low surrogate preceded by a high surrogate is combined into one codepoint. For
 * return-negative and custom handlers, any other surrogate is reported to the error handler;
 * for the remaining built-in handlers a lone surrogate is returned unchanged.
 */
@Specialization(guards = {"isUTF16(it.encoding)", "isBroken(it.codeRangeA)"})
static int utf16Broken(TruffleStringIterator it, DecodingErrorHandler errorHandler) {
    final char last = (char) it.readAndDecS1();
    final boolean reportsErrors = isReturnNegative(errorHandler) || !isBuiltin(errorHandler);
    if (reportsErrors && !Encodings.isUTF16Surrogate(last)) {
        return last;
    }
    if (Encodings.isUTF16LowSurrogate(last) && it.hasPrevious()) {
        final char before = (char) it.readBckS1();
        if (Encodings.isUTF16HighSurrogate(before)) {
            // consume the high surrogate only when it completes the pair
            it.rawIndex--;
            return Character.toCodePoint(before, last);
        }
    }
    if (reportsErrors) {
        // lone surrogate: the step started one position after the iterator
        return it.applyErrorHandlerReverse(errorHandler, it.rawIndex + 1);
    }
    return last;
}
/**
 * Decodes the previous codepoint of a string in an encoding handled through JCodings.
 *
 * If JCodings cannot locate the previous codepoint, the iterator moves back by one byte and
 * the error handler is applied. The error handler is also applied when the decoded codepoint
 * is not valid for the encoding.
 */
@Specialization(guards = {"isUnsupportedEncoding(it.encoding)"})
static int unsupported(TruffleStringIterator it, DecodingErrorHandler errorHandler) {
    assert it.hasPrevious();
    byte[] bytes = JCodings.asByteArray(it.arrayA);
    // String-relative position the step started from. The error handler must receive this
    // value and not the array index below, because it is assigned back to rawIndex, which
    // does not include the byte array offset.
    int startIndex = it.rawIndex;
    int start = it.a.byteArrayOffset();
    int index = it.a.byteArrayOffset() + it.rawIndex;
    int end = it.a.byteArrayOffset() + it.a.length();
    JCodings.Encoding jCoding = JCodings.getInstance().get(it.encoding);
    int prevIndex = JCodings.getInstance().getPreviousCodePointIndex(jCoding, bytes, start, index, end);
    int codepoint = 0;
    if (prevIndex < 0) {
        it.rawIndex--;
    } else {
        assert prevIndex >= it.a.byteArrayOffset();
        assert prevIndex < index;
        // convert the array index back into a string-relative position
        it.rawIndex = prevIndex - it.a.byteArrayOffset();
        codepoint = JCodings.getInstance().readCodePoint(jCoding, bytes, prevIndex, end, errorHandler);
    }
    if (prevIndex < 0 || !JCodings.getInstance().isValidCodePoint(jCoding, codepoint)) {
        return it.applyErrorHandlerReverse(errorHandler, startIndex);
    }
    return codepoint;
}
}
/**
 * Shorthand for calling the uncached version of {@link PreviousNode} on this iterator.
 *
 * @since 22.1
 */
@TruffleBoundary
public int previousUncached() {
    final PreviousNode uncached = PreviousNode.getUncached();
    return uncached.execute(this);
}
/** Returns the current iterator position in raw values (not scaled to bytes). */
int getRawIndex() {
    return this.rawIndex;
}
/** Repositions the iterator to raw position {@code i}; no bounds check is performed. */
void setRawIndex(int i) {
    this.rawIndex = i;
}
/** Reads the value at the current position of a stride-0 string without moving the iterator. */
private int readFwdS0() {
    assert a.stride() == 0;
    assert hasNext();
    final int position = rawIndex;
    return TStringOps.readS0(a, arrayA, position);
}
/** Reads the value at the current position of a stride-1 string without moving the iterator. */
private int readFwdS1() {
    assert a.stride() == 1;
    assert hasNext();
    final int position = rawIndex;
    return TStringOps.readS1(a, arrayA, position);
}
/** Reads the value before the current position of a stride-1 string without moving the iterator. */
private int readBckS1() {
    assert a.stride() == 1;
    assert hasPrevious();
    final int position = rawIndex - 1;
    return TStringOps.readS1(a, arrayA, position);
}
/** Reads the value at the current position through {@code readNode} and advances the iterator by one. */
private static int readAndInc(Node node, TruffleStringIterator it, TStringOpsNodes.RawReadValueNode readNode) {
    assert it.hasNext();
    final int position = it.rawIndex;
    // advance before reading, as a post-increment in the argument list would
    it.rawIndex = position + 1;
    return readNode.execute(node, it.a, it.arrayA, position);
}
/** Reads the value at the current position of a stride-0 string and advances the iterator by one. */
private int readAndIncS0() {
    assert a.stride() == 0;
    assert hasNext();
    final int position = rawIndex;
    rawIndex = position + 1;
    return TStringOps.readS0(a, arrayA, position);
}
/** Reads the value at the current position of a stride-1 string and advances the iterator by one. */
private int readAndIncS1() {
    assert a.stride() == 1;
    assert hasNext();
    final int position = rawIndex;
    rawIndex = position + 1;
    return TStringOps.readS1(a, arrayA, position);
}
/** Moves the iterator back by one and reads the value at the new position through {@code readNode}. */
private static int readAndDec(Node node, TruffleStringIterator it, TStringOpsNodes.RawReadValueNode readNode) {
    assert it.hasPrevious();
    final int position = it.rawIndex - 1;
    it.rawIndex = position;
    return readNode.execute(node, it.a, it.arrayA, position);
}
/** Moves the iterator back by one and reads the value at the new position of a stride-0 string. */
private int readAndDecS0() {
    assert a.stride() == 0;
    assert hasPrevious();
    rawIndex -= 1;
    return TStringOps.readS0(a, arrayA, rawIndex);
}
/** Moves the iterator back by one and reads the value at the new position of a stride-1 string. */
private int readAndDecS1() {
    assert a.stride() == 1;
    assert hasPrevious();
    rawIndex -= 1;
    return TStringOps.readS1(a, arrayA, rawIndex);
}
/** Returns whether the byte at the current position is a UTF-8 continuation byte; does not move the iterator. */
private boolean curIsUtf8ContinuationByte() {
    final int current = readFwdS0();
    return Encodings.isUTF8ContinuationByte(current);
}
/**
 * Searches forward for {@code codepoint} and returns its codepoint index.
 *
 * The iterator is first advanced by {@code fromIndex} codepoints, then codepoints are compared
 * until a match is found, the string ends, or {@code toIndex} is reached.
 *
 * @return the codepoint index of the first match in {@code [fromIndex, toIndex)}, or
 *         {@code -1} if there is none or the string ends before {@code fromIndex}
 */
static int indexOf(Node location, TruffleStringIterator it, int codepoint, int fromIndex, int toIndex, InternalNextNode nextNode) {
    int position = 0;
    // skip the codepoints in front of the search range
    while (it.hasNext() && position < fromIndex) {
        nextNode.execute(location, it);
        TStringConstants.truffleSafePointPoll(location, ++position);
    }
    if (position < fromIndex) {
        return -1;
    }
    while (position < toIndex && it.hasNext()) {
        final int current = nextNode.execute(location, it);
        if (current == codepoint) {
            return position;
        }
        TStringConstants.truffleSafePointPoll(location, ++position);
    }
    return -1;
}
/**
 * Searches for the last occurrence of {@code codepoint} and returns its codepoint index.
 *
 * Codepoint indices are counted from the beginning of the string, so the string is decoded
 * forward up to {@code fromIndex} while remembering the most recent match. Only matches at
 * indices greater than or equal to {@code toIndex} are considered, so the result always lies
 * in {@code [toIndex, fromIndex)}.
 *
 * @param fromIndex exclusive upper bound of the search range, in codepoints
 * @param toIndex inclusive lower bound of the search range, in codepoints
 * @return the codepoint index of the last match inside the range, or {@code -1} if there is
 *         none or the string ends before {@code fromIndex}
 */
static int lastIndexOf(Node location, TruffleStringIterator it, int codepoint, int fromIndex, int toIndex, InternalNextNode nextNode) {
    int aCodepointIndex = 0;
    int result = -1;
    // the code point index is based on the beginning of the string, so we have to count
    // from there
    while (aCodepointIndex < fromIndex && it.hasNext()) {
        // matches below toIndex lie outside of the requested range and must be ignored
        if (nextNode.execute(location, it) == codepoint && aCodepointIndex >= toIndex) {
            result = aCodepointIndex;
        }
        aCodepointIndex++;
        TStringConstants.truffleSafePointPoll(location, aCodepointIndex);
    }
    if (aCodepointIndex < fromIndex) {
        // fromIndex was out of bounds
        return -1;
    }
    return result;
}
/**
 * Searches forward in {@code aIt} for the codepoint sequence produced by {@code bIt} and
 * returns the codepoint index at which the match starts.
 *
 * If {@code bIt} has no codepoints, {@code fromIndex} is returned. Otherwise {@code aIt} is
 * advanced by {@code fromIndex} codepoints, and every position before {@code toIndex} whose
 * codepoint equals the first codepoint of {@code bIt} is tried as a match start; after a
 * failed attempt both iterators are rewound.
 *
 * @return the codepoint index of the first match, or {@code -1} if there is none, if
 *         {@code aIt} ends before {@code fromIndex}, or if {@code aIt} is exhausted while
 *         comparing
 */
static int indexOfString(Node node, TruffleStringIterator aIt, TruffleStringIterator bIt, int fromIndex, int toIndex, InternalNextNode nextNodeA, InternalNextNode nextNodeB) {
    if (!bIt.hasNext()) {
        return fromIndex;
    }
    int position = 0;
    // skip the codepoints in front of the search range
    while (aIt.hasNext() && position < fromIndex) {
        nextNodeA.execute(node, aIt);
        TStringConstants.truffleSafePointPoll(node, ++position);
    }
    if (position < fromIndex) {
        return -1;
    }
    final int needleHead = nextNodeB.execute(node, bIt);
    // position of bIt right after its first codepoint; restored after every failed attempt
    final int needleRestStart = bIt.getRawIndex();
    while (position < toIndex && aIt.hasNext()) {
        final int candidate = nextNodeA.execute(node, aIt);
        if (candidate == needleHead) {
            if (!bIt.hasNext()) {
                return position;
            }
            final int resumeAt = aIt.getRawIndex();
            int compared = 0;
            while (bIt.hasNext()) {
                if (!aIt.hasNext()) {
                    return -1;
                }
                final int fromA = nextNodeA.execute(node, aIt);
                final int fromB = nextNodeB.execute(node, bIt);
                if (fromA != fromB) {
                    break;
                }
                if (!bIt.hasNext()) {
                    return position;
                }
                TStringConstants.truffleSafePointPoll(node, ++compared);
            }
            aIt.setRawIndex(resumeAt);
            bIt.setRawIndex(needleRestStart);
        }
        TStringConstants.truffleSafePointPoll(node, ++position);
    }
    return -1;
}
/**
 * Searches forward in {@code aIt} for the codepoint sequence produced by {@code bIt} and
 * returns the position, as reported by {@code aIt.getRawIndex()}, at which the match starts.
 *
 * If {@code bIt} has no codepoints, {@code fromByteIndex} is returned. Otherwise {@code aIt}
 * is positioned at {@code fromByteIndex} and every position before {@code toByteIndex} whose
 * codepoint equals the first codepoint of {@code bIt} is tried as a match start; after a
 * failed attempt both iterators are rewound.
 *
 * NOTE(review): the index parameters are named byte indices but are written to and compared
 * with the iterator's raw index; presumably callers pass raw indices - confirm.
 *
 * @return the start position of the first match, or {@code -1} if there is none or if
 *         {@code aIt} is exhausted while comparing
 */
static int byteIndexOfString(Node node, TruffleStringIterator aIt, TruffleStringIterator bIt, int fromByteIndex, int toByteIndex,
                InternalNextNode nextNodeA,
                InternalNextNode nextNodeB) {
    if (!bIt.hasNext()) {
        return fromByteIndex;
    }
    aIt.setRawIndex(fromByteIndex);
    final int needleHead = nextNodeB.execute(node, bIt);
    // position of bIt right after its first codepoint; restored after every failed attempt
    final int needleRestStart = bIt.getRawIndex();
    int polls = 0;
    while (aIt.getRawIndex() < toByteIndex && aIt.hasNext()) {
        final int matchStart = aIt.getRawIndex();
        final int candidate = nextNodeA.execute(node, aIt);
        if (candidate == needleHead) {
            if (!bIt.hasNext()) {
                return matchStart;
            }
            final int resumeAt = aIt.getRawIndex();
            while (bIt.hasNext()) {
                if (!aIt.hasNext()) {
                    return -1;
                }
                final int fromA = nextNodeA.execute(node, aIt);
                final int fromB = nextNodeB.execute(node, bIt);
                if (fromA != fromB) {
                    break;
                }
                if (!bIt.hasNext()) {
                    return matchStart;
                }
                TStringConstants.truffleSafePointPoll(node, ++polls);
            }
            aIt.setRawIndex(resumeAt);
            bIt.setRawIndex(needleRestStart);
        }
        TStringConstants.truffleSafePointPoll(node, ++polls);
    }
    return -1;
}
/**
 * Searches backward in {@code aIt} for the codepoint sequence produced by {@code bIt} and
 * returns the codepoint index at which the match starts.
 *
 * {@code bIt} is read backwards, so its first decoded codepoint is the last codepoint of the
 * search string. {@code aIt} is first decoded forward up to {@code fromIndex} to establish
 * codepoint indices and to find the last position at which that codepoint occurs; from there
 * the comparison proceeds backwards while the codepoint index stays at or above
 * {@code toIndex}. After a failed attempt both iterators are rewound.
 *
 * @return {@code fromIndex} if {@code bIt} has no codepoints; otherwise the codepoint index of
 *         the match, or {@code -1} if there is none, if {@code aIt} ends before
 *         {@code fromIndex}, or if {@code aIt} is exhausted while comparing
 */
static int lastIndexOfString(Node node, TruffleStringIterator aIt, TruffleStringIterator bIt, int fromIndex, int toIndex,
                InternalNextNode nextNodeA,
                InternalPreviousNode prevNodeA,
                InternalPreviousNode prevNodeB) {
    if (!bIt.hasPrevious()) {
        return fromIndex;
    }
    final int needleLast = prevNodeB.execute(node, bIt, DecodingErrorHandler.DEFAULT);
    int candidateCodepointIndex = -1;
    int candidateRawIndex = -1;
    int position = 0;
    // forward pass: count codepoints and remember the last occurrence of needleLast
    while (aIt.hasNext() && position < fromIndex) {
        final int current = nextNodeA.execute(node, aIt);
        if (current == needleLast) {
            candidateCodepointIndex = position;
            candidateRawIndex = aIt.getRawIndex();
        }
        TStringConstants.truffleSafePointPoll(node, ++position);
    }
    if (position < fromIndex || candidateCodepointIndex < 0) {
        return -1;
    }
    position = candidateCodepointIndex;
    aIt.setRawIndex(candidateRawIndex);
    // position of bIt right before its last codepoint; restored after every failed attempt
    final int needleRestEnd = bIt.getRawIndex();
    while (position >= toIndex && aIt.hasPrevious()) {
        final int candidate = prevNodeA.execute(node, aIt, DecodingErrorHandler.DEFAULT);
        if (candidate == needleLast) {
            if (!bIt.hasPrevious()) {
                return position;
            }
            final int resumeAt = aIt.getRawIndex();
            int matchStart = position;
            while (bIt.hasPrevious()) {
                if (!aIt.hasPrevious()) {
                    return -1;
                }
                final int fromA = prevNodeA.execute(node, aIt, DecodingErrorHandler.DEFAULT);
                final int fromB = prevNodeB.execute(node, bIt, DecodingErrorHandler.DEFAULT);
                if (fromA != fromB) {
                    break;
                }
                matchStart--;
                if (!bIt.hasPrevious() && matchStart >= toIndex) {
                    return matchStart;
                }
                TStringConstants.truffleSafePointPoll(node, matchStart);
            }
            aIt.setRawIndex(resumeAt);
            bIt.setRawIndex(needleRestEnd);
        }
        position--;
        TStringConstants.truffleSafePointPoll(node, position);
    }
    return -1;
}
/**
 * Searches backward in {@code aIt} for the codepoint sequence produced by {@code bIt} and
 * returns the position, as reported by {@code aIt.getRawIndex()}, at which the match starts.
 *
 * {@code bIt} is read backwards, so its first decoded codepoint is the last codepoint of the
 * search string. {@code aIt} is first decoded forward until its position reaches
 * {@code fromByteIndex}, remembering the position after the last occurrence of that codepoint;
 * from there the comparison proceeds backwards while the position stays above
 * {@code toByteIndex}. After a failed attempt both iterators are rewound.
 *
 * NOTE(review): the index parameters are named byte indices but are written to and compared
 * with the iterator's raw index; presumably callers pass raw indices - confirm.
 *
 * @return {@code fromByteIndex} if {@code bIt} has no codepoints; otherwise the start position
 *         of the match, or {@code -1} if there is none, if {@code aIt} ends before
 *         {@code fromByteIndex}, or if {@code aIt} is exhausted while comparing
 */
static int lastByteIndexOfString(Node node, TruffleStringIterator aIt, TruffleStringIterator bIt, int fromByteIndex, int toByteIndex,
                InternalNextNode nextNodeA,
                InternalPreviousNode prevNodeA,
                InternalPreviousNode prevNodeB) {
    if (!bIt.hasPrevious()) {
        return fromByteIndex;
    }
    final int needleLast = prevNodeB.execute(node, bIt, DecodingErrorHandler.DEFAULT);
    int candidateRawIndex = -1;
    int polls = 0;
    // forward pass: remember the position after the last occurrence of needleLast
    while (aIt.hasNext() && aIt.getRawIndex() < fromByteIndex) {
        final int current = nextNodeA.execute(node, aIt);
        if (current == needleLast) {
            candidateRawIndex = aIt.getRawIndex();
        }
        TStringConstants.truffleSafePointPoll(node, ++polls);
    }
    if (aIt.getRawIndex() < fromByteIndex || candidateRawIndex < 0) {
        return -1;
    }
    aIt.setRawIndex(candidateRawIndex);
    // position of bIt right before its last codepoint; restored after every failed attempt
    final int needleRestEnd = bIt.getRawIndex();
    while (aIt.getRawIndex() > toByteIndex && aIt.hasPrevious()) {
        final int candidate = prevNodeA.execute(node, aIt, DecodingErrorHandler.DEFAULT);
        if (candidate == needleLast) {
            if (!bIt.hasPrevious()) {
                return aIt.getRawIndex();
            }
            final int resumeAt = aIt.getRawIndex();
            while (bIt.hasPrevious()) {
                if (!aIt.hasPrevious()) {
                    return -1;
                }
                final int fromA = prevNodeA.execute(node, aIt, DecodingErrorHandler.DEFAULT);
                final int fromB = prevNodeB.execute(node, bIt, DecodingErrorHandler.DEFAULT);
                if (fromA != fromB) {
                    break;
                }
                if (!bIt.hasPrevious() && aIt.getRawIndex() >= toByteIndex) {
                    return aIt.getRawIndex();
                }
                TStringConstants.truffleSafePointPoll(node, ++polls);
            }
            aIt.setRawIndex(resumeAt);
            bIt.setRawIndex(needleRestEnd);
        }
        TStringConstants.truffleSafePointPoll(node, ++polls);
    }
    return -1;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy