All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.oracle.truffle.api.strings.TruffleString Maven / Gradle / Ivy

Go to download

Truffle is a multi-language framework for executing dynamic languages that achieves high performance when combined with Graal.

There is a newer version: 24.1.1
Show newest version
/*
 * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * The Universal Permissive License (UPL), Version 1.0
 *
 * Subject to the condition set forth below, permission is hereby granted to any
 * person obtaining a copy of this software, associated documentation and/or
 * data (collectively the "Software"), free of charge and under any and all
 * copyright rights in the Software, and any and all patent rights owned or
 * freely licensable by each licensor hereunder covering either (i) the
 * unmodified Software as contributed to or provided by such licensor, or (ii)
 * the Larger Works (as defined below), to deal in both
 *
 * (a) the Software, and
 *
 * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
 * one is included with the Software each a "Larger Work" to which the Software
 * is contributed by such licensors),
 *
 * without restriction, including without limitation the rights to copy, create
 * derivative works of, display, perform, and distribute the Software and make,
 * use, sell, offer for sale, import, export, have made, and have sold the
 * Software and the Larger Work(s), and to sublicense the foregoing rights on
 * either these or other terms.
 *
 * This license is subject to the following condition:
 *
 * The above copyright notice and either this complete permission notice or at a
 * minimum a reference to the UPL must be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package com.oracle.truffle.api.strings;

import static com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
import static com.oracle.truffle.api.strings.TStringGuards.indexOfCannotMatch;
import static com.oracle.truffle.api.strings.TStringGuards.is16Bit;
import static com.oracle.truffle.api.strings.TStringGuards.is7Bit;
import static com.oracle.truffle.api.strings.TStringGuards.is7BitCompatible;
import static com.oracle.truffle.api.strings.TStringGuards.is8Bit;
import static com.oracle.truffle.api.strings.TStringGuards.is8BitCompatible;
import static com.oracle.truffle.api.strings.TStringGuards.isAscii;
import static com.oracle.truffle.api.strings.TStringGuards.isBrokenFixedWidth;
import static com.oracle.truffle.api.strings.TStringGuards.isBrokenMultiByte;
import static com.oracle.truffle.api.strings.TStringGuards.isBytes;
import static com.oracle.truffle.api.strings.TStringGuards.isFixedWidth;
import static com.oracle.truffle.api.strings.TStringGuards.isInlinedJavaString;
import static com.oracle.truffle.api.strings.TStringGuards.isLatin1;
import static com.oracle.truffle.api.strings.TStringGuards.isStride0;
import static com.oracle.truffle.api.strings.TStringGuards.isStride1;
import static com.oracle.truffle.api.strings.TStringGuards.isSupportedEncoding;
import static com.oracle.truffle.api.strings.TStringGuards.isUTF16;
import static com.oracle.truffle.api.strings.TStringGuards.isUTF16Or32;
import static com.oracle.truffle.api.strings.TStringGuards.isUTF32;
import static com.oracle.truffle.api.strings.TStringGuards.isUTF8;
import static com.oracle.truffle.api.strings.TStringGuards.isUnsupportedEncoding;
import static com.oracle.truffle.api.strings.TStringGuards.isValidFixedWidth;
import static com.oracle.truffle.api.strings.TStringGuards.isValidMultiByte;
import static com.oracle.truffle.api.strings.TStringGuards.littleEndian;

import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.util.Arrays;
import java.util.BitSet;

import org.graalvm.collections.EconomicMap;
import org.graalvm.collections.EconomicSet;
import org.graalvm.collections.Equivalence;

import com.oracle.truffle.api.CompilerAsserts;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.Cached.Shared;
import com.oracle.truffle.api.dsl.GeneratePackagePrivate;
import com.oracle.truffle.api.dsl.GenerateUncached;
import com.oracle.truffle.api.dsl.ImportStatic;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.dsl.UnsupportedSpecializationException;
import com.oracle.truffle.api.nodes.DenyReplace;
import com.oracle.truffle.api.nodes.Node;
import com.oracle.truffle.api.nodes.NodeCost;
import com.oracle.truffle.api.profiles.BranchProfile;
import com.oracle.truffle.api.profiles.ConditionProfile;
import com.oracle.truffle.api.profiles.IntValueProfile;

/**
 * Represents a primitive String type, which can be reused across languages. Language implementers
 * are encouraged to use Truffle Strings as their language's string type for easier interoperability
 * and better performance. Truffle strings can be encoded in a number of {@link Encoding encodings}.
 * A {@link TruffleString} object can cache multiple representations (in multiple encodings) of the
 * same string in the string object itself. A single {@link TruffleString} instance can also
 * represent the same string in multiple encodings, if the string's content would be equal in all
 * such encodings (e.g. a string containing only ASCII characters can be viewed as being encoded in
 * almost any encoding, since the encoded bytes would be equal). To facilitate this, all methods
 * have an {@code expectedEncoding} parameter to indicate which encoding a given string should be
 * viewed in.
 * 

 * <p>
 * {@link TruffleString} instances can be created via one of the following nodes, or via
 * {@link TruffleStringBuilder}.
 * <ul>
 * <li>{@link FromByteArrayNode}</li>
 * <li>{@link FromCharArrayUTF16Node}</li>
 * <li>{@link FromJavaStringNode}</li>
 * <li>{@link FromIntArrayUTF32Node}</li>
 * <li>{@link FromNativePointerNode}</li>
 * <li>{@link FromCodePointNode}</li>
 * <li>{@link FromLongNode}</li>
 * </ul>
 *
 * For iteration use {@link TruffleStringIterator}. There is a version of {@link TruffleString} that
 * is also mutable. See {@link MutableTruffleString} for details.
 * <p>

* Please see the * tutorial * for further usage instructions. * * @since 22.1 */ public final class TruffleString extends AbstractTruffleString { private static final VarHandle NEXT_UPDATER = initializeNextUpdater(); @TruffleBoundary private static VarHandle initializeNextUpdater() { try { return MethodHandles.lookup().findVarHandle(TruffleString.class, "next", TruffleString.class); } catch (NoSuchFieldException | IllegalAccessException e) { throw new RuntimeException(e); } } private static final byte FLAG_CACHE_HEAD = (byte) 0x80; private final int codePointLength; private final byte codeRange; TruffleString next; private TruffleString(Object data, int offset, int length, int stride, Encoding encoding, int codePointLength, int codeRange, boolean isCacheHead) { super(data, offset, length, stride, encoding, isCacheHead ? FLAG_CACHE_HEAD : 0); assert codePointLength >= 0; assert validateCodeRange(encoding, codeRange); this.codePointLength = codePointLength; this.codeRange = (byte) codeRange; } private static TruffleString create(Object data, int offset, int length, int stride, Encoding encoding, int codePointLength, int codeRange, boolean isCacheHead) { TruffleString string = new TruffleString(data, offset, length, stride, encoding, codePointLength, codeRange, isCacheHead); if (AbstractTruffleString.DEBUG_ALWAYS_CREATE_JAVA_STRING) { string.toJavaStringUncached(); } return string; } private static boolean validateCodeRange(Encoding encoding, int codeRange) { assert isByte(codeRange); assert TSCodeRange.isCodeRange(codeRange); assert !isAscii(encoding) || is7Bit(codeRange) || isBrokenFixedWidth(codeRange); assert !isLatin1(encoding) || is7Bit(codeRange) || is8Bit(codeRange); assert !isUTF8(encoding) || !is8Bit(codeRange) && !is16Bit(codeRange) && !isValidFixedWidth(codeRange) && !isBrokenFixedWidth(codeRange); assert !isUTF16(encoding) || !isValidFixedWidth(codeRange) && !isBrokenFixedWidth(codeRange); assert !isUTF32(encoding) || !isValidMultiByte(codeRange) && 
!isBrokenMultiByte(codeRange); assert !isBytes(encoding) || is7Bit(codeRange) || isValidFixedWidth(codeRange); return true; } static TruffleString createFromByteArray(byte[] bytes, int length, int stride, Encoding encoding, int codePointLength, int codeRange) { return createFromByteArray(bytes, length, stride, encoding, codePointLength, codeRange, true); } static TruffleString createFromByteArray(byte[] bytes, int length, int stride, Encoding encoding, int codePointLength, int codeRange, boolean isCacheHead) { return createFromArray(bytes, 0, length, stride, encoding, codePointLength, codeRange, isCacheHead); } static TruffleString createFromArray(Object bytes, int offset, int length, int stride, Encoding encoding, int codePointLength, int codeRange) { return createFromArray(bytes, offset, length, stride, encoding, codePointLength, codeRange, true); } static TruffleString createFromArray(Object bytes, int offset, int length, int stride, Encoding encoding, int codePointLength, int codeRange, boolean isCacheHead) { assert bytes instanceof byte[] || isInlinedJavaString(bytes) || bytes instanceof NativePointer; assert offset >= 0; assert bytes instanceof NativePointer || offset + ((long) length << stride) <= TStringOps.byteLength(bytes); assert attrsAreCorrect(bytes, encoding, offset, length, codePointLength, codeRange, stride); if (DEBUG_NON_ZERO_OFFSET && bytes instanceof byte[]) { int byteLength = Math.toIntExact((long) length << stride); int add = byteLength; byte[] copy = new byte[add + byteLength]; System.arraycopy(bytes, offset, copy, add, byteLength); return TruffleString.create(copy, add, length, stride, encoding, codePointLength, codeRange, isCacheHead); } return TruffleString.create(bytes, offset, length, stride, encoding, codePointLength, codeRange, isCacheHead); } static TruffleString createConstant(byte[] bytes, int length, int stride, Encoding encoding, int codePointLength, int codeRange) { return createConstant(bytes, length, stride, encoding, 
codePointLength, codeRange, true); } static TruffleString createConstant(byte[] bytes, int length, int stride, Encoding encoding, int codePointLength, int codeRange, boolean isCacheHead) { TruffleString ret = createFromByteArray(bytes, length, stride, encoding, codePointLength, codeRange, isCacheHead); // eagerly compute cached hash ret.hashCode(); return ret; } static TruffleString createLazyLong(long value, Encoding encoding) { int length = NumberConversion.stringLengthLong(value); return TruffleString.create(new LazyLong(value), 0, length, 0, encoding, length, TSCodeRange.get7Bit(), true); } static TruffleString createLazyConcat(TruffleString a, TruffleString b, Encoding encoding, int length, int stride) { assert !TSCodeRange.isBrokenMultiByte(a.codeRange()); assert !TSCodeRange.isBrokenMultiByte(b.codeRange()); assert a.isLooselyCompatibleTo(encoding); assert b.isLooselyCompatibleTo(encoding); assert length == a.length() + b.length(); int codeRange = TSCodeRange.commonCodeRange(a.codeRange(), b.codeRange()); return TruffleString.create(new LazyConcat(a, b), 0, length, stride, encoding, a.codePointLength() + b.codePointLength(), codeRange, true); } static TruffleString createWrapJavaString(String str, int codePointLength, int codeRange) { int stride = TStringUnsafe.getJavaStringStride(str); return TruffleString.create(str, 0, str.length(), stride, Encoding.UTF_16, codePointLength, codeRange, false); } private static boolean attrsAreCorrect(Object bytes, Encoding encoding, int offset, int length, int codePointLength, int codeRange, int stride) { CompilerAsserts.neverPartOfCompilation(); if (length == 0) { int length0CodeRange = is7BitCompatible(encoding) ? TSCodeRange.get7Bit() : JCodings.getInstance().isSingleByte(encoding.jCoding) ? 
TSCodeRange.getValidFixedWidth() : TSCodeRange.getValidMultiByte(); return TStringOps.byteLength(bytes) == 0 && offset == 0 && codePointLength == 0 && codeRange == length0CodeRange && stride == 0; } int knownCodeRange = TSCodeRange.getUnknown(); if (isUTF16Or32(encoding) && stride == 0) { knownCodeRange = TSCodeRange.get8Bit(); } else if (isUTF32(encoding) && stride == 1) { knownCodeRange = TSCodeRange.get16Bit(); } if (bytes instanceof NativePointer) { ((NativePointer) bytes).materializeByteArray(length << stride, ConditionProfile.getUncached()); } long attrs = TStringInternalNodes.CalcStringAttributesNode.getUncached().execute(null, bytes, offset, length, stride, encoding, knownCodeRange); int cpLengthCalc = StringAttributes.getCodePointLength(attrs); int codeRangeCalc = StringAttributes.getCodeRange(attrs); assert cpLengthCalc == codePointLength : "inconsistent codePointLength: " + cpLengthCalc + " != " + codePointLength; assert codeRangeCalc == codeRange : "inconsistent codeRange: " + TSCodeRange.toString(codeRangeCalc) + " != " + TSCodeRange.toString(codeRange); return attrs == StringAttributes.create(codePointLength, codeRange); } boolean isLooselyCompatibleTo(Encoding expectedEncoding) { return isLooselyCompatibleTo(expectedEncoding.id, expectedEncoding.maxCompatibleCodeRange, codeRange()); } /** * Get this string's length in codepoints. */ int codePointLength() { return codePointLength; } /** * Get this string's code range as defined in {@link TSCodeRange}. 
*/ int codeRange() { return codeRange; } boolean isCacheHead() { assert ((flags() & FLAG_CACHE_HEAD) != 0) == (flags() < 0); return flags() < 0; } TruffleString getCacheHead() { assert cacheRingIsValid(); TruffleString cur = next; if (cur == null) { assert isCacheHead(); return this; } while (!cur.isCacheHead()) { cur = cur.next; } return cur; } @TruffleBoundary void cacheInsert(TruffleString entry) { assert !entry.isCacheHead(); // the cache head does never change TruffleString cacheHead = getCacheHead(); assert !cacheEntryEquals(cacheHead, entry); TruffleString cacheHeadNext; do { cacheHeadNext = cacheHead.next; if (hasDuplicateEncoding(cacheHead, cacheHeadNext, entry)) { return; } entry.next = cacheHeadNext == null ? cacheHead : cacheHeadNext; } while (!setNextAtomic(cacheHead, cacheHeadNext, entry)); } /* * Simpler and faster insertion for the case `this` and `entry` were just allocated together and * before they are published. The CAS is not needed in that case since we know nobody could * write to `next` fields before us. 
*/ void cacheInsertFirstBeforePublished(TruffleString entry) { assert !entry.isCacheHead(); assert isCacheHead(); assert next == null; TruffleString cacheHead = this; entry.next = cacheHead; cacheHead.next = entry; } private static boolean hasDuplicateEncoding(TruffleString cacheHead, TruffleString start, TruffleString insertEntry) { if (start == null) { return false; } TruffleString current = start; while (current != cacheHead) { if (cacheEntryEquals(insertEntry, current)) { return true; } current = current.next; } return false; } private static boolean cacheEntryEquals(TruffleString a, TruffleString b) { return b.encoding() == a.encoding() && (!isUTF16(a.encoding()) || b.isJavaString() == a.isJavaString()); } @TruffleBoundary private static boolean setNextAtomic(TruffleString cacheHead, TruffleString currentNext, TruffleString newNext) { return NEXT_UPDATER.compareAndSet(cacheHead, currentNext, newNext); } private boolean cacheRingIsValid() { CompilerAsserts.neverPartOfCompilation(); TruffleString head = null; TruffleString cur = this; boolean javaStringVisited = false; BitSet visitedEncodings = new BitSet(Encoding.values().length); EconomicSet visited = EconomicSet.create(Equivalence.IDENTITY_WITH_SYSTEM_HASHCODE); do { if (cur.isCacheHead()) { assert head == null : "multiple cache heads"; head = cur; } if (cur.isJavaString()) { assert !javaStringVisited : "duplicate cached java string"; javaStringVisited = true; } else { assert !visitedEncodings.get(cur.encoding()) : "duplicate encoding"; visitedEncodings.set(cur.encoding()); } assert visited.add(cur) : "not a ring structure"; cur = cur.next; } while (cur != this && cur != null); return true; } /** * The list of encodings supported by {@link TruffleString}. {@link TruffleString} is especially * optimized for the following encodings: *

     * <ul>
     * <li>{@code UTF-32}: this means UTF-32 in your system's endianness.
     * {@link TruffleString} transparently compacts UTF-32 strings to 8-bit or 16-bit
     * representations, where possible.</li>
     * <li>{@code UTF-16}: this means UTF-16 in your system's endianness.
     * {@link TruffleString} transparently compacts UTF-16 strings to 8-bit representations, where
     * possible.</li>
     * <li>{@code UTF-8}</li>
     * <li>{@code ISO-8859-1}</li>
     * <li>{@code US-ASCII}</li>
     * <li>{@code BYTES}, which is essentially identical to US-ASCII, with the only difference being
     * that {@code BYTES} treats all byte values as valid codepoints.</li>
     * </ul>
     * <p>

* All other encodings are supported using the JRuby JCodings library, which incurs more * {@link TruffleBoundary} calls. NOTE: to enable support for these encodings, * {@code TruffleLanguage.Registration#needsAllEncodings()} must be set to {@code true} in the * truffle language's registration. * * @since 22.1 */ public enum Encoding { /* directly supported encodings */ /** * UTF-32LE. Directly supported if the current system is little-endian. * * @since 22.1 */ UTF_32LE(littleEndian() ? 0 : 97, "UTF_32LE", littleEndian() ? 2 : 0), /** * UTF-32BE. Directly supported if the current system is big-endian. * * @since 22.1 */ UTF_32BE(littleEndian() ? 97 : 0, "UTF_32BE", littleEndian() ? 0 : 2), /** * UTF-16LE. Directly supported if the current system is little-endian. * * @since 22.1 */ UTF_16LE(littleEndian() ? 1 : 98, "UTF_16LE", littleEndian() ? 1 : 0), /** * UTF-16BE. Directly supported if the current system is big-endian. * * @since 22.1 */ UTF_16BE(littleEndian() ? 98 : 1, "UTF_16BE", littleEndian() ? 0 : 1), /** * ISO-8859-1, also known as LATIN-1, which is equivalent to US-ASCII + the LATIN-1 * Supplement Unicode block. * * @since 22.1 */ ISO_8859_1(2, "ISO_8859_1"), /** * UTF-8. * * @since 22.1 */ UTF_8(3, "UTF_8"), /** * US-ASCII, which maps only 7-bit characters. * * @since 22.1 */ US_ASCII(4, "US_ASCII"), /** * Special "encoding" BYTES: This encoding is identical to US-ASCII, but treats all values * outside the us-ascii range as valid codepoints as well. Caution: no codepoint mappings * are defined for non-us-ascii values in this encoding, so {@link SwitchEncodingNode} will * replace all of them with {@code '?'} when converting from or to BYTES! To preserve all * bytes and "reinterpret" a BYTES string in another encoding, use * {@link ForceEncodingNode}. * * @since 22.1 */ BYTES(5, "BYTES"), /* encodings supported by falling back to JCodings */ /** * Big5. * * @since 22.1 */ Big5(6, "Big5"), /** * Big5-HKSCS. 
* * @since 22.1 */ Big5_HKSCS(7, "Big5_HKSCS"), /** * Big5-UAO. * * @since 22.1 */ Big5_UAO(8, "Big5_UAO"), /** * CP51932. * * @since 22.1 */ CP51932(9, "CP51932"), /** * CP850. * * @since 22.1 */ CP850(10, "CP850"), /** * CP852. * * @since 22.1 */ CP852(11, "CP852"), /** * CP855. * * @since 22.1 */ CP855(12, "CP855"), /** * CP949. * * @since 22.1 */ CP949(13, "CP949"), /** * CP950. * * @since 22.1 */ CP950(14, "CP950"), /** * CP951. * * @since 22.1 */ CP951(15, "CP951"), /** * EUC-JIS-2004. * * @since 22.1 */ EUC_JIS_2004(16, "EUC_JIS_2004"), /** * EUC-JP. * * @since 22.1 */ EUC_JP(17, "EUC_JP"), /** * EUC-KR. * * @since 22.1 */ EUC_KR(18, "EUC_KR"), /** * EUC-TW. * * @since 22.1 */ EUC_TW(19, "EUC_TW"), /** * Emacs-Mule. * * @since 22.1 */ Emacs_Mule(20, "Emacs_Mule"), /** * EucJP-ms. * * @since 22.1 */ EucJP_ms(21, "EucJP_ms"), /** * GB12345. * * @since 22.1 */ GB12345(22, "GB12345"), /** * GB18030. * * @since 22.1 */ GB18030(23, "GB18030"), /** * GB1988. * * @since 22.1 */ GB1988(24, "GB1988"), /** * GB2312. * * @since 22.1 */ GB2312(25, "GB2312"), /** * GBK. * * @since 22.1 */ GBK(26, "GBK"), /** * IBM437. * * @since 22.1 */ IBM437(27, "IBM437"), /** * IBM737. * * @since 22.1 */ IBM737(28, "IBM737"), /** * IBM775. * * @since 22.1 */ IBM775(29, "IBM775"), /** * IBM852. * * @since 22.1 */ IBM852(30, "IBM852"), /** * IBM855. * * @since 22.1 */ IBM855(31, "IBM855"), /** * IBM857. * * @since 22.1 */ IBM857(32, "IBM857"), /** * IBM860. * * @since 22.1 */ IBM860(33, "IBM860"), /** * IBM861. * * @since 22.1 */ IBM861(34, "IBM861"), /** * IBM862. * * @since 22.1 */ IBM862(35, "IBM862"), /** * IBM863. * * @since 22.1 */ IBM863(36, "IBM863"), /** * IBM864. * * @since 22.1 */ IBM864(37, "IBM864"), /** * IBM865. * * @since 22.1 */ IBM865(38, "IBM865"), /** * IBM866. * * @since 22.1 */ IBM866(39, "IBM866"), /** * IBM869. * * @since 22.1 */ IBM869(40, "IBM869"), /** * ISO-8859-10. * * @since 22.1 */ ISO_8859_10(41, "ISO_8859_10"), /** * ISO-8859-11. 
* * @since 22.1 */ ISO_8859_11(42, "ISO_8859_11"), /** * ISO-8859-13. * * @since 22.1 */ ISO_8859_13(43, "ISO_8859_13"), /** * ISO-8859-14. * * @since 22.1 */ ISO_8859_14(44, "ISO_8859_14"), /** * ISO-8859-15. * * @since 22.1 */ ISO_8859_15(45, "ISO_8859_15"), /** * ISO-8859-16. * * @since 22.1 */ ISO_8859_16(46, "ISO_8859_16"), /** * ISO-8859-2. * * @since 22.1 */ ISO_8859_2(47, "ISO_8859_2"), /** * ISO-8859-3. * * @since 22.1 */ ISO_8859_3(48, "ISO_8859_3"), /** * ISO-8859-4. * * @since 22.1 */ ISO_8859_4(49, "ISO_8859_4"), /** * ISO-8859-5. * * @since 22.1 */ ISO_8859_5(50, "ISO_8859_5"), /** * ISO-8859-6. * * @since 22.1 */ ISO_8859_6(51, "ISO_8859_6"), /** * ISO-8859-7. * * @since 22.1 */ ISO_8859_7(52, "ISO_8859_7"), /** * ISO-8859-8. * * @since 22.1 */ ISO_8859_8(53, "ISO_8859_8"), /** * ISO-8859-9. * * @since 22.1 */ ISO_8859_9(54, "ISO_8859_9"), /** * KOI8-R. * * @since 22.1 */ KOI8_R(55, "KOI8_R"), /** * KOI8-U. * * @since 22.1 */ KOI8_U(56, "KOI8_U"), /** * MacCentEuro. * * @since 22.1 */ MacCentEuro(57, "MacCentEuro"), /** * MacCroatian. * * @since 22.1 */ MacCroatian(58, "MacCroatian"), /** * MacCyrillic. * * @since 22.1 */ MacCyrillic(59, "MacCyrillic"), /** * MacGreek. * * @since 22.1 */ MacGreek(60, "MacGreek"), /** * MacIceland. * * @since 22.1 */ MacIceland(61, "MacIceland"), /** * MacJapanese. * * @since 22.1 */ MacJapanese(62, "MacJapanese"), /** * MacRoman. * * @since 22.1 */ MacRoman(63, "MacRoman"), /** * MacRomania. * * @since 22.1 */ MacRomania(64, "MacRomania"), /** * MacThai. * * @since 22.1 */ MacThai(65, "MacThai"), /** * MacTurkish. * * @since 22.1 */ MacTurkish(66, "MacTurkish"), /** * MacUkraine. * * @since 22.1 */ MacUkraine(67, "MacUkraine"), /** * SJIS-DoCoMo. * * @since 22.1 */ SJIS_DoCoMo(68, "SJIS_DoCoMo"), /** * SJIS-KDDI. * * @since 22.1 */ SJIS_KDDI(69, "SJIS_KDDI"), /** * SJIS-SoftBank. * * @since 22.1 */ SJIS_SoftBank(70, "SJIS_SoftBank"), /** * Shift-JIS. 
* * @since 22.1 */ Shift_JIS(71, "Shift_JIS"), /** * Stateless-ISO-2022-JP. * * @since 22.1 */ Stateless_ISO_2022_JP(72, "Stateless_ISO_2022_JP"), /** * Stateless-ISO-2022-JP-KDDI. * * @since 22.1 */ Stateless_ISO_2022_JP_KDDI(73, "Stateless_ISO_2022_JP_KDDI"), /** * TIS-620. * * @since 22.1 */ TIS_620(74, "TIS_620"), /** * UTF8-DoCoMo. * * @since 22.1 */ UTF8_DoCoMo(75, "UTF8_DoCoMo"), /** * UTF8-KDDI. * * @since 22.1 */ UTF8_KDDI(76, "UTF8_KDDI"), /** * UTF8-MAC. * * @since 22.1 */ UTF8_MAC(77, "UTF8_MAC"), /** * UTF8-SoftBank. * * @since 22.1 */ UTF8_SoftBank(78, "UTF8_SoftBank"), /** * Windows-1250. * * @since 22.1 */ Windows_1250(79, "Windows_1250"), /** * Windows-1251. * * @since 22.1 */ Windows_1251(80, "Windows_1251"), /** * Windows-1252. * * @since 22.1 */ Windows_1252(81, "Windows_1252"), /** * Windows-1253. * * @since 22.1 */ Windows_1253(82, "Windows_1253"), /** * Windows-1254. * * @since 22.1 */ Windows_1254(83, "Windows_1254"), /** * Windows-1255. * * @since 22.1 */ Windows_1255(84, "Windows_1255"), /** * Windows-1256. * * @since 22.1 */ Windows_1256(85, "Windows_1256"), /** * Windows-1257. * * @since 22.1 */ Windows_1257(86, "Windows_1257"), /** * Windows-1258. * * @since 22.1 */ Windows_1258(87, "Windows_1258"), /** * Windows-31J. * * @since 22.1 */ Windows_31J(88, "Windows_31J"), /** * Windows-874. * * @since 22.1 */ Windows_874(89, "Windows_874"), /* non-ascii-compatible encodings */ /** * CP50220. * * @since 22.1 */ CP50220(90, "CP50220"), /** * CP50221. * * @since 22.1 */ CP50221(91, "CP50221"), /** * IBM037. * * @since 22.1 */ IBM037(92, "IBM037"), /** * ISO-2022-JP. * * @since 22.1 */ ISO_2022_JP(93, "ISO_2022_JP"), /** * ISO-2022-JP-2. * * @since 22.1 */ ISO_2022_JP_2(94, "ISO_2022_JP_2"), /** * ISO-2022-JP-KDDI. * * @since 22.1 */ ISO_2022_JP_KDDI(95, "ISO_2022_JP_KDDI"), /** * UTF-7. * * @since 22.1 */ UTF_7(96, "UTF_7"); /** * UTF-32 in the current system's endianness, without byte-order mark, with * transparent string compaction. 
* * @since 22.1 */ public static final Encoding UTF_32 = littleEndian() ? UTF_32LE : UTF_32BE; /** * UTF-16 in the current system's endianness, without byte-order mark, with * transparent string compaction. * * @since 22.1 */ public static final Encoding UTF_16 = littleEndian() ? UTF_16LE : UTF_16BE; final byte id; final String name; final JCodings.Encoding jCoding; final byte maxCompatibleCodeRange; final byte naturalStride; Encoding(int id, String name) { this(id, name, 0); } Encoding(int id, String name, int naturalStride) { assert id <= 0x7f; assert Stride.isStride(naturalStride); this.id = (byte) id; this.name = name; this.jCoding = JCodings.ENABLED ? JCodings.getInstance().get(name) : null; if (is16BitCompatible()) { maxCompatibleCodeRange = (byte) (TSCodeRange.get16Bit() + 1); } else if (is8BitCompatible()) { maxCompatibleCodeRange = (byte) (TSCodeRange.get8Bit() + 1); } else if (is7BitCompatible()) { maxCompatibleCodeRange = (byte) (TSCodeRange.get7Bit() + 1); } else { maxCompatibleCodeRange = 0; } this.naturalStride = (byte) naturalStride; } @CompilationFinal(dimensions = 1) private static final Encoding[] ENCODINGS_TABLE = new Encoding[Encoding.values().length]; @CompilationFinal(dimensions = 1) private static final JCodings.Encoding[] J_CODINGS_TABLE = new JCodings.Encoding[Encoding.values().length]; @CompilationFinal(dimensions = 1) private static final byte[] MAX_COMPATIBLE_CODE_RANGE = new byte[Encoding.values().length]; @CompilationFinal(dimensions = 1) private static final TruffleString[] EMPTY_STRINGS = new TruffleString[Encoding.values().length]; private static final EconomicMap J_CODINGS_NAME_MAP = EconomicMap.create(Encoding.values().length); static { for (Encoding e : Encoding.values()) { assert ENCODINGS_TABLE[e.id] == null; ENCODINGS_TABLE[e.id] = e; assert J_CODINGS_TABLE[e.id] == null; J_CODINGS_TABLE[e.id] = e.jCoding; MAX_COMPATIBLE_CODE_RANGE[e.id] = e.maxCompatibleCodeRange; if (JCodings.ENABLED) { 
J_CODINGS_NAME_MAP.put(JCodings.getInstance().name(e.jCoding), e); } } assert UTF_16.naturalStride == 1; assert UTF_32.naturalStride == 2; EMPTY_STRINGS[US_ASCII.id] = createConstant(new byte[0], 0, 0, US_ASCII, 0, TSCodeRange.get7Bit()); for (Encoding e : Encoding.values()) { if (e != US_ASCII) { assert EMPTY_STRINGS[e.id] == null; if (e.isSupported() || JCodings.ENABLED) { EMPTY_STRINGS[e.id] = createEmpty(e); } } } } private static TruffleString createEmpty(Encoding encoding) { if (encoding.is7BitCompatible() && !AbstractTruffleString.DEBUG_STRICT_ENCODING_CHECKS || encoding == Encoding.US_ASCII) { return EMPTY_STRINGS[US_ASCII.id]; } TruffleString ret = createConstant(new byte[0], 0, 0, encoding, 0, TSCodeRange.getAsciiCodeRange(encoding), false); EMPTY_STRINGS[US_ASCII.id].cacheInsert(ret); return ret; } /** * Get an empty {@link TruffleString} with this encoding. * * @since 22.1 */ public TruffleString getEmpty() { return EMPTY_STRINGS[id]; } /** * Get the {@link Encoding} corresponding to the given encoding name from the * {@code JCodings} library. 
* * @since 22.1 */ public static Encoding fromJCodingName(String name) { Encoding encoding = J_CODINGS_NAME_MAP.get(name, null); if (encoding == null) { throw InternalErrors.unknownEncoding(name); } return encoding; } static Encoding get(int encoding) { return ENCODINGS_TABLE[encoding]; } static JCodings.Encoding getJCoding(int encoding) { assert J_CODINGS_TABLE[encoding] == get(encoding).jCoding; return J_CODINGS_TABLE[encoding]; } static int getMaxCompatibleCodeRange(int encoding) { return MAX_COMPATIBLE_CODE_RANGE[encoding]; } boolean is7BitCompatible() { return is7BitCompatible(id); } boolean is8BitCompatible() { return is8BitCompatible(id); } boolean is16BitCompatible() { return is16BitCompatible(id); } boolean isSupported() { return isSupported(id); } boolean isUnsupported() { return isUnsupported(id); } static boolean is7BitCompatible(int encoding) { return encoding < 90; } static boolean is8BitCompatible(int encoding) { return encoding < 3; } static boolean is16BitCompatible(int encoding) { return encoding < 2; } static boolean isSupported(int encoding) { return encoding < 6; } static boolean isUnsupported(int encoding) { return encoding >= 6; } static boolean isFixedWidth(int encoding) { return JCodings.getInstance().isFixedWidth(getJCoding(encoding)); } static boolean isFixedWidth(Encoding encoding) { return JCodings.getInstance().isFixedWidth(encoding.jCoding); } } /** * Provides information about a string's content. All values of this enum describe a set of * codepoints potentially contained by a string reporting said value. * * @since 22.1 */ public enum CodeRange { /** * All codepoints in this string are part of the Basic Latin Unicode block, also known as * ASCII (0x00 - 0x7f). * * @since 22.1 */ ASCII, /** * All codepoints in this string are part of the ISO-8859-1 character set (0x00 - 0xff), * which is equivalent to the union of the Basic Latin and the Latin-1 Supplement Unicode * block. 
At least one codepoint is outside the ASCII range (greater than 0x7f). Applicable * to {@link Encoding#ISO_8859_1}, {@link Encoding#UTF_16} and {@link Encoding#UTF_32} only. * * @since 22.1 */ LATIN_1, /** * All codepoints in this string are part of the Unicode Basic Multilingual Plane (BMP) ( * 0x0000 - 0xffff). At least one codepoint is outside the LATIN_1 range (greater than * 0xff). Applicable to {@link Encoding#UTF_16} and {@link Encoding#UTF_32} only. * * @since 22.1 */ BMP, /** * This string is encoded correctly ({@link IsValidNode} returns {@code true}), and at least * one codepoint is outside the largest other applicable code range (e.g. greater than 0x7f * on {@link Encoding#UTF_8}, greater than 0xffff on {@link Encoding#UTF_16}). * * @since 22.1 */ VALID, /** * This string is not encoded correctly ({@link IsValidNode} returns {@code false}), and * contains at least one invalid codepoint. * * @since 22.1 */ BROKEN; /** * Returns {@code true} if this set of potential codepoints is equal to or contained by * {@code other}. * * @since 22.1 */ public boolean isSubsetOf(CodeRange other) { return ordinal() <= other.ordinal(); } /** * Returns {@code true} if this set of potential codepoints is equal to or contains * {@code other}. * * @since 22.1 */ public boolean isSupersetOf(CodeRange other) { return ordinal() >= other.ordinal(); } @CompilationFinal(dimensions = 1) private static final CodeRange[] CODE_RANGES = { CodeRange.ASCII, CodeRange.LATIN_1, CodeRange.BMP, CodeRange.VALID, CodeRange.BROKEN, CodeRange.VALID, CodeRange.BROKEN}; @CompilationFinal(dimensions = 1) private static final CodeRange[] BYTE_CODE_RANGES = { CodeRange.ASCII, CodeRange.VALID, CodeRange.VALID, CodeRange.VALID, CodeRange.BROKEN, CodeRange.VALID, CodeRange.BROKEN}; static CodeRange get(int codeRange) { return CODE_RANGES[codeRange]; } static CodeRange getByteCodeRange(int codeRange, Encoding encoding) { return codeRange == TSCodeRange.get7Bit() && isUTF16Or32(encoding) ? 
CodeRange.VALID : BYTE_CODE_RANGES[codeRange]; } static boolean equals(int codeRange, CodeRange codeRangeEnum) { return codeRange == codeRangeEnum.ordinal() || codeRangeEnum == VALID && isValidMultiByte(codeRange) || codeRangeEnum == BROKEN && isBrokenMultiByte(codeRange); } static { assert get(TSCodeRange.get7Bit()) == CodeRange.ASCII; assert get(TSCodeRange.get8Bit()) == CodeRange.LATIN_1; assert get(TSCodeRange.get16Bit()) == CodeRange.BMP; assert get(TSCodeRange.getValidFixedWidth()) == CodeRange.VALID; assert get(TSCodeRange.getBrokenFixedWidth()) == CodeRange.BROKEN; assert get(TSCodeRange.getValidMultiByte()) == CodeRange.VALID; assert get(TSCodeRange.getBrokenMultiByte()) == CodeRange.BROKEN; assert equals(TSCodeRange.get7Bit(), CodeRange.ASCII); assert equals(TSCodeRange.get8Bit(), CodeRange.LATIN_1); assert equals(TSCodeRange.get16Bit(), CodeRange.BMP); assert equals(TSCodeRange.getValidFixedWidth(), CodeRange.VALID); assert equals(TSCodeRange.getBrokenFixedWidth(), CodeRange.BROKEN); assert equals(TSCodeRange.getValidMultiByte(), CodeRange.VALID); assert equals(TSCodeRange.getBrokenMultiByte(), CodeRange.BROKEN); assert TSCodeRange.getUnknown() == CODE_RANGES.length; } } /** * Extended parameter type for the operations {@link ByteIndexOfStringNode}, * {@link LastByteIndexOfStringNode} and {@link RegionEqualByteIndexNode}. These operations can * optionally perform a logical OR operation when matching their string parameters against each * other, in the following way: *

* Given a parameter {@link TruffleString} {@code a} and {@link WithMask} {@code b}, region * equality will be checked as shown in this exemplary method: * *

     * <pre>
     * {@code
     * boolean regionEquals(TruffleString a, int fromIndexA, TruffleString.WithMask b, int fromIndexB) {
     *     for (int i = 0; i < length; i++) {
     *         if ((readRaw(a, fromIndexA + i) | readRaw(b.mask, i)) != readRaw(b.string, fromIndexB + i)) {
     *             return false;
     *         }
     *     }
     *     return true;
     * }
     * }
     * </pre>
* * @see ByteIndexOfStringNode * @see LastByteIndexOfStringNode * @see RegionEqualByteIndexNode * @since 22.1 */ public static final class WithMask { final AbstractTruffleString string; @CompilationFinal(dimensions = 1) final byte[] mask; WithMask(AbstractTruffleString string, byte[] mask) { this.string = string; this.mask = mask; } /** * Node to create a new {@link WithMask} from a string and a byte array. See * {@code #execute(AbstractTruffleString, byte[], Encoding)} for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class CreateNode extends Node { CreateNode() { } /** * Creates a new {@link WithMask} from {@code a} and {@code mask}. {@code mask.length} * must be equal to the string's length in bytes. Cannot be used for UTF-16 or UTF-32 * strings. * * @since 22.1 */ public abstract WithMask execute(AbstractTruffleString a, byte[] mask, Encoding expectedEncoding); @Specialization WithMask doCreate(AbstractTruffleString a, byte[] mask, Encoding expectedEncoding) { if (expectedEncoding == Encoding.UTF_16 || expectedEncoding == Encoding.UTF_32) { throw InternalErrors.illegalArgument("use a CreateUTF16Node for UTF-16, and CreateUTF32Node for UTF-32"); } a.checkEncoding(expectedEncoding); checkMaskLength(a, mask.length); assert isStride0(a); return new WithMask(a, Arrays.copyOf(mask, mask.length)); } /** * Create a new {@link TruffleString.WithMask.CreateNode}. * * @since 22.1 */ public static TruffleString.WithMask.CreateNode create() { return TruffleStringFactory.WithMaskFactory.CreateNodeGen.create(); } /** * Get the uncached version of {@link TruffleString.WithMask.CreateNode}. * * @since 22.1 */ public static TruffleString.WithMask.CreateNode getUncached() { return TruffleStringFactory.WithMaskFactory.CreateNodeGen.getUncached(); } } /** * Shorthand for calling the uncached version of {@link CreateNode}. 
* * @since 22.1 */
        public static WithMask createUncached(AbstractTruffleString a, byte[] mask, Encoding expectedEncoding) {
            return CreateNode.getUncached().execute(a, mask, expectedEncoding);
        }

        /**
         * Node to create a new {@link WithMask} from a UTF-16 string and a char array. See
         * {@code #execute(AbstractTruffleString, char[])} for details.
         *
         * @since 22.1
         */
        @ImportStatic(TStringGuards.class)
        @GeneratePackagePrivate
        @GenerateUncached
        public abstract static class CreateUTF16Node extends Node {

            CreateUTF16Node() {
            }

            /**
             * Creates a new {@link WithMask} from {@code a} and {@code mask}. {@code mask.length}
             * must be equal to the string's length in {@code char}s.
             *
             * @since 22.1
             */
            public abstract WithMask execute(AbstractTruffleString a, char[] mask);

            @Specialization
            WithMask doCreate(AbstractTruffleString a, char[] mask) {
                a.checkEncoding(Encoding.UTF_16);
                checkMaskLength(a, mask.length);
                // The mask buffer gets the same byte size as the string content (length << stride).
                byte[] maskBytes = new byte[a.length() << a.stride()];
                // The target stride passed to the copy routine mirrors the string's own stride; it is
                // spelled out as a literal in each branch rather than passed as a.stride().
                // NOTE(review): presumably so the copy is specialized per stride - confirm.
                if (a.stride() == 0) {
                    TStringOps.arraycopyWithStrideCB(this, mask, 0, maskBytes, 0, 0, mask.length);
                } else {
                    TStringOps.arraycopyWithStrideCB(this, mask, 0, maskBytes, 0, 1, mask.length);
                }
                return new WithMask(a, maskBytes);
            }

            /**
             * Create a new {@link TruffleString.WithMask.CreateUTF16Node}.
             *
             * @since 22.1
             */
            public static TruffleString.WithMask.CreateUTF16Node create() {
                return TruffleStringFactory.WithMaskFactory.CreateUTF16NodeGen.create();
            }

            /**
             * Get the uncached version of {@link TruffleString.WithMask.CreateUTF16Node}.
             *
             * @since 22.1
             */
            public static TruffleString.WithMask.CreateUTF16Node getUncached() {
                return TruffleStringFactory.WithMaskFactory.CreateUTF16NodeGen.getUncached();
            }
        }

        /**
         * Shorthand for calling the uncached version of {@link CreateUTF16Node}.
         *
         * @since 22.1
         */
        public static WithMask createUTF16Uncached(AbstractTruffleString a, char[] mask) {
            return CreateUTF16Node.getUncached().execute(a, mask);
        }

        /**
         * Node to create a new {@link WithMask} from a UTF-32 string and an int array.
See
         * {@code #execute(AbstractTruffleString, int[])} for details.
         *
         * @since 22.1
         */
        @ImportStatic(TStringGuards.class)
        @GeneratePackagePrivate
        @GenerateUncached
        public abstract static class CreateUTF32Node extends Node {

            CreateUTF32Node() {
            }

            /**
             * Creates a new {@link WithMask} from {@code a} and {@code mask}. {@code mask.length}
             * must be equal to the string's length in {@code int}s.
             *
             * @since 22.1
             */
            public abstract WithMask execute(AbstractTruffleString a, int[] mask);

            @Specialization
            WithMask doCreate(AbstractTruffleString a, int[] mask) {
                a.checkEncoding(Encoding.UTF_32);
                checkMaskLength(a, mask.length);
                // The mask buffer gets the same byte size as the string content (length << stride).
                byte[] maskBytes = new byte[a.length() << a.stride()];
                // The target stride passed to the copy routine mirrors the string's own stride
                // (0, 1 or 2); it is spelled out as a literal in each branch.
                // NOTE(review): presumably so the copy is specialized per stride - confirm.
                if (a.stride() == 0) {
                    TStringOps.arraycopyWithStrideIB(this, mask, 0, maskBytes, 0, 0, mask.length);
                } else if (a.stride() == 1) {
                    TStringOps.arraycopyWithStrideIB(this, mask, 0, maskBytes, 0, 1, mask.length);
                } else {
                    TStringOps.arraycopyWithStrideIB(this, mask, 0, maskBytes, 0, 2, mask.length);
                }
                return new WithMask(a, maskBytes);
            }

            /**
             * Create a new {@link TruffleString.WithMask.CreateUTF32Node}.
             *
             * @since 22.1
             */
            public static TruffleString.WithMask.CreateUTF32Node create() {
                return TruffleStringFactory.WithMaskFactory.CreateUTF32NodeGen.create();
            }

            /**
             * Get the uncached version of {@link TruffleString.WithMask.CreateUTF32Node}.
             *
             * @since 22.1
             */
            public static TruffleString.WithMask.CreateUTF32Node getUncached() {
                return TruffleStringFactory.WithMaskFactory.CreateUTF32NodeGen.getUncached();
            }
        }

        /**
         * Shorthand for calling the uncached version of {@link CreateUTF32Node}.
* * @since 22.1 */ public static WithMask createUTF32Uncached(AbstractTruffleString a, int[] mask) { return CreateUTF32Node.getUncached().execute(a, mask); } private static void checkMaskLength(AbstractTruffleString string, int length) { if (length != string.length()) { throw InternalErrors.illegalArgument("mask length does not match string length!"); } } } /** * Error handling instructions for operations that return integer values, such as indices or * code points. * * @since 22.3 */ public enum ErrorHandling { /** * This mode generally means that the operation will try to determine the "most reasonable" * or "most useful" return value in respect to the expected encoding and the error that * occurred. * * For example: best-effort error handling will cause {@link CodePointAtByteIndexNode} to * return the value of the integer read when reading an invalid codepoint from a * {@link Encoding#UTF_32} string. * * @since 22.3 */ BEST_EFFORT, /** * This mode will cause a negative value to be returned in all error cases. * * For example: return-negative error handling will cause {@link CodePointAtByteIndexNode} * to return a negative value when reading an invalid codepoint from a * {@link Encoding#UTF_32} string. * * @since 22.3 */ RETURN_NEGATIVE } /** * Node to create a new {@link TruffleString} from a single codepoint. * * @since 22.1 */ @GeneratePackagePrivate @GenerateUncached public abstract static class FromCodePointNode extends Node { FromCodePointNode() { } /** * Creates a new TruffleString from a given code point. * * @since 22.1 */ public final TruffleString execute(int codepoint, Encoding encoding) { return execute(codepoint, encoding, encoding == Encoding.UTF_16); } /** * Creates a new TruffleString from a given code point. * * If {@code allowUTF16Surrogates} is {@code true}, {@link Character#isSurrogate(char) * UTF-16 surrogate values} passed as {@code codepoint} will not result in a {@code null} * return value, but instead be encoded on a best-effort basis. 
This option is only
         * supported on {@link TruffleString.Encoding#UTF_16} and
         * {@link TruffleString.Encoding#UTF_32}.
         *
         * @return a new {@link TruffleString}, or {@code null} if the given codepoint is not
         *         defined in the given encoding.
         *
         * @since 22.2
         */
        public abstract TruffleString execute(int codepoint, Encoding encoding, boolean allowUTF16Surrogates);

        @Specialization
        static TruffleString fromCodePoint(int c, Encoding enc, boolean allowUTF16Surrogates,
                        @Cached ConditionProfile bytesProfile,
                        @Cached ConditionProfile utf8Profile,
                        @Cached ConditionProfile utf16Profile,
                        @Cached ConditionProfile utf32Profile,
                        @Cached ConditionProfile exoticProfile,
                        @Cached ConditionProfile bmpProfile,
                        @Cached BranchProfile invalidCodePoint) {
            assert !allowUTF16Surrogates || isUTF16Or32(enc) : "allowUTF16Surrogates is only supported on UTF-16 and UTF-32";
            CompilerAsserts.partialEvaluationConstant(allowUTF16Surrogates);
            // Fast paths: values that fit into a single byte are served from TStringConstants.
            // Unsigned comparisons make negative inputs fall through to the encoding-specific checks.
            if (is7BitCompatible(enc) && Integer.compareUnsigned(c, 0x7f) <= 0) {
                return TStringConstants.getSingleByteAscii(enc, c);
            }
            if (is8BitCompatible(enc) && Integer.compareUnsigned(c, 0xff) <= 0) {
                assert isSupportedEncoding(enc);
                return TStringConstants.getSingleByte(enc, c);
            }
            // BYTES: every value up to 0xff is accepted, anything larger is rejected.
            if (bytesProfile.profile(isBytes(enc))) {
                if (Integer.compareUnsigned(c, 0xff) > 0) {
                    invalidCodePoint.enter();
                    return null;
                }
                return TStringConstants.getSingleByte(Encoding.BYTES, c);
            }
            // Each branch below either returns null or assigns all four of these exactly once.
            final byte[] bytes;
            final int length;
            final int stride;
            final int codeRange;
            if (utf8Profile.profile(isUTF8(enc))) {
                if (!Encodings.isValidUnicodeCodepoint(c)) {
                    invalidCodePoint.enter();
                    return null;
                }
                // values up to 0x7f are expected to have been handled by the fast path above
                assert c > 0x7f;
                bytes = Encodings.utf8Encode(c);
                length = bytes.length;
                stride = 0;
                codeRange = TSCodeRange.getValidMultiByte();
            } else if (utf16Profile.profile(isUTF16(enc))) {
                // the unsigned widening also rejects negative values
                if (Integer.toUnsignedLong(c) > 0x10ffff) {
                    invalidCodePoint.enter();
                    return null;
                }
                // values up to 0xff are expected to have been handled by the fast path above
                assert c > 0xff;
                // one 16-bit unit for values up to 0xffff, two units (surrogate pair) otherwise
                bytes = new byte[c <= 0xffff ? 2 : 4];
                stride = 1;
                if (bmpProfile.profile(c <= 0xffff)) {
                    length = 1;
                    if (Encodings.isUTF16Surrogate(c)) {
                        // a lone surrogate is only representable when explicitly allowed, and the
                        // resulting string is then marked as broken
                        if (allowUTF16Surrogates) {
                            codeRange = TSCodeRange.getBrokenMultiByte();
                        } else {
                            invalidCodePoint.enter();
                            return null;
                        }
                    } else {
                        codeRange = TSCodeRange.get16Bit();
                    }
                    TStringOps.writeToByteArray(bytes, 1, 0, c);
                } else {
                    length = 2;
                    codeRange = TSCodeRange.getValidMultiByte();
                    Encodings.utf16EncodeSurrogatePair(c, bytes, 0);
                }
            } else if (utf32Profile.profile(isUTF32(enc))) {
                // the unsigned widening also rejects negative values
                if (Integer.toUnsignedLong(c) > 0x10ffff) {
                    invalidCodePoint.enter();
                    return null;
                }
                // values up to 0xff are expected to have been handled by the fast path above
                assert c > 0xff;
                if (c <= 0xffff) {
                    if (Encodings.isUTF16Surrogate(c)) {
                        if (allowUTF16Surrogates) {
                            codeRange = TSCodeRange.getBrokenFixedWidth();
                        } else {
                            invalidCodePoint.enter();
                            return null;
                        }
                    } else {
                        codeRange = TSCodeRange.get16Bit();
                    }
                } else {
                    codeRange = TSCodeRange.getValidFixedWidth();
                }
                // Only a 16-bit code range is stored compacted (2 bytes, stride 1); every other
                // code range, including an allowed surrogate, uses 4 bytes with stride 2.
                final boolean compact1 = TSCodeRange.is16Bit(codeRange);
                bytes = new byte[compact1 ? 2 : 4];
                length = 1;
                if (bmpProfile.profile(compact1)) {
                    stride = 1;
                    TStringOps.writeToByteArray(bytes, 1, 0, c);
                } else {
                    stride = 2;
                    TStringOps.writeToByteArray(bytes, 2, 0, c);
                }
            } else if (exoticProfile.profile(!isSupportedEncoding(enc))) {
                // all remaining encodings are delegated to JCodings
                assert !isBytes(enc);
                JCodings.Encoding jCodingsEnc = JCodings.getInstance().get(enc);
                length = JCodings.getInstance().getCodePointLength(jCodingsEnc, c);
                stride = 0;
                codeRange = JCodings.getInstance().isSingleByte(jCodingsEnc) ? TSCodeRange.getValidFixedWidth() : TSCodeRange.getValidMultiByte();
                if (length < 1) {
                    invalidCodePoint.enter();
                    return null;
                }
                bytes = new byte[length];
                int ret = JCodings.getInstance().writeCodePoint(jCodingsEnc, c, bytes, 0);
                // Round-trip check: the number of bytes written, the length decoded from those
                // bytes, and the codepoint read back must all agree with the request.
                if (ret != length || JCodings.getInstance().getCodePointLength(jCodingsEnc, bytes, 0, length) != ret || JCodings.getInstance().readCodePoint(jCodingsEnc, bytes, 0, length) != c) {
                    invalidCodePoint.enter();
                    return null;
                }
            } else {
                // only ASCII / Latin-1 with a value outside their range are expected to get here
                assert isAscii(enc) && Integer.compareUnsigned(c, 0x7f) > 0 || (isLatin1(enc) && Integer.compareUnsigned(c, 0xff) > 0);
                invalidCodePoint.enter();
                return null;
            }
            // NOTE(review): the literal 1 is presumably the codepoint length of the result - confirm
            // against TruffleString.createFromByteArray.
            return TruffleString.createFromByteArray(bytes, length, stride, enc, 1, codeRange);
        }

        /**
         * Create a new {@link FromCodePointNode}.
         *
         * @since 22.1
         */
        public static FromCodePointNode create() {
            return TruffleStringFactory.FromCodePointNodeGen.create();
        }

        /**
         * Get the uncached version of {@link FromCodePointNode}.
         *
         * @since 22.1
         */
        public static FromCodePointNode getUncached() {
            return TruffleStringFactory.FromCodePointNodeGen.getUncached();
        }
    }

    /**
     * Shorthand for calling the uncached version of {@link FromCodePointNode}.
     *
     * @since 22.1
     */
    @TruffleBoundary
    public static TruffleString fromCodePointUncached(int codepoint, Encoding encoding) {
        return FromCodePointNode.getUncached().execute(codepoint, encoding);
    }

    /**
     * Shorthand for calling the uncached version of {@link FromCodePointNode}.
     *
     * @since 22.2
     */
    @TruffleBoundary
    public static TruffleString fromCodePointUncached(int codepoint, Encoding encoding, boolean allowUTF16Surrogates) {
        return FromCodePointNode.getUncached().execute(codepoint, encoding, allowUTF16Surrogates);
    }

    /**
     * Node to create a new {@link TruffleString} from a {@code long} value. See
     * {@link #execute(long, TruffleString.Encoding, boolean)} for details.
* * @since 22.1 */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class FromLongNode extends Node {

        FromLongNode() {
        }

        /**
         * Creates a string holding the base-10 representation of the given long value, written
         * with ASCII digits (0x30 - 0x39). Encodings that are not compatible with the ASCII
         * character set are rejected by this operation.
         *
         * @param lazy if true, the string representation of the number is computed lazily the first
         *            time it is needed. This parameter is expected to be
         *            {@link CompilerAsserts#partialEvaluationConstant(boolean) partial evaluation
         *            constant}.
         *
         * @since 22.1
         */
        public abstract TruffleString execute(long value, Encoding encoding, boolean lazy);

        /** Lazy variant: defers the conversion to {@code TruffleString.createLazyLong}. */
        @Specialization(guards = {"is7BitCompatible(enc)", "lazy"})
        static TruffleString doLazy(long value, Encoding enc, @SuppressWarnings("unused") boolean lazy) {
            CompilerAsserts.partialEvaluationConstant(lazy);
            final TruffleString lazyString = TruffleString.createLazyLong(value, enc);
            return lazyString;
        }

        /** Eager variant: converts the number right away into a 7-bit, stride-0 string. */
        @Specialization(guards = {"is7BitCompatible(enc)", "!lazy"})
        static TruffleString doEager(long value, Encoding enc, @SuppressWarnings("unused") boolean lazy) {
            CompilerAsserts.partialEvaluationConstant(lazy);
            final int digitCount = NumberConversion.stringLengthLong(value);
            final byte[] digits = NumberConversion.longToString(value, digitCount);
            // the same count is passed as both length arguments of createFromByteArray
            return TruffleString.createFromByteArray(digits, digitCount, 0, enc, digitCount, TSCodeRange.get7Bit());
        }

        /** Fallback for encodings that fail the {@code is7BitCompatible} guard: always throws. */
        @Specialization(guards = "!is7BitCompatible(enc)")
        static TruffleString unsupported(@SuppressWarnings("unused") long value, Encoding enc, @SuppressWarnings("unused") boolean lazy) {
            CompilerAsserts.partialEvaluationConstant(lazy);
            final String message = nonAsciiCompatibleMessage(enc);
            throw InternalErrors.unsupportedOperation(message);
        }

        /** Builds the error message behind a boundary, keeping string concatenation out of compiled code. */
        @TruffleBoundary
        private static String nonAsciiCompatibleMessage(Encoding enc) {
            final StringBuilder message = new StringBuilder();
            message.append("Encoding ").append(enc).append(" is not ASCII-compatible");
            return message.toString();
        }

        /**
         * Create a new {@link FromLongNode}.
* * @since 22.1 */ public static FromLongNode create() { return TruffleStringFactory.FromLongNodeGen.create(); } /** * Get the uncached version of {@link FromLongNode}. * * @since 22.1 */ public static FromLongNode getUncached() { return TruffleStringFactory.FromLongNodeGen.getUncached(); } } /** * Shorthand for calling the uncached version of {@link FromLongNode}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromLongUncached(long value, Encoding encoding, boolean lazy) { return FromLongNode.getUncached().execute(value, encoding, lazy); } /** * Node to create a new {@link TruffleString} from a byte array. See * {@link #execute(byte[], int, int, TruffleString.Encoding, boolean)} for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class FromByteArrayNode extends Node { FromByteArrayNode() { } /** * Creates a new {@link TruffleString} from a byte array. See * {@link #execute(byte[], int, int, TruffleString.Encoding, boolean)} for details. * * @since 22.1 */ public final TruffleString execute(byte[] value, Encoding encoding) { return execute(value, encoding, true); } /** * Creates a new {@link TruffleString} from a byte array. See * {@link #execute(byte[], int, int, TruffleString.Encoding, boolean)} for details. * * @since 22.1 */ public final TruffleString execute(byte[] value, Encoding encoding, boolean copy) { return execute(value, 0, value.length, encoding, copy); } /** * Creates a new {@link TruffleString} from a byte array. The array content is assumed to be * encoded in the given encoding already. This operation allows non-copying string creation, * i.e. the array parameter can be used directly by passing {@code copy = false}. Caution: * {@link TruffleString} assumes the array to be immutable, do not modify the byte array * after passing it to the non-copying variant of this operation! 
* * @since 22.1 */ public abstract TruffleString execute(byte[] value, int byteOffset, int byteLength, Encoding encoding, boolean copy); @Specialization static TruffleString fromByteArray(byte[] value, int byteOffset, int byteLength, Encoding enc, boolean copy, @Cached TStringInternalNodes.FromBufferWithStringCompactionNode fromBufferWithStringCompactionNode) { checkArrayRange(value, byteOffset, byteLength); return fromBufferWithStringCompactionNode.execute(value, byteOffset, byteLength, enc, copy, true); } /** * Create a new {@link FromByteArrayNode}. * * @since 22.1 */ public static FromByteArrayNode create() { return TruffleStringFactory.FromByteArrayNodeGen.create(); } /** * Get the uncached version of {@link FromByteArrayNode}. * * @since 22.1 */ public static FromByteArrayNode getUncached() { return TruffleStringFactory.FromByteArrayNodeGen.getUncached(); } } /** * Shorthand for calling the uncached version of {@link FromByteArrayNode}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromByteArrayUncached(byte[] value, Encoding encoding) { return FromByteArrayNode.getUncached().execute(value, encoding); } /** * Shorthand for calling the uncached version of {@link FromByteArrayNode}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromByteArrayUncached(byte[] value, Encoding encoding, boolean copy) { return FromByteArrayNode.getUncached().execute(value, encoding, copy); } /** * Shorthand for calling the uncached version of {@link FromByteArrayNode}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromByteArrayUncached(byte[] value, int byteOffset, int byteLength, Encoding encoding, boolean copy) { return FromByteArrayNode.getUncached().execute(value, byteOffset, byteLength, encoding, copy); } /** * Node to create a new UTF-16 {@link TruffleString} from a char array. 
* * @since 22.1 */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class FromCharArrayUTF16Node extends Node {

        FromCharArrayUTF16Node() {
        }

        /**
         * Creates a UTF-16 {@link TruffleString} from a char array. Equivalent to
         * {@code execute(value, 0, value.length)}.
         *
         * @since 22.1
         */
        public final TruffleString execute(char[] value) {
            return execute(value, 0, value.length);
        }

        /**
         * Creates a UTF-16 {@link TruffleString} from a char-array.
         *
         * @param value the source array
         * @param charOffset index of the first {@code char} of the region to use
         * @param charLength number of {@code char}s in the region
         *
         * @since 22.1
         */
        public abstract TruffleString execute(char[] value, int charOffset, int charLength);

        @Specialization
        TruffleString doNonEmpty(char[] value, int charOffset, int charLength,
                        @Cached ConditionProfile utf16CompactProfile,
                        @Cached BranchProfile outOfMemoryProfile) {
            checkArrayRange(value.length, charOffset, charLength);
            if (charLength == 0) {
                return Encoding.UTF_16.getEmpty();
            }
            // a single char that fits into one byte is served from the shared constants
            if (charLength == 1 && value[charOffset] <= 0xff) {
                return TStringConstants.getSingleByte(Encoding.UTF_16, value[charOffset]);
            }
            // char index scaled by two; a negative result means the shift overflowed
            int offsetV = charOffset << 1;
            // Only charLength chars are copied into the new array (charLength << stride bytes), so
            // the size limit applies to the requested region and not to the whole source array.
            // Checking value.length here rejected small regions of very large arrays.
            if (charLength > TStringConstants.MAX_ARRAY_SIZE_S1 || offsetV < 0) {
                outOfMemoryProfile.enter();
                throw InternalErrors.outOfMemory();
            }
            // attrs packs both the codepoint length and the code range of the region
            long attrs = TStringOps.calcStringAttributesUTF16C(this, value, offsetV, charLength);
            final int codePointLength = StringAttributes.getCodePointLength(attrs);
            final int codeRange = StringAttributes.getCodeRange(attrs);
            final int stride = Stride.fromCodeRangeUTF16(codeRange);
            final byte[] array = new byte[charLength << stride];
            // the target stride is passed as a literal in each branch
            if (utf16CompactProfile.profile(stride == 0)) {
                TStringOps.arraycopyWithStrideCB(this, value, offsetV, array, 0, 0, charLength);
            } else {
                TStringOps.arraycopyWithStrideCB(this, value, offsetV, array, 0, 1, charLength);
            }
            return TruffleString.createFromArray(array, 0, charLength, stride, Encoding.UTF_16, codePointLength, codeRange);
        }

        /**
         * Create a new {@link FromCharArrayUTF16Node}.
* * @since 22.1 */ public static FromCharArrayUTF16Node create() { return TruffleStringFactory.FromCharArrayUTF16NodeGen.create(); } /** * Get the uncached version of {@link FromCharArrayUTF16Node}. * * @since 22.1 */ public static FromCharArrayUTF16Node getUncached() { return TruffleStringFactory.FromCharArrayUTF16NodeGen.getUncached(); } } /** * Shorthand for calling the uncached version of {@link FromCharArrayUTF16Node}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromCharArrayUTF16Uncached(char[] value) { return FromCharArrayUTF16Node.getUncached().execute(value); } /** * Shorthand for calling the uncached version of {@link FromCharArrayUTF16Node}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromCharArrayUTF16Uncached(char[] value, int charOffset, int charLength) { return FromCharArrayUTF16Node.getUncached().execute(value, charOffset, charLength); } /** * Node to create a new {@link TruffleString} from a Java string. See * {@link #execute(String, int, int, TruffleString.Encoding, boolean)} for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class FromJavaStringNode extends Node { FromJavaStringNode() { } /** * Creates a {@link TruffleString} from a Java string, re-using its internal byte array if * possible. * * @since 22.1 */ public final TruffleString execute(String value, Encoding encoding) { return execute(value, 0, value.length(), encoding, false); } /** * Creates a {@link TruffleString} from a given region in a Java string, re-using its * internal byte array if possible and the region covers the entire string. If {@code copy} * is {@code false}, the Java string's internal byte array will be re-used even if the * region does not cover the entire string. Note that this will keep the Java string's byte * array alive as long as the resulting {@link TruffleString} is alive. 
* * @since 22.1 */ public abstract TruffleString execute(String value, int charOffset, int length, Encoding encoding, boolean copy); @Specialization static TruffleString doUTF16(String javaString, int charOffset, int length, Encoding encoding, final boolean copy, @Cached TStringInternalNodes.FromJavaStringUTF16Node fromJavaStringUTF16Node, @Cached SwitchEncodingNode switchEncodingNode, @Cached ConditionProfile utf16Profile) { if (javaString.isEmpty()) { return encoding.getEmpty(); } TruffleString utf16String = fromJavaStringUTF16Node.execute(javaString, charOffset, length, copy); if (utf16Profile.profile(encoding == Encoding.UTF_16)) { return utf16String; } return switchEncodingNode.execute(utf16String, encoding); } /** * Create a new {@link FromJavaStringNode}. * * @since 22.1 */ public static FromJavaStringNode create() { return TruffleStringFactory.FromJavaStringNodeGen.create(); } /** * Get the uncached version of {@link FromJavaStringNode}. * * @since 22.1 */ public static FromJavaStringNode getUncached() { return TruffleStringFactory.FromJavaStringNodeGen.getUncached(); } } /** * Shorthand for calling the uncached version of {@link FromJavaStringNode}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromJavaStringUncached(String s, Encoding encoding) { return FromJavaStringNode.getUncached().execute(s, encoding); } /** * Shorthand for calling the uncached version of {@link FromJavaStringNode}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromJavaStringUncached(String s, int charOffset, int length, Encoding encoding, boolean copy) { return FromJavaStringNode.getUncached().execute(s, charOffset, length, encoding, copy); } /** * Node to create a new UTF-32 {@link TruffleString} from an int-array. 
* * @since 22.1 */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class FromIntArrayUTF32Node extends Node {

        FromIntArrayUTF32Node() {
        }

        /**
         * Creates a UTF-32 {@link TruffleString} from an int-array. Equivalent to
         * {@code execute(value, 0, value.length)}.
         *
         * @since 22.1
         */
        public final TruffleString execute(int[] value) {
            return execute(value, 0, value.length);
        }

        /**
         * Creates a UTF-32 {@link TruffleString} from an int-array.
         *
         * @param value the source array
         * @param intOffset index of the first {@code int} of the region to use
         * @param intLength number of {@code int}s in the region
         *
         * @since 22.1
         */
        public abstract TruffleString execute(int[] value, int intOffset, int intLength);

        @Specialization
        TruffleString doNonEmpty(int[] value, int intOffset, int length,
                        @Cached ConditionProfile utf32Compact0Profile,
                        @Cached ConditionProfile utf32Compact1Profile,
                        @Cached BranchProfile outOfMemoryProfile) {
            checkArrayRange(value.length, intOffset, length);
            if (length == 0) {
                return Encoding.UTF_32.getEmpty();
            }
            // Single-element fast path: the comparison has to be unsigned. With a signed compare a
            // negative element satisfied "<= 0xff" and was handed to getSingleByte as if it were a
            // byte value. Negative elements now take the general path below, like any other value
            // above 0xff. Same idiom as FromCodePointNode.
            if (length == 1 && Integer.compareUnsigned(value[intOffset], 0xff) <= 0) {
                return TStringConstants.getSingleByte(Encoding.UTF_32, value[intOffset]);
            }
            // int index scaled by four; a negative result means the shift overflowed
            int offsetV = intOffset << 2;
            if (length > TStringConstants.MAX_ARRAY_SIZE_S2 || offsetV < 0) {
                outOfMemoryProfile.enter();
                throw InternalErrors.outOfMemory();
            }
            final int codeRange = TStringOps.calcStringAttributesUTF32I(this, value, offsetV, length);
            final int stride = Stride.fromCodeRangeUTF32(codeRange);
            final byte[] array = new byte[length << stride];
            // the target stride is passed as a literal in each branch
            if (utf32Compact0Profile.profile(stride == 0)) {
                TStringOps.arraycopyWithStrideIB(this, value, offsetV, array, 0, 0, length);
            } else if (utf32Compact1Profile.profile(stride == 1)) {
                TStringOps.arraycopyWithStrideIB(this, value, offsetV, array, 0, 1, length);
            } else {
                TStringOps.arraycopyWithStrideIB(this, value, offsetV, array, 0, 2, length);
            }
            // the region length is passed as both the string length and the codepoint length
            return TruffleString.createFromArray(array, 0, length, stride, Encoding.UTF_32, length, codeRange);
        }

        /**
         * Create a new {@link FromIntArrayUTF32Node}.
* * @since 22.1 */ public static FromIntArrayUTF32Node create() { return TruffleStringFactory.FromIntArrayUTF32NodeGen.create(); } /** * Get the uncached version of {@link FromIntArrayUTF32Node}. * * @since 22.1 */ public static FromIntArrayUTF32Node getUncached() { return TruffleStringFactory.FromIntArrayUTF32NodeGen.getUncached(); } } /** * Shorthand for calling the uncached version of {@link FromIntArrayUTF32Node}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromIntArrayUTF32Uncached(int[] value) { return FromIntArrayUTF32Node.getUncached().execute(value); } /** * Shorthand for calling the uncached version of {@link FromIntArrayUTF32Node}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromIntArrayUTF32Uncached(int[] value, int intOffset, int intLength) { return FromIntArrayUTF32Node.getUncached().execute(value, intOffset, intLength); } /** * Node to create a new {@link TruffleString} from an interop object representing a native * pointer. See {@link #execute(Object, int, int, TruffleString.Encoding, boolean)} for details. * * @since 22.1 */ @ImportStatic({TStringGuards.class, TStringAccessor.class}) @GeneratePackagePrivate @GenerateUncached public abstract static class FromNativePointerNode extends Node { FromNativePointerNode() { } /** * Create a new {@link TruffleString} from an interop object representing a native pointer ( * {@code isPointer(pointerObject)} must return {@code true}). The pointer is immediately * unboxed with ({@code asPointer(pointerObject)}) and saved in the {@link TruffleString} * instance, i.e. {@link TruffleString} assumes that the pointer address does not change. * The pointer's content is assumed to be encoded in the given encoding already. If * {@code copy} is {@code false}, the native pointer is used directly as the new string's * backing storage. Caution: {@link TruffleString} assumes the pointer's content to be * immutable, do not modify the pointer's content after passing it to this operation! 
* *

* WARNING: {@link TruffleString} cannot reason about the lifetime of the native * pointer, so it is up to the user to make sure that the native pointer is valid to * access and not freed as long the {@code pointerObject} is alive (if {@code copy} is * {@code false}). To help with this the TruffleString keeps a reference to the given * {@code pointerObject}, so the {@code pointerObject} is kept alive at least as long as the * TruffleString is used. In order to be able to use the string past the native pointer's * life time, convert it to a managed string via {@link AsManagedNode} before the native * pointer is freed. *

*

* If {@code copy} is {@code true}, the pointer's contents are copied to a Java byte array, * and the pointer can be freed safely after the operation completes. *

* This operation requires native access permissions * ({@code TruffleLanguage.Env#isNativeAccessAllowed()}). * * @since 22.1 */ public abstract TruffleString execute(Object pointerObject, int byteOffset, int byteLength, Encoding encoding, boolean copy); @Specialization TruffleString fromNativePointer(Object pointerObject, int byteOffset, int byteLength, Encoding enc, boolean copy, @Cached(value = "createInteropLibrary()", uncached = "getUncachedInteropLibrary()") Node interopLibrary, @Cached TStringInternalNodes.FromNativePointerNode fromNativePointerNode, @Cached TStringInternalNodes.FromBufferWithStringCompactionNode fromBufferWithStringCompactionNode) { NativePointer pointer = NativePointer.create(this, pointerObject, interopLibrary, byteOffset); if (copy) { return fromBufferWithStringCompactionNode.execute(pointer, byteOffset, byteLength, enc, true, true); } return fromNativePointerNode.execute(pointer, byteOffset, byteLength, enc, true); } /** * Create a new {@link FromNativePointerNode}. * * @since 22.1 */ public static FromNativePointerNode create() { return TruffleStringFactory.FromNativePointerNodeGen.create(); } /** * Get the uncached version of {@link FromNativePointerNode}. * * @since 22.1 */ public static FromNativePointerNode getUncached() { return TruffleStringFactory.FromNativePointerNodeGen.getUncached(); } } /** * Shorthand for calling the uncached version of {@link FromNativePointerNode}. * * @since 22.1 */ @TruffleBoundary public static TruffleString fromNativePointerUncached(Object pointerObject, int byteOffset, int byteLength, Encoding encoding, boolean copy) { return FromNativePointerNode.getUncached().execute(pointerObject, byteOffset, byteLength, encoding, copy); } /** * Node to get the given {@link AbstractTruffleString} as a {@link TruffleString}. See * {@link #execute(AbstractTruffleString, TruffleString.Encoding)} for details. 
*
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class AsTruffleStringNode extends Node {

    AsTruffleStringNode() {
    }

    /**
     * Returns {@code value} as an immutable {@link TruffleString}. A {@link TruffleString} is
     * returned unchanged; a {@link MutableTruffleString} is copied into a newly created
     * {@link TruffleString}.
     *
     * @since 22.1
     */
    public abstract TruffleString execute(AbstractTruffleString value, Encoding expectedEncoding);

    /**
     * Immutable strings only need their encoding checked and are handed back as-is.
     */
    @Specialization
    static TruffleString immutable(TruffleString a, Encoding expectedEncoding) {
        a.checkEncoding(expectedEncoding);
        return a;
    }

    /**
     * Mutable strings are copied; code range and codepoint length are computed up front and
     * passed on to the copying node as known attributes.
     */
    @Specialization
    static TruffleString fromMutableString(MutableTruffleString a, Encoding expectedEncoding,
                    @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                    @Cached TStringInternalNodes.FromBufferWithStringCompactionKnownAttributesNode fromBufferWithStringCompactionNode) {
        final int mutableCodeRange = getCodeRangeNode.execute(a);
        // NOTE(review): this path uses the "loose" encoding check, whereas the TruffleString
        // specialization above uses checkEncoding - confirm the difference is intended.
        a.looseCheckEncoding(expectedEncoding, mutableCodeRange);
        final Object content = a.data();
        final int contentOffset = a.offset();
        // length() is scaled by the string's stride to obtain the length in bytes
        final int contentByteLength = a.length() << a.stride();
        final int codePointLength = getCodePointLengthNode.execute(a);
        return fromBufferWithStringCompactionNode.execute(content, contentOffset, contentByteLength, expectedEncoding, codePointLength, mutableCodeRange);
    }

    /**
     * Create a new {@link AsTruffleStringNode}.
     *
     * @since 22.1
     */
    public static AsTruffleStringNode create() {
        return TruffleStringFactory.AsTruffleStringNodeGen.create();
    }

    /**
     * Get the uncached version of {@link AsTruffleStringNode}.
     *
     * @since 22.1
     */
    public static AsTruffleStringNode getUncached() {
        return TruffleStringFactory.AsTruffleStringNodeGen.getUncached();
    }
}

/**
 * Node to get the given {@link AbstractTruffleString} as a managed {@link TruffleString},
 * meaning that the resulting string's backing memory is not a native pointer. See
 * {@link #execute(AbstractTruffleString, TruffleString.Encoding)} for details.
*
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class AsManagedNode extends Node {

    AsManagedNode() {
    }

    /**
     * If the given string is already a managed (i.e. not backed by a native pointer) string,
     * return it. Otherwise, copy the string's native pointer content into a Java byte array and
     * return a new string backed by the byte array.
     *
     * @param a the string to return in managed form.
     * @param expectedEncoding the encoding {@code a} is checked against before any other work.
     *
     * @since 22.1
     */
    public abstract TruffleString execute(AbstractTruffleString a, Encoding expectedEncoding);

    // Immutable, non-native strings are already managed: check the encoding and return as-is.
    @Specialization(guards = "!a.isNative()")
    static TruffleString managedImmutable(TruffleString a, Encoding expectedEncoding) {
        a.checkEncoding(expectedEncoding);
        assert !(a.data() instanceof NativePointer);
        return a;
    }

    // Native or mutable strings are copied into a new string; the source's codepoint length and
    // code range are passed along as known attributes.
    @Specialization(guards = "a.isNative() || a.isMutable()")
    static TruffleString nativeOrMutable(AbstractTruffleString a, Encoding expectedEncoding,
                    @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                    @Cached TStringInternalNodes.FromBufferWithStringCompactionKnownAttributesNode fromBufferWithStringCompactionNode) {
        a.checkEncoding(expectedEncoding);
        Object data = a.data();
        assert data instanceof byte[] || data instanceof NativePointer;
        // a.length() << a.stride() is the content length in bytes
        return fromBufferWithStringCompactionNode.execute(data, a.offset(), a.length() << a.stride(), expectedEncoding, getCodePointLengthNode.execute(a), getCodeRangeNode.execute(a));
    }

    /**
     * Create a new {@link AsManagedNode}.
     *
     * @since 22.1
     */
    public static AsManagedNode create() {
        return TruffleStringFactory.AsManagedNodeGen.create();
    }

    /**
     * Get the uncached version of {@link AsManagedNode}.
     *
     * @since 22.1
     */
    public static AsManagedNode getUncached() {
        return TruffleStringFactory.AsManagedNodeGen.getUncached();
    }
}

/**
 * Internal node that turns a string's {@code data} object into something that can be indexed:
 * {@code byte[]} and {@link NativePointer} data is returned as-is (a native pointer is
 * additionally materialized into a byte array when the string's encoding is not a "supported"
 * one), while lazy representations ({@link LazyConcat}, {@link LazyLong}) are materialized into
 * byte arrays first.
 */
@ImportStatic(TStringGuards.class)
abstract static class ToIndexableNode extends Node {

    abstract Object execute(AbstractTruffleString a, Object data);

    abstract static class ToIndexableImplNode extends ToIndexableNode {

        // Already a byte array: nothing to do.
        @Specialization
        static byte[] doByteArray(@SuppressWarnings("unused") AbstractTruffleString a, byte[] data) {
            return data;
        }

        // Native pointer with a supported encoding: used directly.
        @Specialization(guards = "isSupportedEncoding(a.encoding())")
        static NativePointer doNativeSupported(@SuppressWarnings("unused") AbstractTruffleString a, NativePointer data) {
            return data;
        }

        // Native pointer with an unsupported encoding: materialize a byte array on the pointer
        // object, then return the pointer itself.
        @Specialization(guards = "!isSupportedEncoding(a.encoding())")
        static NativePointer doNativeUnsupported(@SuppressWarnings("unused") AbstractTruffleString a, NativePointer data,
                        @Cached ConditionProfile materializeProfile) {
            data.materializeByteArray(a, materializeProfile);
            return data;
        }

        @Specialization
        byte[] doLazyConcat(AbstractTruffleString a, @SuppressWarnings("unused") LazyConcat data) {
            return doLazyConcatIntl(this, a);
        }

        // Shared by the cached specialization above and by Uncached.slowPath.
        private static byte[] doLazyConcatIntl(ToIndexableNode location, AbstractTruffleString a) {
            // note: the write to a.data is racy, and we deliberately read it from the TString
            // object again after the race to de-duplicate simultaneously generated arrays
            a.setData(LazyConcat.flatten(location, (TruffleString) a));
            return (byte[]) a.data();
        }

        @Specialization
        static byte[] doLazyLong(AbstractTruffleString a, LazyLong data,
                        @Cached ConditionProfile materializeProfile) {
            // same pattern as in #doLazyConcat: racy write to data.bytes and read the result
            // again to de-duplicate
            if (materializeProfile.profile(data.bytes == null)) {
                data.setBytes((TruffleString) a, NumberConversion.longToString(data.value, a.length()));
            }
            return data.bytes;
        }
    }

    /**
     * Hand-written uncached variant: a singleton that dispatches on the runtime type of
     * {@code data} and reuses the static specialization methods of {@link ToIndexableImplNode}.
     */
    @DenyReplace
    private static final class Uncached extends ToIndexableNode {

        private static final Uncached INSTANCE = new Uncached();

        @TruffleBoundary
        @Override
        Object execute(AbstractTruffleString a, Object data) {
            // byte[] is checked first; everything else goes through slowPath
            if (data instanceof byte[]) {
                return data;
            }
            return slowPath(a, data);
        }

        // Mirrors the guards of ToIndexableImplNode; any other data type is reported as an
        // unsupported specialization.
        private static Object slowPath(AbstractTruffleString a, Object data) {
            if (data instanceof NativePointer) {
                if (TStringGuards.isSupportedEncoding(a.encoding())) {
                    return ToIndexableImplNode.doNativeSupported(a, (NativePointer) data);
                } else {
                    return ToIndexableImplNode.doNativeUnsupported(a, (NativePointer) data, ConditionProfile.getUncached());
                }
            }
            if (data instanceof LazyConcat) {
                return ToIndexableImplNode.doLazyConcatIntl(INSTANCE, a);
            }
            if (data instanceof LazyLong) {
                return ToIndexableImplNode.doLazyLong(a, (LazyLong) data, ConditionProfile.getUncached());
            }
            throw new UnsupportedSpecializationException(INSTANCE, new Node[]{null, null}, a, data);
        }

        @Override
        public NodeCost getCost() {
            return NodeCost.MEGAMORPHIC;
        }

        @Override
        public boolean isAdoptable() {
            return false;
        }
    }

    static ToIndexableNode create() {
        return TruffleStringFactory.ToIndexableNodeFactory.ToIndexableImplNodeGen.create();
    }

    static ToIndexableNode getUncached() {
        return Uncached.INSTANCE;
    }
}

/**
 * Node to force materialization of any lazy internal data. Use this node to avoid
 * materialization code inside loops, e.g. when iterating over a string's code points or bytes.
 *
 * @since 22.1
 */
@GeneratePackagePrivate
@GenerateUncached
public abstract static class MaterializeNode extends Node {

    MaterializeNode() {
    }

    /**
     * Forces materialization of any lazy internal data. Use this node to avoid materialization
     * code inside loops, e.g. when iterating over a string's code points or bytes.
     *
     * @since 22.1
     */
    public abstract void execute(AbstractTruffleString a, Encoding expectedEncoding);

    @Specialization
    static void doMaterialize(AbstractTruffleString a, Encoding expectedEncoding,
                    @Cached ToIndexableNode toIndexableNode) {
        a.checkEncoding(expectedEncoding);
        // the returned object is not needed; the call is made for its materializing side effect
        toIndexableNode.execute(a, a.data());
        assert a.isMaterialized(expectedEncoding);
    }

    /**
     * Create a new {@link MaterializeNode}.
     *
     * @since 22.1
     */
    public static MaterializeNode create() {
        return TruffleStringFactory.MaterializeNodeGen.create();
    }

    /**
     * Get the uncached version of {@link MaterializeNode}.
     *
     * @since 22.1
     */
    public static MaterializeNode getUncached() {
        return TruffleStringFactory.MaterializeNodeGen.getUncached();
    }
}

/**
 * Node to get a string's {@link CodeRange}.
 *
 * @since 22.1
 */
@GeneratePackagePrivate
@GenerateUncached
public abstract static class GetCodeRangeNode extends Node {

    GetCodeRangeNode() {
    }

    /**
     * Get the string's {@link CodeRange}.
     *
     * @since 22.1
     */
    public abstract CodeRange execute(AbstractTruffleString a, Encoding expectedEncoding);

    @Specialization
    static CodeRange getCodeRange(AbstractTruffleString a, Encoding expectedEncoding,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode) {
        a.checkEncoding(expectedEncoding);
        // the internal node yields an int code range, which is mapped to the public enum
        return CodeRange.get(getCodeRangeNode.execute(a));
    }

    /**
     * Create a new {@link GetCodeRangeNode}.
     *
     * @since 22.1
     */
    public static GetCodeRangeNode create() {
        return TruffleStringFactory.GetCodeRangeNodeGen.create();
    }

    /**
     * Get the uncached version of {@link GetCodeRangeNode}.
     *
     * @since 22.1
     */
    public static GetCodeRangeNode getUncached() {
        return TruffleStringFactory.GetCodeRangeNodeGen.getUncached();
    }
}

/**
 * Node to get a string's "byte-based" {@link CodeRange}. See
 * {@link #execute(AbstractTruffleString, TruffleString.Encoding)} for details.
*
 * @since 22.1
 */
@GeneratePackagePrivate
@GenerateUncached
public abstract static class GetByteCodeRangeNode extends Node {

    GetByteCodeRangeNode() {
    }

    /**
     * Get the string's "byte-based" {@link CodeRange}. This differs from
     * {@link GetCodeRangeNode} in the following way:
     * <ul>
     * <li>A string is only considered to be in the {@link CodeRange#ASCII} code range if its
     * encoding is byte-based, so {@link Encoding#UTF_16} and {@link Encoding#UTF_32} cannot be
     * {@link CodeRange#ASCII}.</li>
     * <li>{@link CodeRange#LATIN_1} and {@link CodeRange#BMP} are mapped to
     * {@link CodeRange#VALID}.</li>
     * </ul>
     * The return value is always one of {@link CodeRange#ASCII}, {@link CodeRange#VALID} or
     * {@link CodeRange#BROKEN}.
     *
     * @since 22.1
     */
    public abstract CodeRange execute(AbstractTruffleString a, Encoding expectedEncoding);

    @Specialization
    static CodeRange getCodeRange(AbstractTruffleString a, Encoding expectedEncoding,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode) {
        a.checkEncoding(expectedEncoding);
        return CodeRange.getByteCodeRange(getCodeRangeNode.execute(a), expectedEncoding);
    }

    /**
     * Create a new {@link GetByteCodeRangeNode}.
     *
     * @since 22.1
     */
    public static GetByteCodeRangeNode create() {
        return TruffleStringFactory.GetByteCodeRangeNodeGen.create();
    }

    /**
     * Get the uncached version of {@link GetByteCodeRangeNode}.
     *
     * @since 22.1
     */
    public static GetByteCodeRangeNode getUncached() {
        return TruffleStringFactory.GetByteCodeRangeNodeGen.getUncached();
    }
}

/**
 * Node to check if a string's code range is equal to the given {@link CodeRange}. See
 * {@link #execute(AbstractTruffleString, TruffleString.CodeRange)} for details.
 *
 * @since 22.1
 */
@GeneratePackagePrivate
@GenerateUncached
public abstract static class CodeRangeEqualsNode extends Node {

    CodeRangeEqualsNode() {
    }

    /**
     * Returns {@code true} if the string's code range is equal to the given {@link CodeRange}.
     * Use this operation when caching code range instances, e.g.:
     *
     * <pre>
     * {@code
     * @Specialization(guards = "codeRangeEqualsNode.execute(a, cachedCodeRange)")
     * static void someOperation(TString a,
     *              @Cached TruffleString.GetCodeRangeNode getCodeRangeNode,
     *              @Cached TruffleString.CodeRangeEqualsNode codeRangeEqualsNode,
     *              @Cached("getCodeRangeNode.execute(a)") CodeRange cachedCodeRange) {
     *      // ...
     * }
     * }
     * </pre>
     *
     * @since 22.1
     */
    public abstract boolean execute(AbstractTruffleString a, CodeRange codeRange);

    // Note: unlike the sibling nodes, this operation takes no expected encoding and performs no
    // encoding check.
    @Specialization
    static boolean codeRangeEquals(AbstractTruffleString a, CodeRange codeRange,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode) {
        return CodeRange.equals(getCodeRangeNode.execute(a), codeRange);
    }

    /**
     * Create a new {@link CodeRangeEqualsNode}.
     *
     * @since 22.1
     */
    public static CodeRangeEqualsNode create() {
        return TruffleStringFactory.CodeRangeEqualsNodeGen.create();
    }

    /**
     * Get the uncached version of {@link CodeRangeEqualsNode}.
     *
     * @since 22.1
     */
    public static CodeRangeEqualsNode getUncached() {
        return TruffleStringFactory.CodeRangeEqualsNodeGen.getUncached();
    }
}

/**
 * Node to check if a string is encoded correctly.
 *
 * @since 22.1
 */
@GeneratePackagePrivate
@GenerateUncached
public abstract static class IsValidNode extends Node {

    IsValidNode() {
    }

    /**
     * Returns {@code true} if the string is encoded correctly.
     *
     * @since 22.1
     */
    public abstract boolean execute(AbstractTruffleString a, Encoding expectedEncoding);

    @Specialization
    static boolean isValid(AbstractTruffleString a, Encoding expectedEncoding,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode) {
        a.checkEncoding(expectedEncoding);
        int codeRange = getCodeRangeNode.execute(a);
        // valid means: neither of the two "broken" code range kinds
        return !isBrokenMultiByte(codeRange) && !isBrokenFixedWidth(codeRange);
    }

    /**
     * Create a new {@link IsValidNode}.
     *
     * @since 22.1
     */
    public static IsValidNode create() {
        return TruffleStringFactory.IsValidNodeGen.create();
    }

    /**
     * Get the uncached version of {@link IsValidNode}.
     *
     * @since 22.1
     */
    public static IsValidNode getUncached() {
        return TruffleStringFactory.IsValidNodeGen.getUncached();
    }
}

/**
 * Node to get the number of codepoints in a string.
 *
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class CodePointLengthNode extends Node {

    CodePointLengthNode() {
    }

    /**
     * Return the number of codepoints in the string.
     * <p>
     * If the string is not encoded correctly (if its coderange is {@link CodeRange#BROKEN}),
     * every broken minimum-length sequence in the encoding (4 bytes for UTF-32, 2 bytes for
     * UTF-16, 1 byte for other encodings) adds 1 to the length.
     *
     * @since 22.1
     */
    public abstract int execute(AbstractTruffleString a, Encoding expectedEncoding);

    @Specialization
    static int get(AbstractTruffleString a, Encoding expectedEncoding,
                    @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode) {
        a.checkEncoding(expectedEncoding);
        return getCodePointLengthNode.execute(a);
    }

    /**
     * Create a new {@link CodePointLengthNode}.
     *
     * @since 22.1
     */
    public static CodePointLengthNode create() {
        return TruffleStringFactory.CodePointLengthNodeGen.create();
    }

    /**
     * Get the uncached version of {@link CodePointLengthNode}.
     *
     * @since 22.1
     */
    public static CodePointLengthNode getUncached() {
        return TruffleStringFactory.CodePointLengthNodeGen.getUncached();
    }
}

/**
 * Node to get a string's hash code. See
 * {@link #execute(AbstractTruffleString, TruffleString.Encoding)} for details.
 *
 * @see TruffleString#hashCode()
 * @since 22.1
 */
@GeneratePackagePrivate
@GenerateUncached
public abstract static class HashCodeNode extends Node {

    HashCodeNode() {
    }

    /**
     * Returns the string's hash code. The hash is dependent on the string's encoding, make sure
     * to convert strings to a common encoding before comparing their hash codes!
     *
     * @since 22.1
     */
    public abstract int execute(AbstractTruffleString a, Encoding expectedEncoding);

    @Specialization
    static int calculateHash(AbstractTruffleString a, Encoding expectedEncoding,
                    @Cached ConditionProfile cacheMiss,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached TStringOpsNodes.CalculateHashCodeNode calculateHashCodeNode) {
        a.checkEncoding(expectedEncoding);
        int h = a.hashCode;
        // a stored value of 0 means "not computed yet"
        if (cacheMiss.profile(h == 0)) {
            h = calculateHashCodeNode.execute(a, toIndexableNode.execute(a, a.data()));
            // a computed hash of 0 is stored as -1 so it is not mistaken for "not computed"
            if (h == 0) {
                h--;
            }
            a.hashCode = h;
        }
        return h;
    }

    /**
     * Create a new {@link HashCodeNode}.
     *
     * @since 22.1
     */
    public static HashCodeNode create() {
        return TruffleStringFactory.HashCodeNodeGen.create();
    }

    /**
     * Get the uncached version of {@link HashCodeNode}.
     *
     * @since 22.1
     */
    public static HashCodeNode getUncached() {
        return TruffleStringFactory.HashCodeNodeGen.getUncached();
    }
}

/**
 * Node to read a single byte from a string. See
 * {@link #execute(AbstractTruffleString, int, TruffleString.Encoding)} for details.
 *
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class ReadByteNode extends Node {

    ReadByteNode() {
    }

    /**
     * Read a single byte from a string. If used inside a loop or repetitively,
     * {@link MaterializeNode} should be used before.
     *
     * @since 22.1
     */
    public abstract int execute(AbstractTruffleString a, int byteIndex, Encoding expectedEncoding);

    @Specialization
    static int doRead(AbstractTruffleString a, int i, Encoding expectedEncoding,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached TStringInternalNodes.ReadByteNode readByteNode) {
        a.checkEncoding(expectedEncoding);
        Object arrayA = toIndexableNode.execute(a, a.data());
        // no bounds check here; NOTE(review): presumably done by the internal read node - confirm
        return readByteNode.execute(a, arrayA, i, expectedEncoding);
    }

    /**
     * Create a new {@link ReadByteNode}.
     *
     * @since 22.1
     */
    public static ReadByteNode create() {
        return TruffleStringFactory.ReadByteNodeGen.create();
    }

    /**
     * Get the uncached version of {@link ReadByteNode}.
     *
     * @since 22.1
     */
    public static ReadByteNode getUncached() {
        return TruffleStringFactory.ReadByteNodeGen.getUncached();
    }
}

/**
 * Node to read a single char from a UTF-16 string.
 *
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class ReadCharUTF16Node extends Node {

    ReadCharUTF16Node() {
    }

    /**
     * Read a single char from a UTF-16 string.
     *
     * @since 22.1
     */
    public abstract char execute(AbstractTruffleString a, int charIndex);

    @Specialization
    static char doRead(AbstractTruffleString a, int i,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached ConditionProfile utf16S0Profile) {
        a.checkEncoding(Encoding.UTF_16);
        a.boundsCheckRaw(i);
        Object arrayA = toIndexableNode.execute(a, a.data());
        // the string is stored with stride 0 or stride 1; pick the matching read
        if (utf16S0Profile.profile(isStride0(a))) {
            return (char) TStringOps.readS0(a, arrayA, i);
        } else {
            assert isStride1(a);
            return TStringOps.readS1(a, arrayA, i);
        }
    }

    /**
     * Create a new {@link ReadCharUTF16Node}.
     *
     * @since 22.1
     */
    public static ReadCharUTF16Node create() {
        return TruffleStringFactory.ReadCharUTF16NodeGen.create();
    }

    /**
     * Get the uncached version of {@link ReadCharUTF16Node}.
     *
     * @since 22.1
     */
    public static ReadCharUTF16Node getUncached() {
        return TruffleStringFactory.ReadCharUTF16NodeGen.getUncached();
    }
}

/**
 * Node to get the number of bytes occupied by the codepoint starting at a given byte index. See
 * {@link #execute(AbstractTruffleString, int, TruffleString.Encoding)} for details.
 *
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class ByteLengthOfCodePointNode extends Node {

    ByteLengthOfCodePointNode() {
    }

    /**
     * Get the number of bytes occupied by the codepoint starting at {@code byteIndex}, with
     * {@link ErrorHandling#BEST_EFFORT best-effort} error handling.
     *
     * @since 22.1
     */
    public final int execute(AbstractTruffleString a, int byteIndex, Encoding expectedEncoding) {
        return execute(a, byteIndex, expectedEncoding, ErrorHandling.BEST_EFFORT);
    }

    /**
     * Get the number of bytes occupied by the codepoint starting at {@code byteIndex}.
     *
     * @param errorHandling if set to {@link ErrorHandling#BEST_EFFORT}, this node will return
     *            the encoding's minimum number of bytes per codepoint if an error occurs while
     *            reading the codepoint. If set to {@link ErrorHandling#RETURN_NEGATIVE}, a
     *            negative value will be returned instead, where two error cases are
     *            distinguished: if the codepoint is invalid, the return value is -1. If the
     *            codepoint is an unfinished, possibly valid byte sequence at the end of the
     *            string, the return value is {@code -1 - (number of missing bytes)}. This
     *            parameter is expected to be
     *            {@link CompilerAsserts#partialEvaluationConstant(Object) partial evaluation
     *            constant}.
     *
     * @since 22.3
     */
    public abstract int execute(AbstractTruffleString a, int byteIndex, Encoding expectedEncoding, ErrorHandling errorHandling);

    @Specialization
    static int translate(AbstractTruffleString a, int byteIndex, Encoding expectedEncoding, ErrorHandling errorHandling,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                    @Cached TStringInternalNodes.ByteLengthOfCodePointNode byteLengthOfCodePointNode) {
        CompilerAsserts.partialEvaluationConstant(errorHandling);
        a.checkEncoding(expectedEncoding);
        // the byte index is converted to a raw index, which is what gets bounds-checked and
        // passed to the internal node
        int rawIndex = rawIndex(byteIndex, expectedEncoding);
        a.boundsCheckRaw(rawIndex);
        Object arrayA = toIndexableNode.execute(a, a.data());
        int codeRangeA = getCodeRangeNode.execute(a);
        return byteLengthOfCodePointNode.execute(a, arrayA, codeRangeA, expectedEncoding, rawIndex, errorHandling);
    }

    /**
     * Create a new {@link ByteLengthOfCodePointNode}.
     *
     * @since 22.1
     */
    public static ByteLengthOfCodePointNode create() {
        return TruffleStringFactory.ByteLengthOfCodePointNodeGen.create();
    }

    /**
     * Get the uncached version of {@link ByteLengthOfCodePointNode}.
     *
     * @since 22.1
     */
    public static ByteLengthOfCodePointNode getUncached() {
        return TruffleStringFactory.ByteLengthOfCodePointNodeGen.getUncached();
    }
}

/**
 * Node to convert a given byte index to a codepoint index. See
 * {@link #execute(AbstractTruffleString, int, int, TruffleString.Encoding)} for details.
 *
 * @since 22.2
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class ByteIndexToCodePointIndexNode extends Node {

    ByteIndexToCodePointIndexNode() {
    }

    /**
     * Convert the given byte index to a codepoint index, relative to starting point
     * {@code byteOffset}.
     *
     * @since 22.2
     */
    public abstract int execute(AbstractTruffleString a, int byteOffset, int byteIndex, Encoding expectedEncoding);

    @Specialization
    static int translate(AbstractTruffleString a, int byteOffset, int byteIndex, Encoding expectedEncoding,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                    @Cached TStringInternalNodes.RawIndexToCodePointIndexNode rawIndexToCodePointIndexNode) {
        a.checkEncoding(expectedEncoding);
        int rawOffset = rawIndex(byteOffset, expectedEncoding);
        int rawIndex = rawIndex(byteIndex, expectedEncoding);
        a.boundsCheckRegionRaw(rawOffset, rawIndex);
        // shortcut: index 0 maps to codepoint index 0 without touching the string's data
        if (byteIndex == 0) {
            return 0;
        }
        Object arrayA = toIndexableNode.execute(a, a.data());
        int codeRangeA = getCodeRangeNode.execute(a);
        // the offset handed to the internal node is in bytes and includes the string's own offset
        return rawIndexToCodePointIndexNode.execute(a, arrayA, codeRangeA, expectedEncoding, a.offset() + byteOffset, rawIndex);
    }

    /**
     * Create a new {@link ByteIndexToCodePointIndexNode}.
     *
     * @since 22.2
     */
    public static ByteIndexToCodePointIndexNode create() {
        return TruffleStringFactory.ByteIndexToCodePointIndexNodeGen.create();
    }

    /**
     * Get the uncached version of {@link ByteIndexToCodePointIndexNode}.
     *
     * @since 22.2
     */
    public static ByteIndexToCodePointIndexNode getUncached() {
        return TruffleStringFactory.ByteIndexToCodePointIndexNodeGen.getUncached();
    }
}

/**
 * Node to convert a given codepoint index to a byte index. See
 * {@link #execute(AbstractTruffleString, int, int, TruffleString.Encoding)} for details.
 *
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class CodePointIndexToByteIndexNode extends Node {

    CodePointIndexToByteIndexNode() {
    }

    /**
     * Convert the given codepoint index to a byte index, relative to starting point
     * {@code byteOffset}.
     *
     * @since 22.1
     */
    public abstract int execute(AbstractTruffleString a, int byteOffset, int codepointIndex, Encoding expectedEncoding);

    @Specialization
    static int translate(AbstractTruffleString a, int byteOffset, int codepointIndex, Encoding expectedEncoding,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                    @Cached TStringInternalNodes.CodePointIndexToRawNode codePointIndexToRawNode) {
        a.checkEncoding(expectedEncoding);
        a.boundsCheckRegion(0, codepointIndex, getCodePointLengthNode);
        int rawOffset = rawIndex(byteOffset, expectedEncoding);
        a.boundsCheckRawLength(rawOffset);
        // shortcut: codepoint index 0 maps to byte index 0 without touching the string's data
        if (codepointIndex == 0) {
            return 0;
        }
        Object arrayA = toIndexableNode.execute(a, a.data());
        int codeRangeA = getCodeRangeNode.execute(a);
        // the internal node's result is scaled by the encoding's natural stride before it is
        // returned as a byte index
        return codePointIndexToRawNode.execute(a, arrayA, codeRangeA, expectedEncoding, rawOffset, codepointIndex, true) << expectedEncoding.naturalStride;
    }

    /**
     * Create a new {@link CodePointIndexToByteIndexNode}.
     *
     * @since 22.1
     */
    public static CodePointIndexToByteIndexNode create() {
        return TruffleStringFactory.CodePointIndexToByteIndexNodeGen.create();
    }

    /**
     * Get the uncached version of {@link CodePointIndexToByteIndexNode}.
     *
     * @since 22.1
     */
    public static CodePointIndexToByteIndexNode getUncached() {
        return TruffleStringFactory.CodePointIndexToByteIndexNodeGen.getUncached();
    }
}

/**
 * Node to read a codepoint at a given codepoint index. See
 * {@link #execute(AbstractTruffleString, int, TruffleString.Encoding)} for details.
 *
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class CodePointAtIndexNode extends Node {

    CodePointAtIndexNode() {
    }

    /**
     * Decode and return the codepoint at codepoint index {@code i}, with
     * {@link ErrorHandling#BEST_EFFORT best-effort} error handling.
     *
     * @since 22.1
     */
    public final int execute(AbstractTruffleString a, int i, Encoding expectedEncoding) {
        return execute(a, i, expectedEncoding, ErrorHandling.BEST_EFFORT);
    }

    /**
     * Decode and return the codepoint at codepoint index {@code i}.
     *
     * @param errorHandling if set to {@link ErrorHandling#BEST_EFFORT}, the return value on
     *            invalid codepoints depends on {@code expectedEncoding}:
     *            <ul>
     *            <li>{@link Encoding#UTF_8}: Unicode Replacement character {@code 0xFFFD}</li>
     *            <li>{@link Encoding#UTF_16}: the (16-bit) {@code char} value read at index
     *            {@code i}</li>
     *            <li>{@link Encoding#UTF_32}: the (32-bit) {@code int} value read at index
     *            {@code i}</li>
     *            <li>{@link Encoding#US_ASCII}, {@link Encoding#ISO_8859_1},
     *            {@link Encoding#BYTES}: the (8-bit) unsigned {@code byte} value read at index
     *            {@code i}</li>
     *            <li>All other Encodings: Unicode Replacement character {@code 0xFFFD}</li>
     *            </ul>
     *            If set to {@link ErrorHandling#RETURN_NEGATIVE}, {@code -1} will be returned
     *            instead. This parameter is expected to be
     *            {@link CompilerAsserts#partialEvaluationConstant(Object) partial evaluation
     *            constant}.
     *
     * @since 22.3
     */
    public abstract int execute(AbstractTruffleString a, int i, Encoding expectedEncoding, ErrorHandling errorHandling);

    @Specialization
    static int readCodePoint(AbstractTruffleString a, int i, Encoding expectedEncoding, ErrorHandling errorHandling,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                    @Cached TStringInternalNodes.CodePointAtNode readCodePointNode) {
        CompilerAsserts.partialEvaluationConstant(errorHandling);
        a.checkEncoding(expectedEncoding);
        // i is a codepoint index, so the bounds check needs the codepoint length node
        a.boundsCheck(i, getCodePointLengthNode);
        Object arrayA = toIndexableNode.execute(a, a.data());
        return readCodePointNode.execute(a, arrayA, getCodeRangeNode.execute(a), expectedEncoding, i, errorHandling);
    }

    /**
     * Create a new {@link CodePointAtIndexNode}.
     *
     * @since 22.1
     */
    public static CodePointAtIndexNode create() {
        return TruffleStringFactory.CodePointAtIndexNodeGen.create();
    }

    /**
     * Get the uncached version of {@link CodePointAtIndexNode}.
     *
     * @since 22.1
     */
    public static CodePointAtIndexNode getUncached() {
        return TruffleStringFactory.CodePointAtIndexNodeGen.getUncached();
    }
}

/**
 * Node to read a codepoint at a given byte index. See
 * {@link #execute(AbstractTruffleString, int, TruffleString.Encoding)} for details.
 *
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class CodePointAtByteIndexNode extends Node {

    CodePointAtByteIndexNode() {
    }

    /**
     * Decode and return the codepoint at byte index {@code i}, with
     * {@link ErrorHandling#BEST_EFFORT best-effort} error handling.
     *
     * @since 22.1
     */
    public final int execute(AbstractTruffleString a, int i, Encoding expectedEncoding) {
        return execute(a, i, expectedEncoding, ErrorHandling.BEST_EFFORT);
    }

    /**
     * Decode and return the codepoint at byte index {@code i}.
     *
     * @param errorHandling analogous to {@link CodePointAtIndexNode}.
     *
     * @since 22.3
     */
    public abstract int execute(AbstractTruffleString a, int i, Encoding expectedEncoding, ErrorHandling errorHandling);

    @Specialization
    static int readCodePoint(AbstractTruffleString a, int byteIndex, Encoding expectedEncoding, ErrorHandling errorHandling,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                    @Cached TStringInternalNodes.CodePointAtRawNode readCodePointNode) {
        CompilerAsserts.partialEvaluationConstant(errorHandling);
        // the byte index is converted to a raw index before the bounds check and the read
        final int i = rawIndex(byteIndex, expectedEncoding);
        a.checkEncoding(expectedEncoding);
        a.boundsCheckRaw(i);
        return readCodePointNode.execute(a, toIndexableNode.execute(a, a.data()), getCodeRangeNode.execute(a), expectedEncoding, i, errorHandling);
    }

    /**
     * Create a new {@link CodePointAtByteIndexNode}.
     *
     * @since 22.1
     */
    public static CodePointAtByteIndexNode create() {
        return TruffleStringFactory.CodePointAtByteIndexNodeGen.create();
    }

    /**
     * Get the uncached version of {@link CodePointAtByteIndexNode}.
     *
     * @since 22.1
     */
    public static CodePointAtByteIndexNode getUncached() {
        return TruffleStringFactory.CodePointAtByteIndexNodeGen.getUncached();
    }
}

/**
 * Node to find the index of the first occurrence of any byte from a given array. See
 * {@link #execute(AbstractTruffleString, int, int, byte[], TruffleString.Encoding)} for
 * details.
*
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class ByteIndexOfAnyByteNode extends Node {

    ByteIndexOfAnyByteNode() {
    }

    /**
     * Searches the byte range from {@code fromByteIndex} (inclusive) to {@code maxByteIndex}
     * (exclusive) for the first byte that is contained in {@code values} and returns its byte
     * index.
     * <p>
     * A negative value is returned when none of the values occurs in that range.
     *
     * @since 22.1
     */
    public abstract int execute(AbstractTruffleString a, int fromByteIndex, int maxByteIndex, byte[] values, Encoding expectedEncoding);

    @Specialization
    int indexOfRaw(AbstractTruffleString a, int fromByteIndex, int maxByteIndex, byte[] values, Encoding expectedEncoding,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode) {
        if (isUTF16Or32(expectedEncoding)) {
            throw InternalErrors.illegalArgument("UTF-16 and UTF-32 not supported!");
        }
        a.checkEncoding(expectedEncoding);
        if (a.isEmpty()) {
            return -1;
        }
        a.boundsCheckRaw(fromByteIndex, maxByteIndex);
        // empty search range: nothing can match
        if (fromByteIndex == maxByteIndex) {
            return -1;
        }
        // a 7-bit string cannot contain any of the values if none of them is ASCII
        if (TSCodeRange.is7Bit(getCodeRangeNode.execute(a)) && noneIsAscii(this, values)) {
            return -1;
        }
        assert isStride0(a);
        final Object indexable = toIndexableNode.execute(a, a.data());
        return TStringOps.indexOfAnyByte(this, a, indexable, fromByteIndex, maxByteIndex, values);
    }

    /**
     * Returns {@code true} iff no element of {@code values} is in the ASCII range.
     */
    private static boolean noneIsAscii(Node location, byte[] values) {
        for (int i = 0; i < values.length; i++) {
            // a signed Java byte is non-negative exactly when its unsigned value is <= 0x7f
            if (values[i] >= 0) {
                return false;
            }
            TStringConstants.truffleSafePointPoll(location, i + 1);
        }
        return true;
    }

    /**
     * Create a new {@link ByteIndexOfAnyByteNode}.
     *
     * @since 22.1
     */
    public static ByteIndexOfAnyByteNode create() {
        return TruffleStringFactory.ByteIndexOfAnyByteNodeGen.create();
    }

    /**
     * Get the uncached version of {@link ByteIndexOfAnyByteNode}.
     *
     * @since 22.1
     */
    public static ByteIndexOfAnyByteNode getUncached() {
        return TruffleStringFactory.ByteIndexOfAnyByteNodeGen.getUncached();
    }
}

/**
 * Node to find the index of the first occurrence of any {@code char} from a given array. See
 * {@link #execute(AbstractTruffleString, int, int, char[])} for details.
*
 * @since 22.1
 */
@ImportStatic(TStringGuards.class)
@GeneratePackagePrivate
@GenerateUncached
public abstract static class CharIndexOfAnyCharUTF16Node extends Node {

    CharIndexOfAnyCharUTF16Node() {
    }

    /**
     * Searches the char range from {@code fromCharIndex} (inclusive) to {@code maxCharIndex}
     * (exclusive) for the first char that is contained in {@code values} and returns its char
     * index.
     * <p>
     * A negative value is returned when none of the values occurs in that range.
     *
     * @since 22.1
     */
    public abstract int execute(AbstractTruffleString a, int fromCharIndex, int maxCharIndex, char[] values);

    @Specialization
    int indexOfRaw(AbstractTruffleString a, int fromCharIndex, int maxCharIndex, char[] values,
                    @Cached ToIndexableNode toIndexableNode,
                    @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                    @Cached TStringOpsNodes.IndexOfAnyCharNode indexOfNode) {
        a.checkEncoding(Encoding.UTF_16);
        if (a.isEmpty()) {
            return -1;
        }
        a.boundsCheckRaw(fromCharIndex, maxCharIndex);
        final int stringCodeRange = getCodeRangeNode.execute(a);
        // empty search range: nothing can match
        if (fromCharIndex == maxCharIndex) {
            return -1;
        }
        // for fixed-width code ranges, values outside the string's code range cannot occur in it
        if (TSCodeRange.isFixedWidth(stringCodeRange) && noneInCodeRange(this, stringCodeRange, values)) {
            return -1;
        }
        final Object indexable = toIndexableNode.execute(a, a.data());
        return indexOfNode.execute(a, indexable, fromCharIndex, maxCharIndex, values);
    }

    /**
     * Returns {@code true} iff no element of {@code values} lies within {@code codeRange}.
     */
    private static boolean noneInCodeRange(Node location, int codeRange, char[] values) {
        for (int i = 0; i < values.length; i++) {
            final char candidate = values[i];
            if (TSCodeRange.isInCodeRange(candidate, codeRange)) {
                return false;
            }
            TStringConstants.truffleSafePointPoll(location, i + 1);
        }
        return true;
    }

    /**
     * Create a new {@link CharIndexOfAnyCharUTF16Node}.
     *
     * @since 22.1
     */
    public static CharIndexOfAnyCharUTF16Node create() {
        return TruffleStringFactory.CharIndexOfAnyCharUTF16NodeGen.create();
    }

    /**
     * Get the uncached version of {@link CharIndexOfAnyCharUTF16Node}.
     *
     * @since 22.1
     */
    public static CharIndexOfAnyCharUTF16Node getUncached() {
        return TruffleStringFactory.CharIndexOfAnyCharUTF16NodeGen.getUncached();
    }
}

/**
 * Node to find the index of the first occurrence of any {@code int} from a given array. See
 * {@link #execute(AbstractTruffleString, int, int, int[])} for details.
*
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class IntIndexOfAnyIntUTF32Node extends Node {

        IntIndexOfAnyIntUTF32Node() {
        }

        /**
         * Search {@code a} for the first {@code int} that is contained in {@code values} and
         * return its int index. The search range starts at {@code fromIntIndex} (inclusive) and
         * ends at {@code maxIntIndex} (exclusive).
         * <p>
         * A negative value is returned if none of the values is found.
         *
         * @since 22.1
         */
        public abstract int execute(AbstractTruffleString a, int fromIntIndex, int maxIntIndex, int[] values);

        @Specialization
        int indexOfRaw(AbstractTruffleString a, int fromIntIndex, int maxIntIndex, int[] values,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                        @Cached TStringOpsNodes.IndexOfAnyIntNode indexOfNode) {
            a.checkEncoding(Encoding.UTF_32);
            // An empty string yields "not found" before the bounds are validated.
            if (a.isEmpty()) {
                return -1;
            }
            a.boundsCheckRaw(fromIntIndex, maxIntIndex);
            // An empty search range cannot contain a match; the code range is not queried here.
            if (fromIntIndex == maxIntIndex) {
                return -1;
            }
            // Skip the scan if no value lies inside the string's code range.
            if (noneInCodeRange(this, getCodeRangeNode.execute(a), values)) {
                return -1;
            }
            final Object array = toIndexableNode.execute(a, a.data());
            return indexOfNode.execute(a, array, fromIntIndex, maxIntIndex, values);
        }

        /**
         * Returns {@code true} iff no element of {@code values} is in {@code codeRange}. A safepoint
         * poll is issued after every element that was checked without a hit.
         */
        private static boolean noneInCodeRange(Node location, int codeRange, int[] values) {
            for (int idx = 0; idx < values.length; idx++) {
                final int candidate = values[idx];
                if (TSCodeRange.isInCodeRange(candidate, codeRange)) {
                    return false;
                }
                TStringConstants.truffleSafePointPoll(location, idx + 1);
            }
            return true;
        }

        /**
         * Create a new {@link IntIndexOfAnyIntUTF32Node}.
         *
         * @since 22.1
         */
        public static IntIndexOfAnyIntUTF32Node create() {
            return TruffleStringFactory.IntIndexOfAnyIntUTF32NodeGen.create();
        }

        /**
         * Get the uncached version of {@link IntIndexOfAnyIntUTF32Node}.
         *
         * @since 22.1
         */
        public static IntIndexOfAnyIntUTF32Node getUncached() {
            return TruffleStringFactory.IntIndexOfAnyIntUTF32NodeGen.getUncached();
        }
    }

    /**
     * Node to find the index of the first occurrence of a given code point. See
     * {@link #execute(AbstractTruffleString, int, int, int, TruffleString.Encoding)} for details.
*
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class IndexOfCodePointNode extends Node {

        IndexOfCodePointNode() {
        }

        /**
         * Return the codepoint index of the first occurrence of {@code codepoint} in {@code a}.
         * The search range starts at {@code fromIndex} (inclusive) and ends at {@code toIndex}
         * (exclusive). If no occurrence is found, a negative value is returned.
         *
         * @since 22.1
         */
        public abstract int execute(AbstractTruffleString a, int codepoint, int fromIndex, int toIndex, Encoding expectedEncoding);

        @Specialization
        static int doIndexOf(AbstractTruffleString a, int codepoint, int fromIndex, int toIndex, Encoding expectedEncoding,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                        @Cached TStringInternalNodes.IndexOfCodePointNode indexOfNode) {
            a.checkEncoding(expectedEncoding);
            // An empty string yields "not found" before the bounds are validated.
            if (a.isEmpty()) {
                return -1;
            }
            a.boundsCheck(fromIndex, toIndex, getCodePointLengthNode);
            final Object array = toIndexableNode.execute(a, a.data());
            final int codeRange = getCodeRangeNode.execute(a);
            return indexOfNode.execute(a, array, codeRange, expectedEncoding, codepoint, fromIndex, toIndex);
        }

        /**
         * Create a new {@link IndexOfCodePointNode}.
         *
         * @since 22.1
         */
        public static IndexOfCodePointNode create() {
            return TruffleStringFactory.IndexOfCodePointNodeGen.create();
        }

        /**
         * Get the uncached version of {@link IndexOfCodePointNode}.
         *
         * @since 22.1
         */
        public static IndexOfCodePointNode getUncached() {
            return TruffleStringFactory.IndexOfCodePointNodeGen.getUncached();
        }
    }

    /**
     * {@link IndexOfCodePointNode}, but with byte indices.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class ByteIndexOfCodePointNode extends Node {

        ByteIndexOfCodePointNode() {
        }

        /**
         * {@link IndexOfCodePointNode}, but with byte indices.
* * @since 22.1 */ public abstract int execute(AbstractTruffleString a, int codepoint, int fromByteIndex, int toByteIndex, Encoding expectedEncoding); @Specialization static int doIndexOf(AbstractTruffleString a, int codepoint, int fromByteIndex, int toByteIndex, Encoding expectedEncoding, @Cached ToIndexableNode toIndexableNode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode, @Cached TStringInternalNodes.IndexOfCodePointRawNode indexOfNode) { a.checkEncoding(expectedEncoding); if (a.isEmpty()) { return -1; } final int fromIndex = rawIndex(fromByteIndex, expectedEncoding); final int toIndex = rawIndex(toByteIndex, expectedEncoding); a.boundsCheckRaw(fromIndex, toIndex); return byteIndex(indexOfNode.execute(a, toIndexableNode.execute(a, a.data()), getCodeRangeNode.execute(a), expectedEncoding, codepoint, fromIndex, toIndex), expectedEncoding); } /** * Create a new {@link ByteIndexOfCodePointNode}. * * @since 22.1 */ public static ByteIndexOfCodePointNode create() { return TruffleStringFactory.ByteIndexOfCodePointNodeGen.create(); } /** * Get the uncached version of {@link ByteIndexOfCodePointNode}. * * @since 22.1 */ public static ByteIndexOfCodePointNode getUncached() { return TruffleStringFactory.ByteIndexOfCodePointNodeGen.getUncached(); } } /** * Node to find the codepoint index of the last occurrence of a given code point. See * {@link #execute(AbstractTruffleString, int, int, int, TruffleString.Encoding)} for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class LastIndexOfCodePointNode extends Node { LastIndexOfCodePointNode() { } /** * Return the codepoint index of the last occurrence of {@code codepoint}, bounded by * {@code fromIndex} (exclusive upper limit) and {@code toIndex} (inclusive lower limit), if * no occurrence is found return a negative value. 
* * @since 22.1 */ public abstract int execute(AbstractTruffleString a, int codepoint, int fromIndex, int toIndex, Encoding expectedEncoding); @Specialization static int doIndexOf(AbstractTruffleString a, int codepoint, int fromIndex, int toIndex, Encoding expectedEncoding, @Cached ToIndexableNode toIndexableNode, @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode, @Cached TStringInternalNodes.LastIndexOfCodePointNode lastIndexOfNode) { a.checkEncoding(expectedEncoding); if (a.isEmpty()) { return -1; } a.boundsCheck(toIndex, fromIndex, getCodePointLengthNode); Object arrayA = toIndexableNode.execute(a, a.data()); return lastIndexOfNode.execute(a, arrayA, getCodeRangeNode.execute(a), expectedEncoding, codepoint, fromIndex, toIndex); } /** * Create a new {@link LastIndexOfCodePointNode}. * * @since 22.1 */ public static LastIndexOfCodePointNode create() { return TruffleStringFactory.LastIndexOfCodePointNodeGen.create(); } /** * Get the uncached version of {@link LastIndexOfCodePointNode}. * * @since 22.1 */ public static LastIndexOfCodePointNode getUncached() { return TruffleStringFactory.LastIndexOfCodePointNodeGen.getUncached(); } } /** * {@link LastIndexOfCodePointNode}, but with byte indices. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class LastByteIndexOfCodePointNode extends Node { LastByteIndexOfCodePointNode() { } /** * {@link LastIndexOfCodePointNode}, but with byte indices. 
* * @since 22.1 */ public abstract int execute(AbstractTruffleString a, int codepoint, int fromByteIndex, int toByteIndex, Encoding expectedEncoding); @Specialization static int doIndexOf(AbstractTruffleString a, int codepoint, int fromByteIndex, int toByteIndex, Encoding expectedEncoding, @Cached ToIndexableNode toIndexableNode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode, @Cached TStringInternalNodes.LastIndexOfCodePointRawNode lastIndexOfNode) { a.checkEncoding(expectedEncoding); if (a.isEmpty()) { return -1; } final int fromIndex = rawIndex(fromByteIndex, expectedEncoding); final int toIndex = rawIndex(toByteIndex, expectedEncoding); a.boundsCheckRaw(toIndex, fromIndex); return byteIndex(lastIndexOfNode.execute(a, toIndexableNode.execute(a, a.data()), getCodeRangeNode.execute(a), expectedEncoding, codepoint, fromIndex, toIndex), expectedEncoding); } /** * Create a new {@link LastByteIndexOfCodePointNode}. * * @since 22.1 */ public static LastByteIndexOfCodePointNode create() { return TruffleStringFactory.LastByteIndexOfCodePointNodeGen.create(); } /** * Get the uncached version of {@link LastByteIndexOfCodePointNode}. * * @since 22.1 */ public static LastByteIndexOfCodePointNode getUncached() { return TruffleStringFactory.LastByteIndexOfCodePointNodeGen.getUncached(); } } /** * Node to find the index of the first occurrence of a given string. See * {@link #execute(AbstractTruffleString, AbstractTruffleString, int, int, TruffleString.Encoding)} * for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class IndexOfStringNode extends Node { IndexOfStringNode() { } /** * Return the codepoint index of the first occurrence of {@code string}, bounded by * {@code fromIndex} (inclusive) and {@code toIndex} (exclusive), if no occurrence is found * return a negative value. 
* * @since 22.1 */ public abstract int execute(AbstractTruffleString a, AbstractTruffleString b, int fromIndex, int toIndex, Encoding expectedEncoding); @Specialization static int indexOfString(AbstractTruffleString a, AbstractTruffleString b, int fromIndex, int toIndex, Encoding expectedEncoding, @Cached ToIndexableNode toIndexableNodeA, @Cached ToIndexableNode toIndexableNodeB, @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthANode, @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthBNode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode, @Cached TStringInternalNodes.IndexOfStringNode indexOfStringNode) { int codeRangeA = getCodeRangeANode.execute(a); int codeRangeB = getCodeRangeBNode.execute(b); a.looseCheckEncoding(expectedEncoding, codeRangeA); b.looseCheckEncoding(expectedEncoding, codeRangeB); if (b.isEmpty()) { return fromIndex; } if (a.isEmpty()) { return -1; } a.boundsCheck(fromIndex, toIndex, getCodePointLengthANode); Object arrayA = toIndexableNodeA.execute(a, a.data()); Object arrayB = toIndexableNodeB.execute(b, b.data()); if (indexOfCannotMatch(codeRangeA, b, codeRangeB, toIndex - fromIndex, getCodePointLengthBNode)) { return -1; } return indexOfStringNode.execute(a, arrayA, codeRangeA, b, arrayB, codeRangeB, fromIndex, toIndex, expectedEncoding); } /** * Create a new {@link IndexOfStringNode}. * * @since 22.1 */ public static IndexOfStringNode create() { return TruffleStringFactory.IndexOfStringNodeGen.create(); } /** * Get the uncached version of {@link IndexOfStringNode}. * * @since 22.1 */ public static IndexOfStringNode getUncached() { return TruffleStringFactory.IndexOfStringNodeGen.getUncached(); } } /** * {@link IndexOfStringNode}, but with byte indices. 
*
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class ByteIndexOfStringNode extends Node {

        ByteIndexOfStringNode() {
        }

        /**
         * {@link IndexOfStringNode}, but with byte indices.
         *
         * @since 22.1
         */
        public final int execute(AbstractTruffleString a, AbstractTruffleString b, int fromByteIndex, int toByteIndex, Encoding expectedEncoding) {
            // Delegate to the masked variant without a mask.
            final byte[] noMask = null;
            return execute(a, b, fromByteIndex, toByteIndex, noMask, expectedEncoding);
        }

        /**
         * {@link IndexOfStringNode}, but with byte indices. This variant accepts a
         * {@link TruffleString.WithMask} as the search value {@code b}, which changes the searching
         * algorithm in the following manner: whenever the contents of {@code a} and {@code b} are
         * compared, the mask is OR'ed to {@code a}, as shown in this exemplary method:
         *
         * <pre>
         * {@code
         * boolean bytesEqualAt(TruffleString a, int byteIndexA, TruffleString.WithMask b, int byteIndexB) {
         *     return (readByte(a, byteIndexA) | readByte(b.mask, byteIndexB)) == readByte(b, byteIndexB);
         * }
         * }
         * </pre>
         *
         * @since 22.1
         */
        public final int execute(AbstractTruffleString a, WithMask b, int fromByteIndex, int toByteIndex, Encoding expectedEncoding) {
            return execute(a, b.string, fromByteIndex, toByteIndex, b.mask, expectedEncoding);
        }

        abstract int execute(AbstractTruffleString a, AbstractTruffleString b, int fromByteIndex, int toByteIndex, byte[] mask, Encoding expectedEncoding);

        @Specialization
        static int indexOfString(AbstractTruffleString a, AbstractTruffleString b, int fromByteIndex, int toByteIndex, byte[] mask, Encoding expectedEncoding,
                        @Cached ToIndexableNode toIndexableNodeA,
                        @Cached ToIndexableNode toIndexableNodeB,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode,
                        @Cached TStringInternalNodes.IndexOfStringRawNode indexOfStringNode) {
            final int rangeA = getCodeRangeANode.execute(a);
            final int rangeB = getCodeRangeBNode.execute(b);
            a.looseCheckEncoding(expectedEncoding, rangeA);
            b.looseCheckEncoding(expectedEncoding, rangeB);
            // A mask is rejected for unsupported encodings unless a's code range is fixed-width.
            if (mask != null) {
                if (isUnsupportedEncoding(expectedEncoding) && !isFixedWidth(rangeA)) {
                    throw InternalErrors.unsupportedOperation();
                }
            }
            // An empty search string matches at fromByteIndex; this is decided before the bounds
            // check.
            if (b.isEmpty()) {
                return fromByteIndex;
            }
            if (a.isEmpty()) {
                return -1;
            }
            final int rawFrom = rawIndex(fromByteIndex, expectedEncoding);
            final int rawTo = rawIndex(toByteIndex, expectedEncoding);
            a.boundsCheckRaw(rawFrom, rawTo);
            final Object dataA = toIndexableNodeA.execute(a, a.data());
            final Object dataB = toIndexableNodeB.execute(b, b.data());
            final int regionLength = rawTo - rawFrom;
            // Skip the search when the pre-check rules out a match.
            if (indexOfCannotMatch(rangeA, b, rangeB, mask, regionLength)) {
                return -1;
            }
            return byteIndex(indexOfStringNode.execute(a, dataA, rangeA, b, dataB, rangeB, rawFrom, rawTo, mask, expectedEncoding), expectedEncoding);
        }

        /**
         * Create a new {@link ByteIndexOfStringNode}.
* * @since 22.1 */ public static ByteIndexOfStringNode create() { return TruffleStringFactory.ByteIndexOfStringNodeGen.create(); } /** * Get the uncached version of {@link ByteIndexOfStringNode}. * * @since 22.1 */ public static ByteIndexOfStringNode getUncached() { return TruffleStringFactory.ByteIndexOfStringNodeGen.getUncached(); } } /** * Node to find the index of the last occurrence of a given string. See * {@link #execute(AbstractTruffleString, AbstractTruffleString, int, int, TruffleString.Encoding)} * for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class LastIndexOfStringNode extends Node { LastIndexOfStringNode() { } /** * Return the codepoint index of the last occurrence of {@code string}, bounded by * {@code fromIndex} (exclusive upper limit) and {@code toIndex} (inclusive lower limit), if * no occurrence is found return a negative value. * * @since 22.1 */ public abstract int execute(AbstractTruffleString a, AbstractTruffleString b, int fromIndex, int toIndex, Encoding expectedEncoding); @Specialization static int lastIndexOfString(AbstractTruffleString a, AbstractTruffleString b, int fromIndex, int toIndex, Encoding expectedEncoding, @Cached ToIndexableNode toIndexableNodeA, @Cached ToIndexableNode toIndexableNodeB, @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthANode, @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthBNode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode, @Cached TStringInternalNodes.LastIndexOfStringNode indexOfStringNode) { final int codeRangeA = getCodeRangeANode.execute(a); final int codeRangeB = getCodeRangeBNode.execute(b); a.looseCheckEncoding(expectedEncoding, codeRangeA); b.looseCheckEncoding(expectedEncoding, codeRangeB); if (b.isEmpty()) { return fromIndex; } if (a.isEmpty()) { return -1; } a.boundsCheck(toIndex, 
fromIndex, getCodePointLengthANode); Object arrayA = toIndexableNodeA.execute(a, a.data()); Object arrayB = toIndexableNodeB.execute(b, b.data()); if (indexOfCannotMatch(codeRangeA, b, codeRangeB, fromIndex - toIndex, getCodePointLengthBNode)) { return -1; } return indexOfStringNode.execute(a, arrayA, codeRangeA, b, arrayB, codeRangeB, fromIndex, toIndex, expectedEncoding); } /** * Create a new {@link LastIndexOfStringNode}. * * @since 22.1 */ public static LastIndexOfStringNode create() { return TruffleStringFactory.LastIndexOfStringNodeGen.create(); } /** * Get the uncached version of {@link LastIndexOfStringNode}. * * @since 22.1 */ public static LastIndexOfStringNode getUncached() { return TruffleStringFactory.LastIndexOfStringNodeGen.getUncached(); } } /** * {@link LastIndexOfStringNode}, but with byte indices. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class LastByteIndexOfStringNode extends Node { LastByteIndexOfStringNode() { } /** * {@link LastIndexOfStringNode}, but with byte indices. * * @since 22.1 */ public final int execute(AbstractTruffleString a, AbstractTruffleString b, int fromIndex, int toIndex, Encoding expectedEncoding) { return execute(a, b, fromIndex, toIndex, null, expectedEncoding); } /** * {@link LastIndexOfStringNode}, but with byte indices. This variant accepts a * {@link TruffleString.WithMask} as the search value {@code b}, which changes the searching * algorithm in the following manner: whenever the contents of {@code a} and {@code b} are * compared, the mask is OR'ed to {@code a}, as shown in this exemplary method: * *
         * {@code
         * boolean bytesEqualAt(TruffleString a, int byteIndexA, TruffleString.WithMask b, int byteIndexB) {
         *     return (readByte(a, byteIndexA) | readByte(b.mask, byteIndexB)) == readByte(b, byteIndexB);
         * }
         * }
         * 
* * @since 22.1 */ public final int execute(AbstractTruffleString a, WithMask b, int fromIndex, int toIndex, Encoding expectedEncoding) { return execute(a, b.string, fromIndex, toIndex, b.mask, expectedEncoding); } abstract int execute(AbstractTruffleString a, AbstractTruffleString b, int fromIndex, int toIndex, byte[] mask, Encoding expectedEncoding); @Specialization static int lastByteIndexOfString(AbstractTruffleString a, AbstractTruffleString b, int fromIndexB, int toIndexB, byte[] mask, Encoding expectedEncoding, @Cached ToIndexableNode toIndexableNodeA, @Cached ToIndexableNode toIndexableNodeB, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode, @Cached TStringInternalNodes.LastIndexOfStringRawNode indexOfStringNode) { final int codeRangeA = getCodeRangeANode.execute(a); final int codeRangeB = getCodeRangeBNode.execute(b); a.looseCheckEncoding(expectedEncoding, codeRangeA); b.looseCheckEncoding(expectedEncoding, codeRangeB); if (mask != null && isUnsupportedEncoding(expectedEncoding) && !isFixedWidth(codeRangeA)) { throw InternalErrors.unsupportedOperation(); } if (b.isEmpty()) { return fromIndexB; } if (a.isEmpty()) { return -1; } final int fromIndex = rawIndex(fromIndexB, expectedEncoding); final int toIndex = rawIndex(toIndexB, expectedEncoding); a.boundsCheckRaw(toIndex, fromIndex); Object arrayA = toIndexableNodeA.execute(a, a.data()); Object arrayB = toIndexableNodeB.execute(b, b.data()); if (indexOfCannotMatch(codeRangeA, b, codeRangeB, mask, fromIndex - toIndex)) { return -1; } return byteIndex(indexOfStringNode.execute(a, arrayA, codeRangeA, b, arrayB, codeRangeB, fromIndex, toIndex, mask, expectedEncoding), expectedEncoding); } /** * Create a new {@link LastByteIndexOfStringNode}. 
* * @since 22.1 */ public static LastByteIndexOfStringNode create() { return TruffleStringFactory.LastByteIndexOfStringNodeGen.create(); } /** * Get the uncached version of {@link LastByteIndexOfStringNode}. * * @since 22.1 */ public static LastByteIndexOfStringNode getUncached() { return TruffleStringFactory.LastByteIndexOfStringNodeGen.getUncached(); } } /** * Node to compare two strings byte-by-byte. See * {@link #execute(AbstractTruffleString, AbstractTruffleString, TruffleString.Encoding)} for * details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class CompareBytesNode extends Node { CompareBytesNode() { } /** * Compare strings {@code a} and {@code b} byte-by-byte. Returns zero if {@code a} and * {@code b} are equal. If {@code a} is equal to {@code b} up to its length, but {@code b} * is longer than {@code a}, a negative value is returned. In the inverse case, a positive * value is returned. Otherwise, elements {@code a[i]} and {@code b[i]} at a byte index * {@code i} are different. If {@code a[i]} is greater than {@code b[i]}, a positive value * is returned, otherwise a negative value is returned. 
* * @since 22.1 */ public abstract int execute(AbstractTruffleString a, AbstractTruffleString b, Encoding expectedEncoding); @Specialization int compare(AbstractTruffleString a, AbstractTruffleString b, Encoding expectedEncoding, @Cached ToIndexableNode toIndexableNodeA, @Cached ToIndexableNode toIndexableNodeB, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode) { nullCheck(expectedEncoding); final int codeRangeA = getCodeRangeANode.execute(a); final int codeRangeB = getCodeRangeBNode.execute(b); a.looseCheckEncoding(expectedEncoding, codeRangeA); b.looseCheckEncoding(expectedEncoding, codeRangeB); Object aData = toIndexableNodeA.execute(a, a.data()); Object bData = toIndexableNodeB.execute(b, b.data()); if (aData instanceof byte[] && bData instanceof byte[] && (a.stride() | b.stride()) == 0 && a.length() != 0 && b.length() != 0) { int cmp = Byte.compareUnsigned(((byte[]) aData)[a.offset()], ((byte[]) bData)[b.offset()]); if (cmp != 0) { return cmp; } } if (a == b) { return 0; } return TStringOpsNodes.memcmpBytes(this, a, aData, b, bData); } /** * Create a new {@link CompareBytesNode}. * * @since 22.1 */ public static CompareBytesNode create() { return TruffleStringFactory.CompareBytesNodeGen.create(); } /** * Get the uncached version of {@link CompareBytesNode}. * * @since 22.1 */ public static CompareBytesNode getUncached() { return TruffleStringFactory.CompareBytesNodeGen.getUncached(); } } /** * Node to compare two UTF-16 strings. See * {@link #execute(AbstractTruffleString, AbstractTruffleString)} for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class CompareCharsUTF16Node extends Node { CompareCharsUTF16Node() { } /** * Compare UTF-16 strings {@code a} and {@code b} char-by-char. Returns zero if {@code a} * and {@code b} are equal. 
If {@code a} is equal to {@code b} up to its length, but * {@code b} is longer than {@code a}, a negative value is returned. In the inverse case, a * positive value is returned. Otherwise, elements {@code a[i]} and {@code b[i]} at an index * {@code i} are different. If {@code a[i]} is greater than {@code b[i]}, a positive value * is returned, otherwise a negative value is returned. * * @since 22.1 */ public abstract int execute(AbstractTruffleString a, AbstractTruffleString b); @Specialization int compare(AbstractTruffleString a, AbstractTruffleString b, @Cached ToIndexableNode toIndexableNodeA, @Cached ToIndexableNode toIndexableNodeB, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode) { final int codeRangeA = getCodeRangeANode.execute(a); final int codeRangeB = getCodeRangeBNode.execute(b); a.looseCheckEncoding(Encoding.UTF_16, codeRangeA); b.looseCheckEncoding(Encoding.UTF_16, codeRangeB); Object aData = toIndexableNodeA.execute(a, a.data()); Object bData = toIndexableNodeB.execute(b, b.data()); if (aData instanceof byte[] && bData instanceof byte[] && (a.stride() | b.stride()) == 0 && a.length() != 0 && b.length() != 0) { int cmp = Byte.compareUnsigned(((byte[]) aData)[a.offset()], ((byte[]) bData)[b.offset()]); if (cmp != 0) { return cmp; } } if (a == b) { return 0; } return TStringOpsNodes.memcmp(this, a, aData, b, bData); } /** * Create a new {@link CompareCharsUTF16Node}. * * @since 22.1 */ public static CompareCharsUTF16Node create() { return TruffleStringFactory.CompareCharsUTF16NodeGen.create(); } /** * Get the uncached version of {@link CompareCharsUTF16Node}. * * @since 22.1 */ public static CompareCharsUTF16Node getUncached() { return TruffleStringFactory.CompareCharsUTF16NodeGen.getUncached(); } } /** * Node to compare two UTF-32 strings. See * {@link #execute(AbstractTruffleString, AbstractTruffleString)} for details. 
* * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class CompareIntsUTF32Node extends Node { CompareIntsUTF32Node() { } /** * Compare UTF-32 strings {@code a} and {@code b} int-by-int. Returns zero if {@code a} and * {@code b} are equal. If {@code a} is equal to {@code b} up to its length, but {@code b} * is longer than {@code a}, a negative value is returned. In the inverse case, a positive * value is returned. Otherwise, elements {@code a[i]} and {@code b[i]} at an index * {@code i} are different. If {@code a[i]} is greater than {@code b[i]}, a positive value * is returned, otherwise a negative value is returned. * * @since 22.1 */ public abstract int execute(AbstractTruffleString a, AbstractTruffleString b); @Specialization int compare(AbstractTruffleString a, AbstractTruffleString b, @Cached ToIndexableNode toIndexableNodeA, @Cached ToIndexableNode toIndexableNodeB, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode) { final int codeRangeA = getCodeRangeANode.execute(a); final int codeRangeB = getCodeRangeBNode.execute(b); a.looseCheckEncoding(Encoding.UTF_32, codeRangeA); b.looseCheckEncoding(Encoding.UTF_32, codeRangeB); Object aData = toIndexableNodeA.execute(a, a.data()); Object bData = toIndexableNodeB.execute(b, b.data()); if (aData instanceof byte[] && bData instanceof byte[] && (a.stride() | b.stride()) == 0 && a.length() != 0 && b.length() != 0) { int cmp = Byte.compareUnsigned(((byte[]) aData)[a.offset()], ((byte[]) bData)[b.offset()]); if (cmp != 0) { return cmp; } } if (a == b) { return 0; } return TStringOpsNodes.memcmp(this, a, aData, b, bData); } /** * Create a new {@link CompareIntsUTF32Node}. * * @since 22.1 */ public static CompareIntsUTF32Node create() { return TruffleStringFactory.CompareIntsUTF32NodeGen.create(); } /** * Get the uncached version of {@link CompareIntsUTF32Node}. 
*
         * @since 22.1
         */
        public static CompareIntsUTF32Node getUncached() {
            return TruffleStringFactory.CompareIntsUTF32NodeGen.getUncached();
        }
    }

    /**
     * Node to check codepoint equality of two string regions. See
     * {@link #execute(AbstractTruffleString, int, AbstractTruffleString, int, int, TruffleString.Encoding)}.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class RegionEqualNode extends Node {

        RegionEqualNode() {
        }

        /**
         * Checks for codepoint equality in a region with the given codepoint index and codepoint
         * length.
         * <p>
         * Equivalent to:
         *
         * <pre>
         * for (int i = 0; i &lt; length; i++) {
         *     if (codePointAt(a, fromIndexA + i) != codePointAt(b, fromIndexB + i)) {
         *         return false;
         *     }
         * }
         * return true;
         * </pre>
         *
         * @since 22.1
         */
        public abstract boolean execute(AbstractTruffleString a, int fromIndexA, AbstractTruffleString b, int fromIndexB, int length, Encoding expectedEncoding);

        @Specialization
        static boolean regionEquals(AbstractTruffleString a, int fromIndexA, AbstractTruffleString b, int fromIndexB, int length, Encoding expectedEncoding,
                        @Cached ToIndexableNode toIndexableNodeA,
                        @Cached ToIndexableNode toIndexableNodeB,
                        @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthANode,
                        @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthBNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode,
                        @Cached TStringInternalNodes.RegionEqualsNode regionEqualsNode) {
            // A zero-length region is equal; this is decided before any encoding or bounds check.
            if (length == 0) {
                return true;
            }
            final int rangeA = getCodeRangeANode.execute(a);
            final int rangeB = getCodeRangeBNode.execute(b);
            a.looseCheckEncoding(expectedEncoding, rangeA);
            b.looseCheckEncoding(expectedEncoding, rangeB);
            a.boundsCheckRegion(fromIndexA, length, getCodePointLengthANode);
            b.boundsCheckRegion(fromIndexB, length, getCodePointLengthBNode);
            final Object dataA = toIndexableNodeA.execute(a, a.data());
            final Object dataB = toIndexableNodeB.execute(b, b.data());
            return regionEqualsNode.execute(a, dataA, rangeA, fromIndexA, b, dataB, rangeB, fromIndexB, length, expectedEncoding);
        }

        /**
         * Create a new {@link RegionEqualNode}.
         *
         * @since 22.1
         */
        public static RegionEqualNode create() {
            return TruffleStringFactory.RegionEqualNodeGen.create();
        }

        /**
         * Get the uncached version of {@link RegionEqualNode}.
         *
         * @since 22.1
         */
        public static RegionEqualNode getUncached() {
            return TruffleStringFactory.RegionEqualNodeGen.getUncached();
        }
    }

    /**
     * Node to check for a region match, byte-by-byte.
See * {@link #execute(AbstractTruffleString, int, AbstractTruffleString, int, int, TruffleString.Encoding)} * and * {@link #execute(AbstractTruffleString, int, TruffleString.WithMask, int, int, TruffleString.Encoding)} * for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class RegionEqualByteIndexNode extends Node { RegionEqualByteIndexNode() { } /** * Checks for a region match, byte-by-byte. * * @since 22.1 */ public final boolean execute(AbstractTruffleString a, int fromByteIndexA, AbstractTruffleString b, int fromByteIndexB, int length, Encoding expectedEncoding) { return execute(a, fromByteIndexA, b, fromByteIndexB, length, null, expectedEncoding); } /** * Checks for a region match, byte-by-byte. This variant accepts a * {@link TruffleString.WithMask} as the search value {@code b}, which changes the matching * algorithm in the following manner: when the contents of {@code a} and {@code b} are * compared, the mask is OR'ed to {@code a}, as shown in this exemplary method: * *
         * {@code
         * boolean bytesEqualAt(TruffleString a, int byteIndexA, TruffleString.WithMask b, int byteIndexB) {
         *     return (readByte(a, byteIndexA) | readByte(b.mask, byteIndexB)) == readByte(b, byteIndexB);
         * }
         * }
         * 
* * @since 22.1 */ public final boolean execute(AbstractTruffleString a, int fromByteIndexA, WithMask b, int fromByteIndexB, int length, Encoding expectedEncoding) { return execute(a, fromByteIndexA, b.string, fromByteIndexB, length, b.mask, expectedEncoding); } abstract boolean execute(AbstractTruffleString a, int fromIndexA, AbstractTruffleString b, int fromIndexB, int length, byte[] mask, Encoding expectedEncoding); @Specialization boolean regionEquals(AbstractTruffleString a, int byteFromIndexA, AbstractTruffleString b, int byteFromIndexB, int byteLength, byte[] mask, Encoding expectedEncoding, @Cached ToIndexableNode toIndexableNodeA, @Cached ToIndexableNode toIndexableNodeB, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode) { if (byteLength == 0) { return true; } final int codeRangeA = getCodeRangeANode.execute(a); final int codeRangeB = getCodeRangeBNode.execute(b); a.looseCheckEncoding(expectedEncoding, codeRangeA); b.looseCheckEncoding(expectedEncoding, codeRangeB); final int fromIndexA = rawIndex(byteFromIndexA, expectedEncoding); final int fromIndexB = rawIndex(byteFromIndexB, expectedEncoding); final int length = rawIndex(byteLength, expectedEncoding); a.boundsCheckRegionRaw(fromIndexA, length); b.boundsCheckRegionRaw(fromIndexB, length); Object arrayA = toIndexableNodeA.execute(a, a.data()); Object arrayB = toIndexableNodeB.execute(b, b.data()); return TStringOps.regionEqualsWithOrMaskWithStride(this, a, arrayA, a.stride(), fromIndexA, b, arrayB, b.stride(), fromIndexB, mask, length); } /** * Create a new {@link RegionEqualByteIndexNode}. * * @since 22.1 */ public static RegionEqualByteIndexNode create() { return TruffleStringFactory.RegionEqualByteIndexNodeGen.create(); } /** * Get the uncached version of {@link RegionEqualByteIndexNode}. 
* * @since 22.1 */
        public static RegionEqualByteIndexNode getUncached() {
            return TruffleStringFactory.RegionEqualByteIndexNodeGen.getUncached();
        }
    }

    /**
     * Node to concatenate two strings. See
     * {@link #execute(AbstractTruffleString, AbstractTruffleString, TruffleString.Encoding, boolean)}
     * for details.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class ConcatNode extends Node {

        ConcatNode() {
        }

        /**
         * Create a new string by concatenating {@code a} and {@code b}.
         *
         * @param lazy if {@code true}, the creation of the new string's internal array may be
         *            delayed until it is required by another operation. This parameter is expected
         *            to be {@link CompilerAsserts#partialEvaluationConstant(boolean) partial
         *            evaluation constant}.
         * @since 22.1
         */
        public abstract TruffleString execute(AbstractTruffleString a, AbstractTruffleString b, Encoding expectedEncoding, boolean lazy);

        // a is empty and b is already a TruffleString: b itself is the result once its encoding
        // has been checked. With DEBUG_STRICT_ENCODING_CHECKS the check is loose and the result
        // is b switched to the expected encoding.
        @SuppressWarnings("unused")
        @Specialization(guards = "isEmpty(a)")
        static TruffleString aEmpty(AbstractTruffleString a, TruffleString b, Encoding expectedEncoding, boolean lazy) {
            CompilerAsserts.partialEvaluationConstant(lazy);
            if (AbstractTruffleString.DEBUG_STRICT_ENCODING_CHECKS) {
                b.looseCheckEncoding(expectedEncoding, b.codeRange());
                return b.switchEncodingUncached(expectedEncoding);
            }
            b.checkEncoding(expectedEncoding);
            return b;
        }

        // a is empty and b is mutable: a TruffleString is created from b's buffer, passing along
        // b's code point length and code range.
        @SuppressWarnings("unused")
        @Specialization(guards = "isEmpty(a)")
        static TruffleString aEmptyMutable(AbstractTruffleString a, MutableTruffleString b, Encoding expectedEncoding, boolean lazy,
                        @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                        @Cached TStringInternalNodes.FromBufferWithStringCompactionKnownAttributesNode fromBufferWithStringCompactionNode) {
            CompilerAsserts.partialEvaluationConstant(lazy);
            if (AbstractTruffleString.DEBUG_STRICT_ENCODING_CHECKS) {
                b.looseCheckEncoding(expectedEncoding, TStringInternalNodes.GetCodeRangeNode.getUncached().execute(b));
                return b.switchEncodingUncached(expectedEncoding);
            }
            int codeRange = getCodeRangeNode.execute(b);
            b.looseCheckEncoding(expectedEncoding, codeRange);
            return fromBufferWithStringCompactionNode.execute(b.data(), b.offset(), b.length() << b.stride(), expectedEncoding, getCodePointLengthNode.execute(b), codeRange);
        }

        // Mirror image of aEmpty: b is empty and a is already a TruffleString.
        @SuppressWarnings("unused")
        @Specialization(guards = "isEmpty(b)")
        static TruffleString bEmpty(TruffleString a, AbstractTruffleString b, Encoding expectedEncoding, boolean lazy) {
            CompilerAsserts.partialEvaluationConstant(lazy);
            if (AbstractTruffleString.DEBUG_STRICT_ENCODING_CHECKS) {
                a.looseCheckEncoding(expectedEncoding, a.codeRange());
                return a.switchEncodingUncached(expectedEncoding);
            }
            a.checkEncoding(expectedEncoding);
            return a;
        }

        // Mirror image of aEmptyMutable: b is empty and a is mutable.
        @SuppressWarnings("unused")
        @Specialization(guards = "isEmpty(b)")
        static TruffleString bEmptyMutable(MutableTruffleString a, AbstractTruffleString b, Encoding expectedEncoding, boolean lazy,
                        @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                        @Cached TStringInternalNodes.FromBufferWithStringCompactionKnownAttributesNode fromBufferWithStringCompactionNode) {
            CompilerAsserts.partialEvaluationConstant(lazy);
            if (AbstractTruffleString.DEBUG_STRICT_ENCODING_CHECKS) {
                a.looseCheckEncoding(expectedEncoding, TStringInternalNodes.GetCodeRangeNode.getUncached().execute(a));
                return a.switchEncodingUncached(expectedEncoding);
            }
            int codeRange = getCodeRangeNode.execute(a);
            a.looseCheckEncoding(expectedEncoding, codeRange);
            return fromBufferWithStringCompactionNode.execute(a.data(), a.offset(), a.length() << a.stride(), expectedEncoding, getCodePointLengthNode.execute(a), codeRange);
        }

        // General case, both operands non-empty. A lazy concatenation is created only if all of
        // the following hold: lazy was requested, the common code range is not broken multi-byte,
        // at least one operand is immutable, and the resulting byte length reaches
        // LAZY_CONCAT_MIN_LENGTH. Otherwise the eager concatenation node is used.
        @Specialization(guards = {"!isEmpty(a)", "!isEmpty(b)"})
        static TruffleString doConcat(AbstractTruffleString a, AbstractTruffleString b, Encoding encoding, boolean lazy,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode,
                        @Cached TStringInternalNodes.StrideFromCodeRangeNode getStrideNode,
                        @Cached TStringInternalNodes.ConcatEagerNode concatEagerNode,
                        @Cached AsTruffleStringNode asTruffleStringANode,
                        @Cached AsTruffleStringNode asTruffleStringBNode,
                        @Cached BranchProfile outOfMemoryProfile,
                        @Cached ConditionProfile lazyProfile) {
            CompilerAsserts.partialEvaluationConstant(lazy);
            final int codeRangeA = getCodeRangeANode.execute(a);
            final int codeRangeB = getCodeRangeBNode.execute(b);
            a.looseCheckEncoding(encoding, codeRangeA);
            b.looseCheckEncoding(encoding, codeRangeB);
            int commonCodeRange = TSCodeRange.commonCodeRange(codeRangeA, codeRangeB);
            // If either input is broken multi-byte, the common code range must be as well.
            assert !(isBrokenMultiByte(codeRangeA) || isBrokenMultiByte(codeRangeB)) || isBrokenMultiByte(commonCodeRange);
            int targetStride = getStrideNode.execute(commonCodeRange, encoding);
            int length = addByteLengths(a, b, targetStride, outOfMemoryProfile);
            boolean valid = !isBrokenMultiByte(commonCodeRange);
            if (lazyProfile.profile(lazy && valid && (a.isImmutable() || b.isImmutable()) && (length << targetStride) >= TStringConstants.LAZY_CONCAT_MIN_LENGTH)) {
                if (AbstractTruffleString.DEBUG_STRICT_ENCODING_CHECKS) {
                    return TruffleString.createLazyConcat(asTruffleStringLoose(a, encoding), asTruffleStringLoose(b, encoding), encoding, length, targetStride);
                } else {
                    return TruffleString.createLazyConcat(asTruffleStringANode.execute(a, encoding), asTruffleStringBNode.execute(b, encoding), encoding, length, targetStride);
                }
            }
            return concatEagerNode.execute(a, b, encoding, length, targetStride, commonCodeRange);
        }

        // Sums both lengths in 64-bit arithmetic and raises an out-of-memory error if the sum,
        // shifted by the target stride, exceeds MAX_ARRAY_SIZE.
        static int addByteLengths(AbstractTruffleString a, AbstractTruffleString b, int targetStride, BranchProfile outOfMemoryProfile) {
            long length = (long) a.length() + (long) b.length();
            if (length << targetStride > TStringConstants.MAX_ARRAY_SIZE) {
                outOfMemoryProfile.enter();
                throw InternalErrors.outOfMemory();
            }
            return (int) length;
        }

        // Uncached conversion used on the DEBUG_STRICT_ENCODING_CHECKS path: immutable strings
        // are cast, anything else is turned into a TruffleString from its buffer.
        private static TruffleString asTruffleStringLoose(AbstractTruffleString a, Encoding encoding) {
            if (a.isImmutable()) {
                return (TruffleString) a;
            }
            return TStringInternalNodes.FromBufferWithStringCompactionKnownAttributesNode.getUncached().execute(
                            a.data(), a.offset(), a.length() << a.stride(), encoding,
                            TStringInternalNodes.GetCodePointLengthNode.getUncached().execute(a),
                            TStringInternalNodes.GetCodeRangeNode.getUncached().execute(a));
        }

        /**
         * Create a new {@link ConcatNode}.
         *
         * @since 22.1
         */
        public static ConcatNode create() {
            return TruffleStringFactory.ConcatNodeGen.create();
        }

        /**
         * Get the uncached version of {@link ConcatNode}.
         *
         * @since 22.1
         */
        public static ConcatNode getUncached() {
            return TruffleStringFactory.ConcatNodeGen.getUncached();
        }
    }

    /**
     * Node to repeat a given string {@code N} times. See
     * {@link #execute(AbstractTruffleString, int, TruffleString.Encoding)} for details.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class RepeatNode extends Node {

        RepeatNode() {
        }

        /**
         * Create a new string by repeating {@code n} times string {@code a}.
* * @since 22.1 */
        public abstract TruffleString execute(AbstractTruffleString a, int n, Encoding expectedEncoding);

        // Builds the repeated string by copying a's bytes n times into a fresh array.
        @Specialization
        TruffleString repeat(AbstractTruffleString a, int n, Encoding expectedEncoding,
                        @Cached AsTruffleStringNode asTruffleStringNode,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode,
                        @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode,
                        @Cached TStringInternalNodes.CalcStringAttributesNode calcStringAttributesNode,
                        @Cached ConditionProfile brokenProfile,
                        @Cached BranchProfile outOfMemoryProfile) {
            a.checkEncoding(expectedEncoding);
            // NOTE(review): the message says "positive", yet n == 0 is accepted just below and
            // yields the empty string; only negative values are rejected here.
            if (n < 0) {
                throw InternalErrors.illegalArgument("n must be positive");
            }
            if (a.isEmpty() || n == 0) {
                return expectedEncoding.getEmpty();
            }
            if (n == 1) {
                return asTruffleStringNode.execute(a, expectedEncoding);
            }
            Object arrayA = toIndexableNode.execute(a, a.data());
            int codeRangeA = getCodeRangeNode.execute(a);
            int codePointLengthA = getCodePointLengthNode.execute(a);
            int byteLengthA = (a.length()) << a.stride();
            // Total size is computed in 64-bit arithmetic and checked against MAX_ARRAY_SIZE
            // before the result array is allocated.
            long byteLength = ((long) byteLengthA) * n;
            if (Long.compareUnsigned(byteLength, TStringConstants.MAX_ARRAY_SIZE) > 0) {
                outOfMemoryProfile.enter();
                throw InternalErrors.outOfMemory();
            }
            byte[] array = new byte[(int) byteLength];
            int offsetB = 0;
            for (int i = 0; i < n; i++) {
                TStringOps.arraycopyWithStride(this, arrayA, a.offset(), 0, 0, array, offsetB, 0, 0, byteLengthA);
                offsetB += byteLengthA;
                TStringConstants.truffleSafePointPoll(this, i + 1);
            }
            int length = (int) (byteLength >> a.stride());
            if (brokenProfile.profile(isBrokenFixedWidth(codeRangeA) || isBrokenMultiByte(codeRangeA))) {
                // Broken code range: recalculate code range and code point length from the
                // repeated content instead of deriving them from the source string.
                long attrs = calcStringAttributesNode.execute(null, array, 0, length, a.stride(), expectedEncoding, TSCodeRange.getUnknown());
                codeRangeA = StringAttributes.getCodeRange(attrs);
                codePointLengthA = StringAttributes.getCodePointLength(attrs);
            } else {
                codePointLengthA *= n;
            }
            return createFromByteArray(array, length, a.stride(), expectedEncoding, codePointLengthA, codeRangeA);
        }

        /**
         * Create a new {@link RepeatNode}.
         *
         * @since 22.1
         */
        public static RepeatNode create() {
            return TruffleStringFactory.RepeatNodeGen.create();
        }

        /**
         * Get the uncached version of {@link RepeatNode}.
         *
         * @since 22.1
         */
        public static RepeatNode getUncached() {
            return TruffleStringFactory.RepeatNodeGen.getUncached();
        }
    }

    /**
     * Node to create a substring of a given string. See
     * {@link #execute(AbstractTruffleString, int, int, TruffleString.Encoding, boolean)} for
     * details.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class SubstringNode extends Node {

        SubstringNode() {
        }

        /**
         * Create a substring of {@code a}, starting from codepoint index {@code fromIndex}, with
         * codepoint length {@code length}. If {@code lazy} is {@code true}, {@code a}'s internal
         * storage will be re-used instead of creating a copy of the requested range. Since the
         * resulting string will have a reference to {@code a}'s internal storage, and
         * {@link TruffleString} currently does not resize/trim the substring's internal
         * storage at any point, the {@code lazy} variant effectively creates a memory leak! The
         * caller is responsible for deciding whether this is acceptable or not.
* * @since 22.1 */
        public abstract TruffleString execute(AbstractTruffleString a, int fromIndex, int length, Encoding expectedEncoding, boolean lazy);

        // Translates the code point based region to raw indices and delegates to the internal
        // substring node. The lazy flag is only passed on for immutable strings.
        @Specialization
        static TruffleString substring(AbstractTruffleString a, int fromIndex, int length, Encoding expectedEncoding, boolean lazy,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode,
                        @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode,
                        @Cached TStringInternalNodes.CodePointIndexToRawNode translateIndexNode,
                        @Cached TStringInternalNodes.SubstringNode substringNode) {
            a.checkEncoding(expectedEncoding);
            a.boundsCheckRegion(fromIndex, length, getCodePointLengthNode);
            // The empty result is returned only after the encoding and bounds checks passed.
            if (length == 0) {
                return expectedEncoding.getEmpty();
            }
            Object arrayA = toIndexableNode.execute(a, a.data());
            final int codeRangeA = getCodeRangeANode.execute(a);
            // First translation starts at 0; the second one starts at the translated start index.
            int fromIndexRaw = translateIndexNode.execute(a, arrayA, codeRangeA, expectedEncoding, 0, fromIndex, false);
            int lengthRaw = translateIndexNode.execute(a, arrayA, codeRangeA, expectedEncoding, fromIndexRaw, length, true);
            return substringNode.execute(a, arrayA, codeRangeA, expectedEncoding, fromIndexRaw, lengthRaw, lazy && a.isImmutable());
        }

        /**
         * Create a new {@link SubstringNode}.
         *
         * @since 22.1
         */
        public static SubstringNode create() {
            return TruffleStringFactory.SubstringNodeGen.create();
        }

        /**
         * Get the uncached version of {@link SubstringNode}.
         *
         * @since 22.1
         */
        public static SubstringNode getUncached() {
            return TruffleStringFactory.SubstringNodeGen.getUncached();
        }
    }

    /**
     * {@link SubstringNode}, but with byte indices.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class SubstringByteIndexNode extends Node {

        SubstringByteIndexNode() {
        }

        /**
         * {@link SubstringNode}, but with byte indices.
* * @since 22.1 */
        public abstract TruffleString execute(AbstractTruffleString a, int fromByteIndex, int byteLength, Encoding expectedEncoding, boolean lazy);

        // Plain int equality, referenced from the guard of substringEmpty.
        static boolean isSame(int v0, int v1) {
            return v0 == v1;
        }

        // Zero-length request: the encoding and the start index are still validated before the
        // empty string of the expected encoding is returned.
        @Specialization(guards = "isSame(byteLength, 0)")
        static TruffleString substringEmpty(AbstractTruffleString a, int fromByteIndex, @SuppressWarnings("unused") int byteLength, Encoding expectedEncoding,
                        @SuppressWarnings("unused") boolean lazy) {
            a.checkEncoding(expectedEncoding);
            final int fromIndex = rawIndex(fromByteIndex, expectedEncoding);
            a.boundsCheckRegionRaw(fromIndex, 0);
            return expectedEncoding.getEmpty();
        }

        // Non-empty request: byte arguments are translated with rawIndex, bounds-checked and
        // handed to the internal substring node. The lazy flag is only passed on for immutable
        // strings.
        @Specialization(guards = "byteLength != 0")
        static TruffleString substringRaw(AbstractTruffleString a, int fromByteIndex, int byteLength, Encoding expectedEncoding, boolean lazy,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode,
                        @Cached TStringInternalNodes.SubstringNode substringNode) {
            a.checkEncoding(expectedEncoding);
            final int codeRangeA = getCodeRangeANode.execute(a);
            final int fromIndex = rawIndex(fromByteIndex, expectedEncoding);
            final int length = rawIndex(byteLength, expectedEncoding);
            a.boundsCheckRegionRaw(fromIndex, length);
            return substringNode.execute(a, toIndexableNode.execute(a, a.data()), codeRangeA, expectedEncoding, fromIndex, length, lazy && a.isImmutable());
        }

        /**
         * Create a new {@link SubstringByteIndexNode}.
         *
         * @since 22.1
         */
        public static SubstringByteIndexNode create() {
            return TruffleStringFactory.SubstringByteIndexNodeGen.create();
        }

        /**
         * Get the uncached version of {@link SubstringByteIndexNode}.
         *
         * @since 22.1
         */
        public static SubstringByteIndexNode getUncached() {
            return TruffleStringFactory.SubstringByteIndexNodeGen.getUncached();
        }
    }

    /**
     * Node to check two strings for equality.
     * <p>
     * The {@link TruffleString#equals(Object)}-method delegates to this node.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class EqualNode extends Node {

        EqualNode() {
        }

        /**
         * Returns {@code true} if {@code a} and {@code b} are byte-by-byte equal when considered in
         * {@code expectedEncoding}. Note that this method requires both strings to be
         * {@link #isCompatibleTo(TruffleString.Encoding) compatible} to the
         * {@code expectedEncoding}, just like all other operations with an {@code expectedEncoding}
         * parameter!
         * <p>
* The {@link TruffleString#equals(Object)}-method delegates to this method. * * @since 22.1 */ public abstract boolean execute(AbstractTruffleString a, AbstractTruffleString b, Encoding expectedEncoding); @SuppressWarnings("unused") @Specialization(guards = "identical(a, b)") static boolean sameObject(AbstractTruffleString a, AbstractTruffleString b, Encoding expectedEncoding) { return true; } @Specialization(guards = "!identical(a, b)") boolean check(AbstractTruffleString a, AbstractTruffleString b, Encoding expectedEncoding, @Cached ToIndexableNode toIndexableNodeA, @Cached ToIndexableNode toIndexableNodeB, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeBNode, @Cached ConditionProfile lengthAndCodeRangeCheckProfile, @Cached BranchProfile compareHashProfile, @Cached ConditionProfile checkFirstByteProfile) { final int codeRangeA = getCodeRangeANode.execute(a); final int codeRangeB = getCodeRangeBNode.execute(b); a.looseCheckEncoding(expectedEncoding, codeRangeA); b.looseCheckEncoding(expectedEncoding, codeRangeB); return checkContentEquals(a, codeRangeA, b, codeRangeB, toIndexableNodeA, toIndexableNodeB, lengthAndCodeRangeCheckProfile, compareHashProfile, checkFirstByteProfile, this); } static boolean checkContentEquals( AbstractTruffleString a, int codeRangeA, AbstractTruffleString b, int codeRangeB, ToIndexableNode toIndexableNodeA, ToIndexableNode toIndexableNodeB, ConditionProfile lengthAndCodeRangeCheckProfile, BranchProfile compareHashProfile, ConditionProfile checkFirstByteProfile, EqualNode equalNode) { assert TSCodeRange.isKnown(codeRangeA, codeRangeB); int lengthCMP = a.length(); if (lengthAndCodeRangeCheckProfile.profile(lengthCMP != b.length() || codeRangeA != codeRangeB)) { return false; } if (a.isHashCodeCalculated() && b.isHashCodeCalculated()) { compareHashProfile.enter(); if (a.getHashCodeUnsafe() != b.getHashCodeUnsafe()) { return false; } } if (lengthCMP == 0) { return 
true; } Object arrayA = toIndexableNodeA.execute(a, a.data()); Object arrayB = toIndexableNodeB.execute(b, b.data()); int strideA = a.stride(); int strideB = b.stride(); if (checkFirstByteProfile.profile(arrayA instanceof byte[] && arrayB instanceof byte[] && (strideA | strideB) == 0)) { // fast path: check first byte if (((byte[]) arrayA)[a.offset()] != ((byte[]) arrayB)[b.offset()]) { return false; } else if (lengthCMP == 1) { return true; } } return TStringOps.regionEqualsWithOrMaskWithStride(equalNode, a, arrayA, strideA, 0, b, arrayB, strideB, 0, null, lengthCMP); } /** * Create a new {@link EqualNode}. * * @since 22.1 */ public static EqualNode create() { return TruffleStringFactory.EqualNodeGen.create(); } /** * Get the uncached version of {@link EqualNode}. * * @since 22.1 */ public static EqualNode getUncached() { return TruffleStringFactory.EqualNodeGen.getUncached(); } } /** * This exception may be thrown by {@link ParseIntNode}, {@link ParseLongNode} or * {@link ParseDoubleNode} to indicate that the given string cannot be parsed as an integer, * long or double value. This exception does not record stack traces for performance reasons. * * @since 22.1 */ @SuppressWarnings("serial") public static final class NumberFormatException extends Exception { private static final long serialVersionUID = 0x016db657faff57a2L; /** * All {@link NumberFormatException}s contain one of the following exception reason values, * which may be used to build custom error messages in language implementations. * * @since 22.3 */ enum Reason { /** * The string was empty, or contained no digits. */ EMPTY("no digits found"), /** * An invalid codepoint was encountered during parsing. */ INVALID_CODEPOINT("invalid codepoint"), /** * A '+' or '-' sign without any subsequent digits was encountered. */ LONE_SIGN("lone '+' or '-'"), /** * The parsed number was too large to fit in an {@code int}/{@code long}. 
*/
            OVERFLOW("overflow"),
            /**
             * Invalid codepoints encountered when parsing a hex number.
             */
            MALFORMED_HEX_ESCAPE("malformed hex escape sequence"),
            /**
             * Multiple decimal points encountered.
             */
            MULTIPLE_DECIMAL_POINTS("multiple decimal points"),
            /**
             * The given radix is unsupported.
             */
            UNSUPPORTED_RADIX("unsupported radix");

            // Short description returned by getMessage().
            private final String message;

            Reason(String message) {
                this.message = message;
            }

            /**
             * Returns a short error description.
             *
             * @since 22.3
             */
            public String getMessage() {
                return message;
            }
        }

        private final AbstractTruffleString string;
        // Error region; the short constructor sets both values to -1. They are scaled by the
        // string's stride when reported as byte values.
        private final int regionOffset;
        private final int regionLength;
        private final Reason reason;

        // Creates an exception without an error region (offset and length are -1).
        NumberFormatException(AbstractTruffleString string, Reason reason) {
            this(string, -1, -1, reason);
        }

        // Creates an exception with an explicit error region.
        NumberFormatException(AbstractTruffleString string, int regionOffset, int regionLength, Reason reason) {
            super();
            this.string = string;
            this.regionOffset = regionOffset;
            this.regionLength = regionLength;
            this.reason = reason;
        }

        /**
         * Returns the {@link Reason} for this exception. Use this to build custom error messages.
         */
        Reason getReason() {
            return reason;
        }

        /**
         * Returns the string that was attempted to parse.
         */
        AbstractTruffleString getString() {
            return string;
        }

        /**
         * Returns the byte offset to error region, or -1 if not applicable.
         */
        int getRegionByteOffset() {
            // Negative values are passed through unchanged; others are shifted by the stride.
            return regionOffset < 0 ? regionOffset : regionOffset << string.stride();
        }

        /**
         * Returns the error region's length in bytes, or -1 if not applicable.
         */
        int getRegionByteLength() {
            // Negative values are passed through unchanged; others are shifted by the stride.
            return regionLength < 0 ? regionLength : regionLength << string.stride();
        }

        /**
         * Returns a detailed error message. Not designed to be used on fast paths.
* * @since 22.3 */
        @TruffleBoundary
        @Override
        public String getMessage() {
            // Format: error parsing "<string>": <reason>, followed by the error region if one was
            // recorded: a single byte index when the region length is 1, a from/to range otherwise.
            StringBuilder sb = new StringBuilder();
            sb.append("error parsing \"").append(getString()).append("\": ");
            sb.append(getReason().message);
            if (regionOffset >= 0) {
                if (regionLength == 1) {
                    sb.append(" at byte index ").append(getRegionByteOffset());
                } else {
                    sb.append(" from byte index ").append(getRegionByteOffset()).append(" to ").append(getRegionByteOffset() + getRegionByteLength());
                }
            }
            return sb.toString();
        }

        /**
         * No stack trace for this exception.
         *
         * @since 22.1
         */
        @SuppressWarnings("sync-override")
        @Override
        public Throwable fillInStackTrace() {
            return this;
        }
    }

    /**
     * This exception is thrown when any operation tries to create a {@link Encoding#UTF_16 UTF-16}
     * or {@link Encoding#UTF_32 UTF-32} string with an invalid byte length (not a multiple of 2/4
     * bytes).
     *
     * @since 22.3
     */
    public static final class IllegalByteArrayLengthException extends IllegalArgumentException {

        private static final long serialVersionUID = 0x27d918e593fcf85aL;

        // Package-private: instances are only created from within this package.
        IllegalByteArrayLengthException(String msg) {
            super(msg);
        }
    }

    /**
     * Node to parse a given string as an int value.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class ParseIntNode extends Node {

        ParseIntNode() {
        }

        /**
         * Parse the given string as an int value, or throw {@link NumberFormatException}.
* * @since 22.1 */
        public abstract int execute(AbstractTruffleString a, int radix) throws NumberFormatException;

        // Lazy-long string with radix 10: the stored long value is used directly. Values outside
        // the int range are reported with Reason.OVERFLOW.
        @Specialization(guards = {"a.isLazyLong()", "radix == 10"})
        static int doLazyLong(AbstractTruffleString a, @SuppressWarnings("unused") int radix,
                        @Cached BranchProfile errorProfile) throws NumberFormatException {
            long value = ((LazyLong) a.data()).value;
            if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
                errorProfile.enter();
                throw NumberConversion.numberFormatException(a, NumberFormatException.Reason.OVERFLOW);
            }
            return (int) value;
        }

        // All other cases: delegate to the internal parse node, using the string's own encoding
        // and the profiled radix.
        @Specialization(guards = {"!a.isLazyLong() || radix != 10"})
        static int doParse(AbstractTruffleString a, int radix,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode,
                        @Cached TStringInternalNodes.ParseIntNode parseIntNode,
                        @Cached("createIdentityProfile()") IntValueProfile radixProfile) throws NumberFormatException {
            final int codeRangeA = getCodeRangeANode.execute(a);
            return parseIntNode.execute(a, toIndexableNode.execute(a, a.data()), codeRangeA, Encoding.get(a.encoding()), radixProfile.profile(radix));
        }

        /**
         * Create a new {@link ParseIntNode}.
         *
         * @since 22.1
         */
        public static ParseIntNode create() {
            return TruffleStringFactory.ParseIntNodeGen.create();
        }

        /**
         * Get the uncached version of {@link ParseIntNode}.
         *
         * @since 22.1
         */
        public static ParseIntNode getUncached() {
            return TruffleStringFactory.ParseIntNodeGen.getUncached();
        }
    }

    /**
     * Node to parse a given string as a long value.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class ParseLongNode extends Node {

        ParseLongNode() {
        }

        /**
         * Parse the given string as a long value, or throw {@link NumberFormatException}.
* * @since 22.1 */
        public abstract long execute(AbstractTruffleString a, int radix) throws TruffleString.NumberFormatException;

        // Lazy-long string with radix 10: the stored long value is returned directly.
        @Specialization(guards = {"a.isLazyLong()", "radix == 10"})
        static long doLazyLong(AbstractTruffleString a, @SuppressWarnings("unused") int radix) {
            return ((LazyLong) a.data()).value;
        }

        // All other cases: delegate to the internal parse node, using the string's own encoding
        // and the profiled radix.
        @Specialization(guards = {"!a.isLazyLong() || radix != 10"})
        static long doParse(AbstractTruffleString a, int radix,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode,
                        @Cached TStringInternalNodes.ParseLongNode parseLongNode,
                        @Cached("createIdentityProfile()") IntValueProfile radixProfile) throws NumberFormatException {
            final int codeRangeA = getCodeRangeANode.execute(a);
            return parseLongNode.execute(a, toIndexableNode.execute(a, a.data()), codeRangeA, Encoding.get(a.encoding()), radixProfile.profile(radix));
        }

        /**
         * Create a new {@link ParseLongNode}.
         *
         * @since 22.1
         */
        public static ParseLongNode create() {
            return TruffleStringFactory.ParseLongNodeGen.create();
        }

        /**
         * Get the uncached version of {@link ParseLongNode}.
         *
         * @since 22.1
         */
        public static ParseLongNode getUncached() {
            return TruffleStringFactory.ParseLongNodeGen.getUncached();
        }
    }

    /**
     * Node to parse a given string as a double value.
     *
     * @since 22.1
     */
    @ImportStatic(TStringGuards.class)
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class ParseDoubleNode extends Node {

        ParseDoubleNode() {
        }

        /**
         * Parse the given string as a double value, or throw {@link NumberFormatException}.
* * @since 22.1 */
        public abstract double execute(AbstractTruffleString a) throws NumberFormatException;

        // Lazy-long string whose value passes NumberConversion.isSafeInteger: the stored long is
        // widened to double.
        @Specialization(guards = "isLazyLongSafeInteger(a)")
        static double doLazyLong(AbstractTruffleString a) {
            return ((LazyLong) a.data()).value;
        }

        // All other strings are handed to the internal parse node.
        @Specialization(guards = "!isLazyLongSafeInteger(a)")
        static double parseDouble(AbstractTruffleString a,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached TStringInternalNodes.ParseDoubleNode parseDoubleNode) throws NumberFormatException {
            return parseDoubleNode.execute(a, toIndexableNode.execute(a, a.data()));
        }

        // Guard helper shared by both specializations.
        static boolean isLazyLongSafeInteger(AbstractTruffleString a) {
            return a.isLazyLong() && NumberConversion.isSafeInteger(((LazyLong) a.data()).value);
        }

        /**
         * Create a new {@link ParseDoubleNode}.
         *
         * @since 22.1
         */
        public static ParseDoubleNode create() {
            return TruffleStringFactory.ParseDoubleNodeGen.create();
        }

        /**
         * Get the uncached version of {@link ParseDoubleNode}.
         *
         * @since 22.1
         */
        public static ParseDoubleNode getUncached() {
            return TruffleStringFactory.ParseDoubleNodeGen.getUncached();
        }
    }

    /**
     * Node to get a string's internal byte array. See
     * {@link #execute(AbstractTruffleString, TruffleString.Encoding)} for details.
     *
     * @since 22.1
     */
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class GetInternalByteArrayNode extends Node {

        GetInternalByteArrayNode() {
        }

        /**
         * Get the given string's internal byte array. The returned byte array must not be modified.
         * Note that this operation may also return a copy of the string's internal storage, if the
         * internal format does not match the regular encoded string format; compacted and native
         * strings will always yield a copy.
         *
         * CAUTION: TruffleString re-uses internal byte arrays whenever possible, DO NOT modify the
         * arrays returned by this operation. Use this operation only when absolutely necessary.
* Reading a string's contents should always be done via nodes like {@link ReadByteNode},
         * {@link ReadCharUTF16Node}, {@link CodePointAtIndexNode}, {@link CodePointAtByteIndexNode}
         * etc., if at all possible. If mutability is required, use {@link MutableTruffleString}
         * instead.
         *
         * @since 22.1
         */
        public abstract InternalByteArray execute(AbstractTruffleString a, Encoding expectedEncoding);

        // Returns the backing byte[] directly when the data is a byte[] and no inflation is
        // needed; otherwise a copy is produced. Empty strings yield InternalByteArray.EMPTY
        // before the encoding check runs.
        @Specialization
        InternalByteArray getInternalByteArray(AbstractTruffleString a, Encoding expectedEncoding,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached ConditionProfile utf16Profile,
                        @Cached ConditionProfile utf16S0Profile,
                        @Cached ConditionProfile utf32Profile,
                        @Cached ConditionProfile utf32S0Profile,
                        @Cached ConditionProfile utf32S1Profile,
                        @Cached ConditionProfile isByteArrayProfile) {
            if (a.isEmpty()) {
                return InternalByteArray.EMPTY;
            }
            a.checkEncoding(expectedEncoding);
            Object arrayA = toIndexableNode.execute(a, a.data());
            if (utf16Profile.profile(isUTF16(expectedEncoding))) {
                // UTF-16 stored with stride 0 is inflated to stride 1.
                if (utf16S0Profile.profile(isStride0(a))) {
                    return inflate(a, arrayA, 0, 1);
                }
            } else if (utf32Profile.profile(isUTF32(expectedEncoding))) {
                // UTF-32 stored with stride 0 or 1 is inflated to stride 2.
                if (utf32S0Profile.profile(isStride0(a))) {
                    return inflate(a, arrayA, 0, 2);
                }
                if (utf32S1Profile.profile(isStride1(a))) {
                    return inflate(a, arrayA, 1, 2);
                }
            }
            int byteLength = a.length() << a.stride();
            if (isByteArrayProfile.profile(arrayA instanceof byte[])) {
                // Wraps the existing array together with the string's offset.
                return new InternalByteArray((byte[]) arrayA, a.offset(), byteLength);
            } else {
                return new InternalByteArray(TStringOps.arraycopyOfWithStride(this, arrayA, a.offset(), byteLength, 0, byteLength, 0), 0, byteLength);
            }
        }

        // Copies the string's content from strideA to strideB and wraps the copy, starting at
        // offset 0.
        private InternalByteArray inflate(AbstractTruffleString a, Object arrayA, int strideA, int strideB) {
            assert a.stride() == strideA;
            CompilerAsserts.partialEvaluationConstant(strideA);
            CompilerAsserts.partialEvaluationConstant(strideB);
            return new InternalByteArray(TStringOps.arraycopyOfWithStride(this, arrayA, a.offset(), a.length(), strideA, a.length(), strideB), 0, a.length() << strideB);
        }

        /**
         * Create a new {@link GetInternalByteArrayNode}.
         *
         * @since 22.1
         */
        public static GetInternalByteArrayNode create() {
            return TruffleStringFactory.GetInternalByteArrayNodeGen.create();
        }

        /**
         * Get the uncached version of {@link GetInternalByteArrayNode}.
         *
         * @since 22.1
         */
        public static GetInternalByteArrayNode getUncached() {
            return TruffleStringFactory.GetInternalByteArrayNodeGen.getUncached();
        }
    }

    /**
     * Node to get a {@link AbstractTruffleString#isNative() native} string's pointer object. See
     * {@link #execute(AbstractTruffleString, TruffleString.Encoding)} for details.
     *
     * @since 22.1
     */
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class GetInternalNativePointerNode extends Node {

        GetInternalNativePointerNode() {
        }

        /**
         * Get the given string's pointer object which was passed to {@link FromNativePointerNode}.
         * If the string is not backed by a native pointer, this node will throw an
         * {@link UnsupportedOperationException}. Use {@link AbstractTruffleString#isNative()} to
         * check whether the string is actually backed by a native pointer before calling this node.
         * Caution: If the given string is a {@link TruffleString}, the native pointer must not be
         * modified as long as the string is used.
         *
         * @since 22.1
         */
        public abstract Object execute(AbstractTruffleString a, Encoding expectedEncoding);

        // The encoding is checked first; non-native strings are rejected afterwards.
        @Specialization
        static Object getNativePointer(AbstractTruffleString a, Encoding expectedEncoding) {
            a.checkEncoding(expectedEncoding);
            if (!a.isNative()) {
                throw InternalErrors.unsupportedOperation("string is not backed by a native pointer!");
            }
            return ((NativePointer) a.data()).getPointerObject();
        }

        /**
         * Create a new {@link GetInternalNativePointerNode}.
         *
         * @since 22.1
         */
        public static GetInternalNativePointerNode create() {
            return TruffleStringFactory.GetInternalNativePointerNodeGen.create();
        }

        /**
         * Get the uncached version of {@link GetInternalNativePointerNode}.
* * @since 22.1 */
        public static GetInternalNativePointerNode getUncached() {
            return TruffleStringFactory.GetInternalNativePointerNodeGen.getUncached();
        }
    }

    /**
     * Node to copy a region of a string into a byte array. See
     * {@link #execute(AbstractTruffleString, int, byte[], int, int, TruffleString.Encoding)} for
     * details.
     *
     * @since 22.1
     */
    @GeneratePackagePrivate
    @GenerateUncached
    public abstract static class CopyToByteArrayNode extends Node {

        CopyToByteArrayNode() {
        }

        /**
         * Copy the entire string to a byte[] and return it.
         *
         * @since 22.2
         */
        public final byte[] execute(AbstractTruffleString string, Encoding expectedEncoding) {
            // Allocate an array of the string's byte length and copy the full range into it.
            int byteLength = string.byteLength(expectedEncoding);
            byte[] copy = new byte[byteLength];
            execute(string, 0, copy, 0, byteLength, expectedEncoding);
            return copy;
        }

        /**
         * Copy a region of the given {@link TruffleString} {@code a}, bounded by
         * {@code byteFromIndexA} and {@code byteLength} into the given byte array, starting at
         * {@code byteFromIndexDst}.
         *
         * @since 22.1
         */
        public abstract void execute(AbstractTruffleString a, int byteFromIndexA, byte[] dst, int byteFromIndexDst, int byteLength, Encoding expectedEncoding);

        // Validates the destination region against the target array's length, then performs
        // the copy.
        @Specialization
        void doCopy(AbstractTruffleString a, int byteFromIndexA, byte[] arrayB, int byteFromIndexB, int byteLength, Encoding expectedEncoding,
                        @Cached ToIndexableNode toIndexableNode,
                        @Cached ConditionProfile utf16Profile,
                        @Cached ConditionProfile utf16S0Profile,
                        @Cached ConditionProfile utf32Profile,
                        @Cached ConditionProfile utf32S0Profile,
                        @Cached ConditionProfile utf32S1Profile) {
            boundsCheckRegionI(byteFromIndexB, byteLength, arrayB.length);
            doCopyInternal(this, a, byteFromIndexA, arrayB, byteFromIndexB, byteLength, expectedEncoding, toIndexableNode, utf16Profile, utf16S0Profile, utf32Profile, utf32S0Profile,
                            utf32S1Profile);
        }

        // Copy routine. The destination is typed as Object and is not bounds-checked here; doCopy
        // checks it before calling. A zero-length copy returns before the encoding check.
        private static void doCopyInternal(Node location, AbstractTruffleString a, int byteFromIndexA, Object arrayB, int byteFromIndexB, int byteLength, Encoding expectedEncoding,
                        ToIndexableNode toIndexableNode,
                        ConditionProfile utf16Profile,
                        ConditionProfile utf16S0Profile,
                        ConditionProfile utf32Profile,
                        ConditionProfile utf32S0Profile,
                        ConditionProfile utf32S1Profile) {
            if (byteLength == 0) {
                return;
            }
            a.checkEncoding(expectedEncoding);
            final int offsetA = a.offset();
            final int offsetB = 0;
            Object arrayA = toIndexableNode.execute(a, a.data());
            if (utf16Profile.profile(isUTF16(expectedEncoding))) {
                a.boundsCheckByteIndexUTF16(byteFromIndexA);
                checkByteLengthUTF16(byteLength);
                final int fromIndexA = rawIndex(byteFromIndexA, expectedEncoding);
                final int fromIndexB = rawIndex(byteFromIndexB, expectedEncoding);
                final int length = rawIndex(byteLength, expectedEncoding);
                a.boundsCheckRegionRaw(fromIndexA, length);
                // Source stored with stride 0: copy while widening to stride 1.
                if (utf16S0Profile.profile(isStride0(a))) {
                    TStringOps.arraycopyWithStride(location, arrayA, offsetA, 0, fromIndexA, arrayB, offsetB, 1, fromIndexB, length);
                    return;
                }
            } else if (utf32Profile.profile(isUTF32(expectedEncoding))) {
                a.boundsCheckByteIndexUTF32(byteFromIndexA);
                checkByteLengthUTF32(byteLength);
                final int fromIndexA = rawIndex(byteFromIndexA, expectedEncoding);
                final int fromIndexB = rawIndex(byteFromIndexB, expectedEncoding);
                final int length = rawIndex(byteLength, expectedEncoding);
                a.boundsCheckRegionRaw(fromIndexA, length);
                // Source stored with stride 0 or 1: copy while widening to stride 2.
                if (utf32S0Profile.profile(isStride0(a))) {
                    TStringOps.arraycopyWithStride(location, arrayA, offsetA, 0, fromIndexA, arrayB, offsetB, 2, fromIndexB, length);
                    return;
                }
                if (utf32S1Profile.profile(isStride1(a))) {
                    TStringOps.arraycopyWithStride(location, arrayA, offsetA, 1, fromIndexA, arrayB, offsetB, 2, fromIndexB, length);
                    return;
                }
            }
            // Remaining cases: plain byte-wise copy after checking the source byte region.
            final int byteLengthA = a.length() << a.stride();
            boundsCheckRegionI(byteFromIndexA, byteLength, byteLengthA);
            TStringOps.arraycopyWithStride(location, arrayA, offsetA, 0, byteFromIndexA, arrayB, offsetB, 0, byteFromIndexB, byteLength);
        }

        /**
         * Create a new {@link CopyToByteArrayNode}.
* * @since 22.1 */ public static CopyToByteArrayNode create() { return TruffleStringFactory.CopyToByteArrayNodeGen.create(); } /** * Get the uncached version of {@link CopyToByteArrayNode}. * * @since 22.1 */ public static CopyToByteArrayNode getUncached() { return TruffleStringFactory.CopyToByteArrayNodeGen.getUncached(); } } /** * Node to copy a region of a string into native memory. See * {@link #execute(AbstractTruffleString, int, Object, int, int, TruffleString.Encoding)} for * details. * * @since 22.1 */ @ImportStatic(TStringAccessor.class) @GeneratePackagePrivate @GenerateUncached public abstract static class CopyToNativeMemoryNode extends Node { CopyToNativeMemoryNode() { } /** * Copy a region of the given {@link TruffleString} {@code a}, bounded by * {@code byteFromIndexA} and {@code byteLength} into the given interop object representing * a native pointer ({@code isPointer(pointerObject)} must return {@code true}), starting at * {@code byteFromIndexDst}. *

* This operation requires native access permissions * ({@code TruffleLanguage.Env#isNativeAccessAllowed()}). * * @since 22.1 */ public abstract void execute(AbstractTruffleString a, int byteFromIndexA, Object pointerObject, int byteFromIndexDst, int byteLength, Encoding expectedEncoding); @Specialization void doCopy(AbstractTruffleString a, int byteFromIndexA, Object pointerObject, int byteFromIndexB, int byteLength, Encoding expectedEncoding, @Cached(value = "createInteropLibrary()", uncached = "getUncachedInteropLibrary()") Node interopLibrary, @Cached ToIndexableNode toIndexableNode, @Cached ConditionProfile utf16Profile, @Cached ConditionProfile utf16S0Profile, @Cached ConditionProfile utf32Profile, @Cached ConditionProfile utf32S0Profile, @Cached ConditionProfile utf32S1Profile) { CopyToByteArrayNode.doCopyInternal(this, a, byteFromIndexA, NativePointer.create(this, pointerObject, interopLibrary, byteFromIndexB), byteFromIndexB, byteLength, expectedEncoding, toIndexableNode, utf16Profile, utf16S0Profile, utf32Profile, utf32S0Profile, utf32S1Profile); } /** * Create a new {@link CopyToNativeMemoryNode}. * * @since 22.1 */ public static CopyToNativeMemoryNode create() { return TruffleStringFactory.CopyToNativeMemoryNodeGen.create(); } /** * Get the uncached version of {@link CopyToNativeMemoryNode}. * * @since 22.1 */ public static CopyToNativeMemoryNode getUncached() { return TruffleStringFactory.CopyToNativeMemoryNodeGen.getUncached(); } } /** * Node to get a {@link java.lang.String} representation of a {@link TruffleString}. * * @since 22.1 */ @GeneratePackagePrivate @GenerateUncached public abstract static class ToJavaStringNode extends Node { ToJavaStringNode() { } /** * Return a {@link java.lang.String} representation of the given {@link TruffleString}. 
* * @since 22.1 */ public abstract String execute(AbstractTruffleString a); @Specialization static String doUTF16(TruffleString a, @Cached ConditionProfile cacheHit, @Cached ToIndexableNode toIndexableNode, @Cached TStringInternalNodes.ToJavaStringNode toJavaStringNode) { if (a.isEmpty()) { return ""; } TruffleString cur = a.next; if (cur != null) { while (cur != a && !cur.isJavaString()) { cur = cur.next; } if (cacheHit.profile(cur.isJavaString())) { return (String) cur.data(); } } cur = a.next; if (cur != null) { while (cur != a && !cur.isCompatibleTo(Encoding.UTF_16)) { cur = cur.next; } } else { cur = a; } if (cur.isJavaString()) { // java string was inserted in parallel return (String) cur.data(); } TruffleString s = toJavaStringNode.execute(cur, toIndexableNode.execute(cur, cur.data())); a.cacheInsert(s); return (String) s.data(); } @Specialization static String doMutable(MutableTruffleString a, @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode, @Cached TStringInternalNodes.TransCodeNode transCodeNode, @Cached TStringInternalNodes.CreateJavaStringNode createJavaStringNode) { if (a.isEmpty()) { return ""; } final AbstractTruffleString utf16String; final int codeRangeA; if (isUTF16(a.encoding()) || (codeRangeA = getCodeRangeNode.execute(a)) < Encoding.UTF_16.maxCompatibleCodeRange) { utf16String = a; } else { utf16String = transCodeNode.execute(a, a.data(), getCodePointLengthNode.execute(a), codeRangeA, Encoding.UTF_16); } return createJavaStringNode.execute(utf16String, utf16String.data()); } /** * Create a new {@link ToJavaStringNode}. * * @since 22.1 */ public static ToJavaStringNode create() { return TruffleStringFactory.ToJavaStringNodeGen.create(); } /** * Get the uncached version of {@link ToJavaStringNode}. 
* * @since 22.1 */ public static ToJavaStringNode getUncached() { return TruffleStringFactory.ToJavaStringNodeGen.getUncached(); } } /** * Node to get a given string in a specific encoding. See * {@link #execute(AbstractTruffleString, TruffleString.Encoding)} for details. * * @since 22.1 */ @GeneratePackagePrivate @GenerateUncached public abstract static class SwitchEncodingNode extends Node { SwitchEncodingNode() { } /** * Returns a version of string {@code a} that is encoded in the given encoding, which may be * the string itself or a converted version. Note that the string itself may be returned * even if it was originally created using a different encoding, if the string is * byte-equivalent in both encodings. *

* If no lossless conversion is possible, the string is converted on a best-effort basis; no * exception is thrown and characters which cannot be mapped in the target encoding are * replaced by {@code '\ufffd'} (for UTF-*) or {@code '?'}. * * @since 22.1 */ public abstract TruffleString execute(AbstractTruffleString a, Encoding encoding); @Specialization(guards = "a.isCompatibleTo(encoding)") static TruffleString compatibleImmutable(TruffleString a, @SuppressWarnings("unused") Encoding encoding) { assert !a.isJavaString(); return a; } @Specialization(guards = "a.isCompatibleTo(encoding)") static TruffleString compatibleMutable(MutableTruffleString a, Encoding encoding, @Cached AsTruffleStringNode asTruffleStringNode) { return asTruffleStringNode.execute(a, encoding); } @Specialization(guards = "!a.isCompatibleTo(encoding)") static TruffleString transCode(TruffleString a, Encoding encoding, @Cached ConditionProfile cacheHit, @Cached ToIndexableNode toIndexableNode, @Cached @Shared("transCodeNode") TStringInternalNodes.TransCodeNode transCodeNode) { if (a.isEmpty()) { return encoding.getEmpty(); } TruffleString cur = a.next; assert !a.isJavaString(); if (cur != null) { while (cur != a && cur.encoding() != encoding.id || (isUTF16(encoding) && cur.isJavaString())) { cur = cur.next; } if (cacheHit.profile(cur.encoding() == encoding.id)) { assert !cur.isJavaString(); return cur; } } TruffleString transCoded = transCodeNode.execute(a, toIndexableNode.execute(a, a.data()), a.codePointLength(), a.codeRange(), encoding); if (!transCoded.isCacheHead()) { a.cacheInsert(transCoded); } return transCoded; } @Specialization(guards = "!a.isCompatibleTo(encoding)") TruffleString transCodeMutable(MutableTruffleString a, Encoding encoding, @Cached TStringInternalNodes.GetCodePointLengthNode getCodePointLengthNode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeNode, @Cached @Shared("transCodeNode") TStringInternalNodes.TransCodeNode transCodeNode, @Cached ConditionProfile 
isCompatibleProfile) { if (a.isEmpty()) { return encoding.getEmpty(); } final int codePointLengthA = getCodePointLengthNode.execute(a); final int codeRangeA = getCodeRangeNode.execute(a); if (isCompatibleProfile.profile(codeRangeA < encoding.maxCompatibleCodeRange)) { int strideDst = Stride.fromCodeRange(codeRangeA, encoding); byte[] arrayDst = new byte[a.length() << strideDst]; TStringOps.arraycopyWithStride(this, a.data(), a.offset(), a.stride(), 0, arrayDst, 0, strideDst, 0, a.length()); return createFromByteArray(arrayDst, a.length(), strideDst, encoding, codePointLengthA, codeRangeA); } else { return transCodeNode.execute(a, a.data(), codePointLengthA, codeRangeA, encoding); } } /** * Create a new {@link SwitchEncodingNode}. * * @since 22.1 */ public static SwitchEncodingNode create() { return TruffleStringFactory.SwitchEncodingNodeGen.create(); } /** * Get the uncached version of {@link SwitchEncodingNode}. * * @since 22.1 */ public static SwitchEncodingNode getUncached() { return TruffleStringFactory.SwitchEncodingNodeGen.getUncached(); } } /** * Node to forcibly assign any encoding to a string. See * {@link #execute(AbstractTruffleString, TruffleString.Encoding, TruffleString.Encoding)} for * details. * * @since 22.1 */ @GeneratePackagePrivate @GenerateUncached public abstract static class ForceEncodingNode extends Node { ForceEncodingNode() { } /** * Returns a version of string {@code a} assigned to the given encoding, which may be the * string itself or a new string. The string itself may be returned even if it was * originally created using a different encoding, if the string is byte-equivalent in both * encodings. If the string is not byte-equivalent in both encodings, a new string * containing the same bytes but assigned to the new encoding is returned. This node does * not transcode the string's contents in any way, it is the "encoding-equivalent" to a * C-style reinterpret-cast. 
* * @since 22.1 */ public abstract TruffleString execute(AbstractTruffleString a, Encoding expectedEncoding, Encoding targetEncoding); @Specialization(guards = "isCompatibleAndNotCompacted(a, expectedEncoding, targetEncoding)") static TruffleString compatibleImmutable(TruffleString a, @SuppressWarnings("unused") Encoding expectedEncoding, @SuppressWarnings("unused") Encoding targetEncoding) { assert !a.isJavaString(); return a; } @Specialization(guards = "isCompatibleAndNotCompacted(a, expectedEncoding, targetEncoding)") static TruffleString compatibleMutable(MutableTruffleString a, @SuppressWarnings("unused") Encoding expectedEncoding, Encoding targetEncoding, @Cached AsTruffleStringNode asTruffleStringNode) { return asTruffleStringNode.execute(a, targetEncoding); } @Specialization(guards = "!isCompatibleAndNotCompacted(a, expectedEncoding, targetEncoding)") static TruffleString reinterpret(AbstractTruffleString a, Encoding expectedEncoding, Encoding targetEncoding, @Cached ToIndexableNode toIndexableNode, @Cached ConditionProfile managedProfile, @Cached ConditionProfile inflateProfile, @Cached TruffleString.CopyToByteArrayNode copyToByteArrayNode, @Cached TStringInternalNodes.FromBufferWithStringCompactionNode fromBufferWithStringCompactionNode, @Cached TStringInternalNodes.FromNativePointerNode fromNativePointerNode) { Object arrayA = toIndexableNode.execute(a, a.data()); int byteLength = a.length() << expectedEncoding.naturalStride; if (managedProfile.profile(arrayA instanceof byte[] || a.isMutable())) { final Object arrayNoCompaction; final int offset; if (inflateProfile.profile(isUTF16Or32(expectedEncoding) && a.stride() != expectedEncoding.naturalStride)) { byte[] inflated = new byte[byteLength]; copyToByteArrayNode.execute(a, 0, inflated, 0, byteLength, expectedEncoding); arrayNoCompaction = inflated; offset = 0; } else { arrayNoCompaction = arrayA; offset = a.offset(); } return fromBufferWithStringCompactionNode.execute(arrayNoCompaction, offset, 
byteLength, targetEncoding, a.isMutable(), true); } else { assert arrayA instanceof NativePointer; return fromNativePointerNode.execute((NativePointer) arrayA, a.offset(), byteLength, targetEncoding, true); } } static boolean isCompatibleAndNotCompacted(AbstractTruffleString a, Encoding expectedEncoding, Encoding targetEncoding) { return expectedEncoding.naturalStride == targetEncoding.naturalStride && (a.encoding() == targetEncoding.id || a.stride() == targetEncoding.naturalStride && a.isCompatibleTo(targetEncoding)); } /** * Create a new {@link ForceEncodingNode}. * * @since 22.1 */ public static ForceEncodingNode create() { return TruffleStringFactory.ForceEncodingNodeGen.create(); } /** * Get the uncached version of {@link ForceEncodingNode}. * * @since 22.1 */ public static ForceEncodingNode getUncached() { return TruffleStringFactory.ForceEncodingNodeGen.getUncached(); } } /** * Node to create a {@link TruffleStringIterator}. See * {@link #execute(AbstractTruffleString, TruffleString.Encoding)} for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class CreateCodePointIteratorNode extends Node { CreateCodePointIteratorNode() { } /** * Returns a {@link TruffleStringIterator}, which allows iterating this string's code * points, with {@link ErrorHandling#BEST_EFFORT best-effort error handling}. * * @since 22.1 */ public final TruffleStringIterator execute(AbstractTruffleString a, Encoding expectedEncoding) { return execute(a, expectedEncoding, ErrorHandling.BEST_EFFORT); } /** * Returns a {@link TruffleStringIterator}, which allows iterating this string's code * points. The iterator is initialized to begin iteration at the start of the string, use * {@link TruffleStringIterator.NextNode} to iterate. * * @param errorHandling analogous to {@link CodePointAtIndexNode}. 
* * @since 22.3 */ public abstract TruffleStringIterator execute(AbstractTruffleString a, Encoding expectedEncoding, ErrorHandling errorHandling); @Specialization static TruffleStringIterator createIterator(AbstractTruffleString a, Encoding expectedEncoding, ErrorHandling errorHandling, @Cached ToIndexableNode toIndexableNode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode) { CompilerAsserts.partialEvaluationConstant(errorHandling); a.checkEncoding(expectedEncoding); return forwardIterator(a, toIndexableNode.execute(a, a.data()), getCodeRangeANode.execute(a), expectedEncoding, errorHandling); } /** * Create a new {@link CreateCodePointIteratorNode}. * * @since 22.1 */ public static CreateCodePointIteratorNode create() { return TruffleStringFactory.CreateCodePointIteratorNodeGen.create(); } /** * Get the uncached version of {@link CreateCodePointIteratorNode}. * * @since 22.1 */ public static CreateCodePointIteratorNode getUncached() { return TruffleStringFactory.CreateCodePointIteratorNodeGen.getUncached(); } } /** * Node to create a {@link TruffleStringIterator}. See * {@link #execute(AbstractTruffleString, TruffleString.Encoding)} for details. * * @since 22.1 */ @ImportStatic(TStringGuards.class) @GeneratePackagePrivate @GenerateUncached public abstract static class CreateBackwardCodePointIteratorNode extends Node { CreateBackwardCodePointIteratorNode() { } /** * Returns a {@link TruffleStringIterator}, which allows iterating this string's code * points, with {@link ErrorHandling#BEST_EFFORT best-effort error handling}. * * @since 22.1 */ public final TruffleStringIterator execute(AbstractTruffleString a, Encoding expectedEncoding) { return execute(a, expectedEncoding, ErrorHandling.BEST_EFFORT); } /** * Returns a {@link TruffleStringIterator}, which allows iterating this string's code * points. The iterator is initialized to begin iteration at the end of the string, use * {@link TruffleStringIterator.PreviousNode} to iterate in reverse order. 
* * @param errorHandling analogous to {@link CodePointAtIndexNode}. * * @since 22.3 */ public abstract TruffleStringIterator execute(AbstractTruffleString a, Encoding expectedEncoding, ErrorHandling errorHandling); @Specialization static TruffleStringIterator createIterator(AbstractTruffleString a, Encoding expectedEncoding, ErrorHandling errorHandling, @Cached ToIndexableNode toIndexableNode, @Cached TStringInternalNodes.GetCodeRangeNode getCodeRangeANode) { CompilerAsserts.partialEvaluationConstant(errorHandling); a.checkEncoding(expectedEncoding); return backwardIterator(a, toIndexableNode.execute(a, a.data()), getCodeRangeANode.execute(a), expectedEncoding, errorHandling); } /** * Create a new {@link CreateBackwardCodePointIteratorNode}. * * @since 22.1 */ public static CreateBackwardCodePointIteratorNode create() { return TruffleStringFactory.CreateBackwardCodePointIteratorNodeGen.create(); } /** * Get the uncached version of {@link CreateBackwardCodePointIteratorNode}. * * @since 22.1 */ public static CreateBackwardCodePointIteratorNode getUncached() { return TruffleStringFactory.CreateBackwardCodePointIteratorNodeGen.getUncached(); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy