All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.oracle.truffle.api.strings.JCodingsImpl Maven / Gradle / Ivy

Go to download

Truffle is a multi-language framework for executing dynamic languages that achieves high performance when combined with Graal.

There is a newer version: 24.2.0
Show newest version
/*
 * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * The Universal Permissive License (UPL), Version 1.0
 *
 * Subject to the condition set forth below, permission is hereby granted to any
 * person obtaining a copy of this software, associated documentation and/or
 * data (collectively the "Software"), free of charge and under any and all
 * copyright rights in the Software, and any and all patent rights owned or
 * freely licensable by each licensor hereunder covering either (i) the
 * unmodified Software as contributed to or provided by such licensor, or (ii)
 * the Larger Works (as defined below), to deal in both
 *
 * (a) the Software, and
 *
 * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
 * one is included with the Software each a "Larger Work" to which the Software
 * is contributed by such licensors),
 *
 * without restriction, including without limitation the rights to copy, create
 * derivative works of, display, perform, and distribute the Software and make,
 * use, sell, offer for sale, import, export, have made, and have sold the
 * Software and the Larger Work(s), and to sublicense the foregoing rights on
 * either these or other terms.
 *
 * This license is subject to the following condition:
 *
 * The above copyright notice and either this complete permission notice or at a
 * minimum a reference to the UPL must be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package com.oracle.truffle.api.strings;

import static com.oracle.truffle.api.strings.AbstractTruffleString.checkArrayRange;
import static com.oracle.truffle.api.strings.TStringGuards.isBroken;
import static com.oracle.truffle.api.strings.TStringGuards.isReturnNegative;
import static com.oracle.truffle.api.strings.TStringGuards.isStride0;
import static com.oracle.truffle.api.strings.TStringGuards.isStride1;
import static com.oracle.truffle.api.strings.TStringGuards.isUTF16;
import static com.oracle.truffle.api.strings.TStringGuards.isUTF16Or32;
import static com.oracle.truffle.api.strings.TStringGuards.isUTF32;
import static com.oracle.truffle.api.strings.TStringGuards.isUTF8;

import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.nodes.Node;
import com.oracle.truffle.api.profiles.InlinedConditionProfile;
import com.oracle.truffle.api.strings.TruffleString.ErrorHandling;
import com.oracle.truffle.api.strings.provider.JCodingsProvider;
import com.oracle.truffle.api.strings.provider.JCodingsProvider.Encoding;
import com.oracle.truffle.api.strings.provider.JCodingsProvider.TranscodeResult;

final class JCodingsImpl implements JCodings {

    private final JCodingsProvider provider;

    @CompilationFinal(dimensions = 1) private final JCodingsProvider.Encoding[] jcodingsEncodings;

    JCodingsImpl(JCodingsProvider provider) {
        this.provider = provider;

        final var encodingValues = TruffleString.Encoding.values();
        this.jcodingsEncodings = new JCodingsProvider.Encoding[encodingValues.length];
        for (var e : encodingValues) {
            var jcodingsEncoding = provider.get(e.jCodingName);
            jcodingsEncodings[e.id] = jcodingsEncoding;
            assert jcodingsEncoding.isSingleByte() == e.isSingleByte() : e;
        }
    }

    private Encoding get(TruffleString.Encoding encoding) {
        return jcodingsEncodings[encoding.id];
    }

    @Override
    public int minLength(TruffleString.Encoding encoding) {
        return get(encoding).minLength();
    }

    @Override
    public int maxLength(TruffleString.Encoding encoding) {
        return get(encoding).maxLength();
    }

    @Override
    public boolean isFixedWidth(TruffleString.Encoding encoding) {
        var jCoding = get(encoding);
        return jCoding.isFixedWidth() && jCoding.isSingleByte();
    }

    @Override
    public boolean isSingleByte(TruffleString.Encoding encoding) {
        return get(encoding).isSingleByte();
    }

    @Override
    @TruffleBoundary
    public int getCodePointLength(TruffleString.Encoding encoding, int codepoint) {
        return get(encoding).codeToMbcLength(codepoint);
    }

    @Override
    @TruffleBoundary
    public int getPreviousCodePointIndex(TruffleString.Encoding encoding, byte[] array, int arrayBegin, int index, int arrayEnd) {
        return get(encoding).prevCharHead(array, arrayBegin, index, arrayEnd);
    }

    @Override
    @TruffleBoundary
    public int getCodePointLength(TruffleString.Encoding encoding, byte[] array, int index, int arrayLength) {
        return get(encoding).length(array, index, arrayLength);
    }

    @Override
    @TruffleBoundary
    public int readCodePoint(TruffleString.Encoding encoding, byte[] array, int index, int arrayEnd, DecodingErrorHandler errorHandler) {
        var jCoding = get(encoding);
        int codePoint = jCoding.mbcToCode(array, index, arrayEnd);
        if (jCoding.isUnicode() && Encodings.isUTF16Surrogate(codePoint)) {
            return isReturnNegative(errorHandler) ? -1 : Encodings.invalidCodepoint();
        }
        return codePoint;
    }

    @Override
    @TruffleBoundary
    public boolean isValidCodePoint(TruffleString.Encoding encoding, int codepoint) {
        return !get(encoding).isUnicode() || !Encodings.isUTF16Surrogate(codepoint);
    }

    @Override
    @TruffleBoundary
    public int writeCodePoint(TruffleString.Encoding encoding, int codepoint, byte[] array, int index) {
        return get(encoding).codeToMbc(codepoint, array, index);
    }

    @Override
    @TruffleBoundary
    public int codePointIndexToRaw(Node location, AbstractTruffleString a, byte[] arrayA, int extraOffsetRaw, int index, boolean isLength, TruffleString.Encoding encoding) {
        var jCoding = get(encoding);
        int minLength = jCoding.minLength();
        if (jCoding.isFixedWidth()) {
            return index * minLength;
        }
        int offset = a.byteArrayOffset() + extraOffsetRaw;
        int end = a.byteArrayOffset() + a.length();
        int cpi = 0;
        int i = 0;
        while (i < a.length() - extraOffsetRaw) {
            if (cpi == index) {
                return i;
            }
            int length = jCoding.length(arrayA, offset + i, end);
            if (length < 1) {
                if (length < -1) {
                    // broken multibyte codepoint at end of string
                    if (isLength) {
                        return a.length() - extraOffsetRaw;
                    } else {
                        throw InternalErrors.indexOutOfBounds();
                    }
                } else {
                    i += minLength;
                }
            } else {
                i += length;
            }
            cpi++;
            TStringConstants.truffleSafePointPoll(location, cpi);
        }
        return TStringInternalNodes.CodePointIndexToRawNode.atEnd(a, extraOffsetRaw, index, isLength, cpi);
    }

    @Override
    public int decode(AbstractTruffleString a, byte[] arrayA, int rawIndex, TruffleString.Encoding encoding, ErrorHandling errorHandling) {
        int p = a.byteArrayOffset() + rawIndex;
        int end = a.byteArrayOffset() + a.length();
        int length = getCodePointLength(encoding, arrayA, p, end);
        if (length < 1) {
            return Encodings.invalidCodepointReturnValue(errorHandling);
        }
        return readCodePoint(encoding, arrayA, p, end, errorHandling.errorHandler);
    }

    @Override
    public long calcStringAttributes(Node location, Object array, int offset, int length, TruffleString.Encoding encoding, int fromIndex, InlinedConditionProfile validCharacterProfile,
                    InlinedConditionProfile fixedWidthProfile) {
        if (TStringGuards.is7BitCompatible(encoding) && TStringOps.calcStringAttributesLatin1(location, array, offset + fromIndex, length) == TSCodeRange.get7Bit()) {
            return StringAttributes.create(length, TSCodeRange.get7Bit());
        }
        byte[] bytes = JCodings.asByteArray(array);
        int offsetBytes = array instanceof AbstractTruffleString.NativePointer ? fromIndex : offset + fromIndex;
        Encoding enc = get(encoding);
        int codeRange = TSCodeRange.getValid(enc.isSingleByte());
        int characters = 0;
        int p = offsetBytes;
        final int end = offsetBytes + length;
        int loopCount = 0;
        for (; p < end; characters++) {
            final int lengthOfCurrentCharacter = enc.length(bytes, p, end);
            if (validCharacterProfile.profile(location, lengthOfCurrentCharacter > 0 && p + lengthOfCurrentCharacter <= end)) {
                p += lengthOfCurrentCharacter;
            } else {
                codeRange = TSCodeRange.getBroken(enc.isSingleByte());
                // If a string is detected as broken, and we already know the character length
                // due to a fixed width encoding, we can break here.
                if (fixedWidthProfile.profile(location, enc.isFixedWidth())) {
                    characters = (length + enc.minLength() - 1) / enc.minLength();
                    return StringAttributes.create(characters, codeRange);
                } else {
                    p += enc.minLength();
                }
            }
            TStringConstants.truffleSafePointPoll(location, ++loopCount);
        }
        return StringAttributes.create(characters, codeRange);
    }

    private static final byte[] CONVERSION_REPLACEMENT = {'?'};
    private static final byte[] CONVERSION_REPLACEMENT_UTF_16 = TStringGuards.littleEndian() ? new byte[]{(byte) 0xFD, (byte) 0xFF} : new byte[]{(byte) 0xFF, (byte) 0xFD};
    private static final byte[] CONVERSION_REPLACEMENT_UTF_32 = TStringGuards.littleEndian() ? new byte[]{(byte) 0xFD, (byte) 0xFF, 0, 0} : new byte[]{0, 0, (byte) 0xFF, (byte) 0xFD};

    private static byte[] getConversionReplacement(TruffleString.Encoding targetEncoding) {
        if (isUTF8(targetEncoding)) {
            return Encodings.CONVERSION_REPLACEMENT_UTF_8;
        } else if (isUTF16(targetEncoding)) {
            return CONVERSION_REPLACEMENT_UTF_16;
        } else if (isUTF32(targetEncoding)) {
            return CONVERSION_REPLACEMENT_UTF_32;
        } else {
            return CONVERSION_REPLACEMENT;
        }
    }

    private static Encoding getBytesEncoding(AbstractTruffleString a) {
        JCodingsImpl impl = (JCodingsImpl) JCodings.getInstance();
        if (isUTF16Or32(a.encoding()) && isStride0(a)) {
            return impl.get(TruffleString.Encoding.ISO_8859_1);
        } else if (isUTF32(a.encoding()) && isStride1(a)) {
            return impl.get(TruffleString.Encoding.UTF_16);
        } else {
            return impl.get(TruffleString.Encoding.get(a.encoding()));
        }
    }

    @Override
    public TruffleString transcode(Node location, AbstractTruffleString a, Object arrayA, int codePointLengthA, TruffleString.Encoding targetEncoding,
                    TStringInternalNodes.FromBufferWithStringCompactionNode fromBufferWithStringCompactionNode,
                    TranscodingErrorHandler errorHandler) {
        final Encoding jCodingSrc = getBytesEncoding(a);
        final Encoding jCodingDst = get(targetEncoding);
        final byte[] replacement = getConversionReplacement(targetEncoding);
        TranscodeResult result = provider.transcode(a, codePointLengthA, a.byteArrayOffset(), a.length() << a.stride(),
                        targetEncoding, jCodingSrc, jCodingDst,
                        replacement,
                        errorHandler,
                        JCodingsImpl::asBytesMaterializeNative,
                        JCodingsImpl::getBytesEncoding);
        checkArrayRange(result.buffer(), 0, result.length());
        return fromBufferWithStringCompactionNode.execute(location,
                        result.buffer(), 0, result.length(), targetEncoding, result.length() != result.buffer().length || targetEncoding.isSupported(),
                        isBroken(a.codeRange()) || result.undefinedConversion() || a.isMutable());
    }

    private static byte[] asBytesMaterializeNative(AbstractTruffleString replacementString) {
        return asBytesMaterializeNative(replacementString, TruffleString.ToIndexableNode.getUncached().execute(null, replacementString, replacementString.data()));
    }

    private static byte[] asBytesMaterializeNative(AbstractTruffleString a, Object arrayA) {
        if (arrayA instanceof AbstractTruffleString.NativePointer) {
            ((AbstractTruffleString.NativePointer) arrayA).materializeByteArray(null, a, InlinedConditionProfile.getUncached());
        }
        return JCodings.asByteArray(arrayA);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy