All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jruby.truffle.stdlib.psych.PsychParserNodes Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2015, 2017 Oracle and/or its affiliates. All rights reserved. This
 * code is released under a tri EPL/GPL/LGPL license. You can use it,
 * redistribute it and/or modify it under the terms of the:
 *
 * Eclipse Public License version 1.0
 * GNU General Public License version 2
 * GNU Lesser General Public License version 2.1
 *
 * This code is modified from the Psych JRuby extension module
 * implementation with the following header:
 *
 * Version: EPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Eclipse Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/epl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2010 Charles O Nutter 
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the EPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the EPL, the GPL or the LGPL.
 */
package org.jruby.truffle.stdlib.psych;

import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.api.dsl.Cached;
import com.oracle.truffle.api.dsl.Specialization;
import com.oracle.truffle.api.frame.VirtualFrame;
import com.oracle.truffle.api.nodes.Node;
import com.oracle.truffle.api.object.DynamicObject;
import com.oracle.truffle.api.profiles.BranchProfile;
import org.jcodings.Encoding;
import org.jcodings.Ptr;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.transcode.EConv;
import org.jcodings.transcode.EConvResult;
import org.jcodings.transcode.TranscodingManager;
import org.jcodings.unicode.UnicodeEncoding;
import org.jruby.truffle.RubyContext;
import org.jruby.truffle.builtins.CoreClass;
import org.jruby.truffle.builtins.CoreMethod;
import org.jruby.truffle.builtins.CoreMethodArrayArgumentsNode;
import org.jruby.truffle.collections.BoundaryIterable;
import org.jruby.truffle.core.adapaters.InputStreamAdapter;
import org.jruby.truffle.core.cast.ToStrNode;
import org.jruby.truffle.core.cast.ToStrNodeGen;
import org.jruby.truffle.core.rope.CodeRange;
import org.jruby.truffle.core.rope.Rope;
import org.jruby.truffle.core.rope.RopeOperations;
import org.jruby.truffle.core.string.ByteList;
import org.jruby.truffle.core.string.StringOperations;
import org.jruby.truffle.language.NotProvided;
import org.jruby.truffle.language.RubyGuards;
import org.jruby.truffle.language.SnippetNode;
import org.jruby.truffle.language.dispatch.CallDispatchHeadNode;
import org.jruby.truffle.language.dispatch.DoesRespondDispatchHeadNode;
import org.jruby.truffle.language.objects.ReadObjectFieldNode;
import org.jruby.truffle.language.objects.ReadObjectFieldNodeGen;
import org.jruby.truffle.language.objects.TaintNode;
import org.yaml.snakeyaml.DumperOptions;
import org.yaml.snakeyaml.error.Mark;
import org.yaml.snakeyaml.events.AliasEvent;
import org.yaml.snakeyaml.events.DocumentEndEvent;
import org.yaml.snakeyaml.events.DocumentStartEvent;
import org.yaml.snakeyaml.events.Event;
import org.yaml.snakeyaml.events.Event.ID;
import org.yaml.snakeyaml.events.MappingStartEvent;
import org.yaml.snakeyaml.events.ScalarEvent;
import org.yaml.snakeyaml.events.SequenceStartEvent;
import org.yaml.snakeyaml.parser.Parser;
import org.yaml.snakeyaml.parser.ParserException;
import org.yaml.snakeyaml.parser.ParserImpl;
import org.yaml.snakeyaml.reader.ReaderException;
import org.yaml.snakeyaml.reader.StreamReader;
import org.yaml.snakeyaml.scanner.ScannerException;

import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

@CoreClass("Psych::Parser")
public abstract class PsychParserNodes {

    @CoreMethod(names = "parse", required = 1, optional = 1)
    public abstract static class ParseNode extends CoreMethodArrayArgumentsNode {

        @Node.Child private ToStrNode toStrNode = ToStrNodeGen.create(null);

        public abstract Object executeParse(VirtualFrame frame, DynamicObject parserObject, DynamicObject yaml, Object path);

        @Specialization
        public Object parse(VirtualFrame frame, DynamicObject parserObject, DynamicObject yaml, NotProvided path) {
            return executeParse(frame, parserObject, yaml, nil());
        }

        @Specialization
        public Object parse(
                VirtualFrame frame,
                DynamicObject parserObject,
                DynamicObject yaml,
                DynamicObject path,
                @Cached("new()") SnippetNode taintedNode,
                @Cached("create()") DoesRespondDispatchHeadNode respondToReadNode,
                @Cached("create()") DoesRespondDispatchHeadNode respondToPathNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callPathNode,
                @Cached("createReadHandlerNode()") ReadObjectFieldNode readHandlerNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callStartStreamNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callStartDocumentNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callEndDocumentNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callAliasNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callScalarNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callStartSequenceNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callEndSequenceNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callStartMappingNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callEndMappingNode,
                @Cached("createMethodCall()") CallDispatchHeadNode callEndStreamNode,
                @Cached("new()") SnippetNode raiseSyntaxErrorSnippetNode,
                @Cached("new()") SnippetNode tagPushNode,
                @Cached("create()") TaintNode taintNode,
                @Cached("create()") BranchProfile errorProfile) {

            final boolean tainted = (boolean) taintedNode.execute(frame, "yaml.tainted? || yaml.is_a?(IO)", "yaml", yaml);

            final StreamReader reader;

            if (!RubyGuards.isRubyString(yaml) && respondToReadNode.doesRespondTo(frame, "read", yaml)) {
                reader = newStreamReader(yaml);
            } else {
                Rope rope = StringOperations.rope(toStrNode.executeToStr(frame, yaml));
                reader = newStringReader(rope);
            }

            final Parser parser = newParser(reader);

            try {
                if (isNil(path) && respondToPathNode.doesRespondTo(frame, "path", yaml)) {
                    path = (DynamicObject) callPathNode.call(frame, yaml, "path");
                }

                final Object handler = readHandlerNode.execute(parserObject);

                while (true) {
                    Event event = getParserEvent(parser);

                    if (isEvent(event, Event.ID.StreamStart)) {
                        callStartStreamNode.call(frame, handler, "start_stream", YAMLEncoding.YAML_ANY_ENCODING.ordinal());

                    } else if (isEvent(event, Event.ID.DocumentStart)) {
                        final DocumentStartEvent startEvent = (DocumentStartEvent) event;

                        final DumperOptions.Version versionOptions = startEvent.getVersion();
                        final Integer[] versionInts = versionOptions == null ? null : versionOptions.getArray();

                        final DynamicObject versionArray;

                        if (versionInts == null) {
                            versionArray = createArray(null, 0);
                        } else {
                            versionArray = createArray(new Object[] {
                                    versionInts[0], versionInts[1]
                            }, 2);
                        }

                        Map tagsMap = startEvent.getTags();
                        DynamicObject tags = createArray(null, 0);

                        if (tagsMap != null && size(tagsMap) > 0) {
                            for (Map.Entry tag : BoundaryIterable.wrap(entrySet(tagsMap))) {
                                Object key = stringFor(getKey(tag), tainted, taintNode);
                                Object value = stringFor(getValue(tag), tainted, taintNode);
                                tagPushNode.execute(frame,
                                        "tags.push [key, value]",
                                        "tags", tags,
                                        "key", key,
                                        "value", value);
                            }
                        }

                        Object notExplicit = !startEvent.getExplicit();
                        callStartDocumentNode.call(frame, handler, "start_document", versionArray, tags, notExplicit);

                    } else if (isEvent(event, Event.ID.DocumentEnd)) {
                        final DocumentEndEvent endEvent = (DocumentEndEvent) event;
                        Object notExplicit = !endEvent.getExplicit();
                        callEndDocumentNode.call(frame, handler, "end_document", notExplicit);

                    } else if (isEvent(event, Event.ID.Alias)) {
                        final AliasEvent aliasEvent = (AliasEvent) event;
                        Object alias = stringOrNilFor(aliasEvent.getAnchor(), tainted, taintNode);
                        callAliasNode.call(frame, handler, "alias", alias);

                    } else if (isEvent(event, Event.ID.Scalar)) {
                        final ScalarEvent scalarEvent = (ScalarEvent) event;

                        Object anchor = stringOrNilFor(scalarEvent.getAnchor(), tainted, taintNode);
                        Object tag = stringOrNilFor(scalarEvent.getTag(), tainted, taintNode);
                        Object plain_implicit = scalarEvent.getImplicit().canOmitTagInPlainScalar();
                        Object quoted_implicit = scalarEvent.getImplicit().canOmitTagInNonPlainScalar();
                        Object style = translateStyle(scalarEvent.getStyle());
                        Object val = stringFor(scalarEvent.getValue(), tainted, taintNode);

                        callScalarNode.call(frame, handler, "scalar", val, anchor, tag, plain_implicit, quoted_implicit, style);

                    } else if (isEvent(event, Event.ID.SequenceStart)) {
                        final SequenceStartEvent sequenceStartEvent = (SequenceStartEvent) event;

                        Object anchor = stringOrNilFor(sequenceStartEvent.getAnchor(), tainted, taintNode);
                        Object tag = stringOrNilFor(sequenceStartEvent.getTag(), tainted, taintNode);
                        Object implicit = sequenceStartEvent.getImplicit();
                        Object style = translateFlowStyle(sequenceStartEvent.getFlowStyle());

                        callStartSequenceNode.call(frame, handler, "start_sequence", anchor, tag, implicit, style);

                    } else if (isEvent(event, Event.ID.SequenceEnd)) {
                        callEndSequenceNode.call(frame, handler, "end_sequence");

                    } else if (isEvent(event, Event.ID.MappingStart)) {
                        final MappingStartEvent mappingStartEvent = (MappingStartEvent) event;

                        Object anchor = stringOrNilFor(mappingStartEvent.getAnchor(), tainted, taintNode);
                        Object tag = stringOrNilFor(mappingStartEvent.getTag(), tainted, taintNode);
                        Object implicit = mappingStartEvent.getImplicit();
                        Object style = translateFlowStyle(mappingStartEvent.getFlowStyle());

                        callStartMappingNode.call(frame, handler, "start_mapping", anchor, tag, implicit, style);

                    } else if (isEvent(event, Event.ID.MappingEnd)) {
                        callEndMappingNode.call(frame, handler, "end_mapping");

                    } else if (isEvent(event, Event.ID.StreamEnd)) {
                        callEndStreamNode.call(frame, handler, "end_stream");
                        break;
                    }
                }
            } catch (ParserException | ScannerException pe) {
                errorProfile.enter();
                final Mark mark = pe.getProblemMark();

                raiseSyntaxErrorSnippetNode.execute(frame,
                        "raise Psych::SyntaxError.new(file, line, col, offset, problem, context)",
                        "file", path,
                        "line", mark.getLine(),
                        "col", mark.getColumn(),
                        "offset", mark.getIndex(),
                        "problem", pe.getProblem() == null ? nil() : createUTF8String(pe.getProblem()),
                        "context", pe.getContext() == null ? nil() : createUTF8String(pe.getContext()));
            } catch (ReaderException re) {
                errorProfile.enter();

                raiseSyntaxErrorSnippetNode.execute(frame,
                        "raise Psych::SyntaxError.new(file, line, col, offset, problem, context)",
                        "file", path,
                        "line", 0,
                        "col", 0,
                        "offset", re.getPosition(),
                        "problem", re.getName() == null ? nil() : createUTF8String(re.getName()),
                        "context", toString(re) == null ? nil() : createUTF8String(toString(re)));
            } catch (Throwable t) {
                errorProfile.enter();
                throwException(t);
                return parserObject;
            }

            return parserObject;
        }

        public static void throwException(final Throwable e) {
            throwsUnchecked(e);
        }

        @SuppressWarnings("unchecked")
        private static  void throwsUnchecked(final Throwable e) throws T {
            throw (T) e;
        }

        @TruffleBoundary
        private StreamReader newStreamReader(DynamicObject yaml) {
            final Encoding enc = UTF8Encoding.INSTANCE;
            final Charset charset = enc.getCharset();
            return new StreamReader(new InputStreamReader(new InputStreamAdapter(getContext(), yaml), charset));
        }

        @TruffleBoundary
        private StreamReader newStringReader(Rope rope) {
            Encoding encoding = rope.getEncoding();

            if (!(encoding instanceof UnicodeEncoding)) {
                rope = strConvEnc(getContext(), rope, encoding);

                encoding = UTF8Encoding.INSTANCE;
            }

            return new StreamReader(
                    new InputStreamReader(
                            new ByteArrayInputStream(
                                    rope.getBytes(), 0, rope.byteLength()),
                            encoding.getCharset()));
        }

        @TruffleBoundary
        private ParserImpl newParser(StreamReader reader) {
            return new ParserImpl(reader);
        }

        @TruffleBoundary
        private Event getParserEvent(Parser parser) {
            return parser.getEvent();
        }

        @TruffleBoundary
        private boolean isEvent(Event event, ID id) {
            return event.is(id);
        }

        private DynamicObject createUTF8String(String value) {
            return createString(StringOperations.encodeRope(value, UTF8Encoding.INSTANCE));
        }

        @TruffleBoundary
        private int size(Map tagsMap) {
            return tagsMap.size();
        }

        @TruffleBoundary
        private Set> entrySet(Map tagsMap) {
            return tagsMap.entrySet();
        }

        @TruffleBoundary
        private String getKey(Map.Entry tag) {
            return tag.getKey();
        }

        @TruffleBoundary
        private String getValue(Map.Entry tag) {
            return tag.getValue();
        }

        @TruffleBoundary
        private String toString(ReaderException re) {
            return re.toString();
        }

        protected ReadObjectFieldNode createReadHandlerNode() {
            return ReadObjectFieldNodeGen.create("@handler", nil());
        }

        private static final int STYLE_PLAIN = 1;
        private static final int STYLE_SINGLE_QUOTED = 2;
        private static final int STYLE_DOUBLE_QUOTED = 3;
        private static final int STYLE_LITERAL = 4;
        private static final int STYLE_FOLDED = 5;
        private static final int STYLE_ANY = 0;
        private static final int STYLE_FLOW = 2;
        private static final int STYLE_NOT_FLOW = 1;

        private static int translateStyle(Character style) {
            switch (style) {
                case 0:
                    return STYLE_PLAIN;
                case '\'':
                    return STYLE_SINGLE_QUOTED;
                case '"':
                    return STYLE_DOUBLE_QUOTED;
                case '|':
                    return STYLE_LITERAL;
                case '>':
                    return STYLE_FOLDED;
                default:
                    return STYLE_ANY;
            }
        }

        private static int translateFlowStyle(Boolean flowStyle) {
            if (flowStyle == null) {
                return STYLE_ANY;
            } else if (flowStyle) {
                return STYLE_FLOW;
            } else {
                return STYLE_NOT_FLOW;
            }
        }

        @TruffleBoundary
        private Object stringOrNilFor(String value, boolean tainted, TaintNode taintNode) {
            if (value == null) {
                return nil();
            } else {
                return stringFor(value, tainted, taintNode);
            }
        }

        @TruffleBoundary
        private Object stringFor(String value, boolean tainted, TaintNode taintNode) {
            Encoding encoding = getContext().getEncodingManager().getDefaultInternalEncoding();

            if (encoding == null) {
                encoding = UTF8Encoding.INSTANCE;
            }

            Charset charset = StandardCharsets.UTF_8;

            if (encoding.getCharset() != null) {
                charset = encoding.getCharset();
            }

            final Object string = createString(value.getBytes(charset), encoding);

            if (tainted) {
                taintNode.executeTaint(string);
            }

            return string;
        }

        private Rope strConvEnc(RubyContext context, Rope rope, Encoding encoding) {
            return strConvEnc2(context, rope, encoding, UTF8Encoding.INSTANCE);
        }

        private static Rope strConvEnc2(RubyContext context, Rope value, Encoding fromEncoding, Encoding toEncoding) {
            return strConvEncOpts(context, value, fromEncoding, toEncoding, 0, null);
        }

        private static Rope strConvEncOpts(RubyContext context, Rope str, Encoding fromEncoding,
                                                Encoding toEncoding, int ecflags, Object ecopts) {
            if (toEncoding == null) return str;
            if (fromEncoding == null) fromEncoding = str.getEncoding();
            if (fromEncoding == toEncoding) return str;
            if ((toEncoding.isAsciiCompatible() && isAsciiOnly(str)) ||
                    toEncoding == ASCIIEncoding.INSTANCE) {
                if (str.getEncoding() != toEncoding) {
                    return str.withEncoding(toEncoding, CodeRange.CR_7BIT);
                }
                return str;
            }

            int len = str.byteLength();
            ByteList newStr = new ByteList(len);
            int olen = len;

            EConv ec = econvOpenOpts(context, fromEncoding, toEncoding, ecflags, ecopts);
            if (ec == null) return str;

            byte[] sbytes = str.getBytes();
            Ptr sp = new Ptr(0);
            int start = sp.p;

            byte[] destbytes;
            Ptr dp = new Ptr(0);
            EConvResult ret;
            int convertedOutput = 0;

            // these are in the while clause in MRI
            destbytes = newStr.getUnsafeBytes();
            int dest = newStr.begin();
            dp.p = dest + convertedOutput;
            ret = TranscodingManager.convert(ec, sbytes, sp, start + len, destbytes, dp, dest + olen, 0);

            while (ret == EConvResult.DestinationBufferFull) {
                int convertedInput = sp.p - start;
                int rest = len - convertedInput;
                convertedOutput = dp.p - dest;
                newStr.setRealSize(convertedOutput);
                if (convertedInput != 0 && convertedOutput != 0 &&
                        rest < (Integer.MAX_VALUE / convertedOutput)) {
                    rest = (rest * convertedOutput) / convertedInput;
                } else {
                    rest = olen;
                }
                olen += rest < 2 ? 2 : rest;
                newStr.ensure(olen);

                // these are the while clause in MRI
                destbytes = newStr.getUnsafeBytes();
                dest = newStr.begin();
                dp.p = dest + convertedOutput;
                ret = TranscodingManager.convert(ec, sbytes, sp, start + len, destbytes, dp, dest + olen, 0);
            }
            ec.close();

            switch (ret) {
                case Finished:
                    len = dp.p;
                    newStr.setRealSize(len);
                    newStr.setEncoding(toEncoding);
                    return RopeOperations.ropeFromByteList(newStr);

                default:
                    // some error, return original
                    return str;
            }
        }

        private static EConv econvOpenOpts(RubyContext context, Encoding sourceEncoding, Encoding destinationEncoding, int ecflags, Object opthash) {
            EConv ec = TranscodingManager.create(sourceEncoding, destinationEncoding, ecflags);
            return ec;
        }

        private static boolean isAsciiOnly(Rope string) {
            return string.getEncoding().isAsciiCompatible() && string.getCodeRange() == CodeRange.CR_7BIT;
        }

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy