All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.palantir.javaformat.java.JavacTokens Maven / Gradle / Ivy

There is a newer version: 2.50.0
Show newest version
/*
 * Copyright 2016 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.palantir.javaformat.java;

import static com.google.common.base.Preconditions.checkArgument;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.sun.tools.javac.parser.JavaTokenizer;
import com.sun.tools.javac.parser.Scanner;
import com.sun.tools.javac.parser.ScannerFactory;
import com.sun.tools.javac.parser.Tokens.Comment;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.parser.Tokens.Token;
import com.sun.tools.javac.parser.Tokens.TokenKind;
import com.sun.tools.javac.parser.UnicodeReader;
import com.sun.tools.javac.util.Context;
import java.util.Set;

/** A wrapper around javac's lexer. */
class JavacTokens {

    /**
     * Sentinel line comment that is appended to the input before it is handed to the lexer.
     *
     * <p>The lexer eats terminal comments, so feed it one we don't care about. The leading newline
     * puts the sentinel on a line of its own.
     */
    // TODO(b/33103797): fix javac and remove the work-around
    private static final CharSequence EOF_COMMENT = "\n//EOF";

    /**
     * One raw piece of the lexed input: either a real javac token, or a stretch of whitespace or a
     * comment (for which no token kind is recorded).
     */
    static class RawTok {
        private final int pos;
        private final int endPos;
        private final TokenKind kind;
        private final String stringVal;

        RawTok(String stringVal, TokenKind kind, int pos, int endPos) {
            this.pos = pos;
            this.endPos = endPos;
            this.kind = kind;
            this.stringVal = stringVal;
        }

        /** Returns the position at which this token starts. */
        public int pos() {
            return this.pos;
        }

        /** Returns the position at which this token ends. */
        public int endPos() {
            return this.endPos;
        }

        /** Returns the kind of this token; {@code null} means whitespace or a comment. */
        public TokenKind kind() {
            return this.kind;
        }

        /** Returns the escaped string value if this is a literal; {@code null} for any other token. */
        public String stringVal() {
            return this.stringVal;
        }
    }

    /**
     * Lexes {@code source} with javac's scanner and returns the pieces as a list of {@link RawTok}s.
     *
     * <p>The result contains one entry per javac token plus kind-less entries ({@code kind() == null})
     * for every comment and for every gap between the end of the previous entry and the start of the
     * next one. String literal tokens carry their value wrapped in double quotes; all other entries
     * have a {@code null} string value.
     *
     * <p>Lexing stops at the first token whose kind is in {@code stopTokens}. That token is not added
     * to the result, but comments attached to it are. Unless the stop token is {@code EOF}, the
     * trailing gap entry ends at the stop token's start position instead of the end of the source.
     *
     * @param source the text to lex; {@code null} yields an empty list
     * @param context the javac context from which the {@link ScannerFactory} is obtained
     * @param stopTokens the token kinds at which lexing stops
     * @return the lexed tokens, comments and gaps, in the order they were produced
     */
    public static ImmutableList<RawTok> getTokens(String source, Context context, Set<TokenKind> stopTokens) {
        if (source == null) {
            return ImmutableList.of();
        }
        ScannerFactory fac = ScannerFactory.instance(context);
        // The sentinel comment is appended for the lexer only.
        char[] buffer = (source + EOF_COMMENT).toCharArray();
        Scanner scanner = new AccessibleScanner(fac, new CommentSavingTokenizer(fac, buffer, buffer.length));
        ImmutableList.Builder<RawTok> tokens = ImmutableList.builder();
        // `end` is the length of the caller's text, without the sentinel.
        int end = source.length();
        // End position of the most recently emitted entry; anything between it and the next entry is a gap.
        int last = 0;
        do {
            scanner.nextToken();
            Token t = scanner.token();
            if (t.comments != null) {
                // The token's comment list is walked back to front.
                // NOTE(review): presumably javac records the comments last-first - confirm.
                for (Comment c : Lists.reverse(t.comments)) {
                    int commentStart = c.getSourcePos(0);
                    int commentEnd = commentStart + c.getText().length();
                    if (last < commentStart) {
                        tokens.add(new RawTok(null, null, last, commentStart));
                    }
                    tokens.add(new RawTok(null, null, commentStart, commentEnd));
                    last = commentEnd;
                }
            }
            if (stopTokens.contains(t.kind)) {
                if (t.kind != TokenKind.EOF) {
                    // Cut the trailing gap off where the stop token begins.
                    end = t.pos;
                }
                break;
            }
            if (last < t.pos) {
                tokens.add(new RawTok(null, null, last, t.pos));
            }
            String literal = t.kind == TokenKind.STRINGLITERAL ? "\"" + t.stringVal() + "\"" : null;
            tokens.add(new RawTok(literal, t.kind, t.pos, t.endPos));
            last = t.endPos;
        } while (scanner.token().kind != TokenKind.EOF);
        if (last < end) {
            tokens.add(new RawTok(null, null, last, end));
        }
        return tokens.build();
    }

    /**
     * A {@link JavaTokenizer} whose comments are {@link CommentWithTextAndPosition} instances, so that
     * each comment keeps its raw characters and its position.
     */
    static class CommentSavingTokenizer extends JavaTokenizer {
        CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length) {
            super(fac, buffer, length);
        }

        /** Wraps the raw characters in {@code [pos, endPos)} in a comment that remembers them. */
        @Override
        protected Comment processComment(int pos, int endPos, CommentStyle style) {
            char[] raw = getRawCharactersReflectively(pos, endPos);
            AccessibleReader commentReader = new AccessibleReader(fac, raw, raw.length);
            return new CommentWithTextAndPosition(pos, endPos, commentReader, style);
        }

        /**
         * Calls the public {@code getRawCharacters(int, int)} method reflectively on whichever object
         * {@link #rawCharacterSource()} selects.
         *
         * @throws RuntimeException wrapping any reflective failure of the lookup or the call
         */
        private char[] getRawCharactersReflectively(int beginIndex, int endIndex) {
            Object source = rawCharacterSource();
            try {
                Object chars = source.getClass()
                        .getMethod("getRawCharacters", int.class, int.class)
                        .invoke(source, beginIndex, endIndex);
                return (char[]) chars;
            } catch (ReflectiveOperationException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }

        /**
         * Returns the value of the {@code reader} field declared on {@link JavaTokenizer}, or this
         * tokenizer itself when that field cannot be read reflectively.
         */
        // NOTE(review): the fallback presumably covers javac versions without a `reader` field - confirm.
        private Object rawCharacterSource() {
            try {
                return JavaTokenizer.class.getDeclaredField("reader").get(this);
            } catch (ReflectiveOperationException e) {
                return this;
            }
        }
    }

    /** A {@link Comment} that saves its text and start position. */
    static class CommentWithTextAndPosition implements Comment {

        private final int pos;
        private final int endPos;
        private final AccessibleReader reader;
        private final CommentStyle style;

        private String text = null;

        public CommentWithTextAndPosition(int pos, int endPos, AccessibleReader reader, CommentStyle style) {
            this.pos = pos;
            this.endPos = endPos;
            this.reader = reader;
            this.style = style;
        }

        /**
         * Returns the source position of the character at index {@code index} in the comment text.
         *
         * 

The handling of javadoc comments in javac has more logic to skip over leading whitespace and '*' * characters when indexing into doc comments, but we don't need any of that. */ @Override public int getSourcePos(int index) { checkArgument( 0 <= index && index < (endPos - pos), "Expected %s in the range [0, %s)", index, endPos - pos); return pos + index; } @Override public CommentStyle getStyle() { return style; } @Override public String getText() { String text = this.text; if (text == null) { this.text = text = new String(reader.getRawCharacters()); } return text; } /** * We don't care about {@code @deprecated} javadoc tags (see the DepAnn check). * * @return false */ @Override public boolean isDeprecated() { return false; } @Override public String toString() { return String.format("Comment: '%s'", getText()); } } // Scanner(ScannerFactory, JavaTokenizer) is package-private static class AccessibleScanner extends Scanner { protected AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer) { super(fac, tokenizer); } } // UnicodeReader(ScannerFactory, char[], int) is package-private static class AccessibleReader extends UnicodeReader { protected AccessibleReader(ScannerFactory fac, char[] buffer, int length) { super(fac, buffer, length); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy