// com.palantir.javaformat.java.JavaInput (Maven / Gradle / Ivy)
/*
* Copyright 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.palantir.javaformat.java;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Iterables.getLast;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.base.MoreObjects;
import com.google.common.base.Suppliers;
import com.google.common.base.Verify;
import com.google.common.collect.DiscreteDomain;
import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableRangeMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.collect.TreeRangeSet;
import com.google.errorprone.annotations.Immutable;
import com.palantir.javaformat.Input;
import com.palantir.javaformat.Newlines;
import com.palantir.javaformat.java.JavacTokens.RawTok;
import com.sun.tools.javac.file.JavacFileManager;
import com.sun.tools.javac.parser.Tokens.TokenKind;
import com.sun.tools.javac.tree.JCTree.JCCompilationUnit;
import com.sun.tools.javac.util.Context;
import com.sun.tools.javac.util.Log;
import com.sun.tools.javac.util.Log.DeferredDiagnosticHandler;
import com.sun.tools.javac.util.Options;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import javax.tools.Diagnostic;
import javax.tools.DiagnosticCollector;
import javax.tools.DiagnosticListener;
import javax.tools.JavaFileObject;
import javax.tools.JavaFileObject.Kind;
import javax.tools.SimpleJavaFileObject;
/** {@code JavaInput} extends {@link Input} to represent a Java input document. */
public final class JavaInput extends Input {
/**
* A {@code JavaInput} is a sequence of {@link Tok}s that cover the Java input. A {@link Tok} is either a token (if
* {@code isToken()}), or a non-token, which is a comment (if {@code isComment()}) or a newline (if
* {@code isNewline()}) or a maximal sequence of other whitespace characters (if {@code isSpaces()}). Each
* {@link Tok} contains a sequence of characters, an index (sequential starting at {@code 0} for tokens and
* comments, else {@code -1}), and a ({@code 0}-origin) position in the input. The concatenation of the texts of all
* the {@link Tok}s equals the input. Each Input ends with a token EOF {@link Tok}, with empty text.
*
* A {@code /*} comment possibly contains newlines; a {@code //} comment does not contain the terminating newline
* character, but is followed by a newline {@link Tok}.
*/
@Immutable
static final class Tok implements Input.Tok {
private final int index;
private final String originalText;
private final String text;
private final int position;
private final int columnI;
private final boolean isToken;
private final TokenKind kind;
/**
* The {@code Tok} constructor.
*
* @param index its index
* @param originalText its original text, before removing Unicode escapes
* @param text its text after removing Unicode escapes
* @param position its {@code 0}-origin position in the input
* @param columnI its {@code 0}-origin column number in the input
* @param isToken whether the {@code Tok} is a token
* @param kind the token kind
*/
Tok(int index, String originalText, String text, int position, int columnI, boolean isToken, TokenKind kind) {
this.index = index;
this.originalText = originalText;
this.text = text;
this.position = position;
this.columnI = columnI;
this.isToken = isToken;
this.kind = kind;
}
@Override
public int getIndex() {
return index;
}
@Override
public String getText() {
return text;
}
@Override
public String getOriginalText() {
return originalText;
}
@Override
public int length() {
return originalText.length();
}
@Override
public int getPosition() {
return position;
}
@Override
public int getColumn() {
return columnI;
}
boolean isToken() {
return isToken;
}
@Override
public boolean isNewline() {
return Newlines.isNewline(text);
}
@Override
public boolean isSlashSlashComment() {
return text.startsWith("//");
}
@Override
public boolean isSlashStarComment() {
return text.startsWith("/*");
}
@Override
public boolean isJavadocComment() {
return text.startsWith("/**") && text.length() > 4;
}
@Override
public boolean isComment() {
return isSlashSlashComment() || isSlashStarComment();
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("index", index)
.add("text", text)
.add("position", position)
.add("columnI", columnI)
.add("isToken", isToken)
.toString();
}
public TokenKind kind() {
return kind;
}
}
/**
* A {@link Token} contains a token {@link Tok} and its associated non-tokens; each non-token {@link Tok} belongs to
* one {@link Token}. Each {@link Token} has an immutable list of its non-tokens that appear before it, and another
* list of its non-tokens that appear after it. The concatenation of the texts of all the {@link Token}s'
* {@link Tok}s, each preceded by the texts of its {@code toksBefore} and followed by the texts of its
* {@code toksAfter}, equals the input.
*/
@Immutable
static final class Token implements Input.Token {
private final Tok tok;
private final ImmutableList toksBefore;
private final ImmutableList toksAfter;
/**
* Token constructor.
*
* @param toksBefore the earlier non-token {link Tok}s assigned to this {@code Token}
* @param tok this token {@link Tok}
* @param toksAfter the later non-token {link Tok}s assigned to this {@code Token}
*/
Token(List toksBefore, Tok tok, List toksAfter) {
this.toksBefore = ImmutableList.copyOf(toksBefore);
this.tok = tok;
this.toksAfter = ImmutableList.copyOf(toksAfter);
}
/**
* Get the token's {@link Tok}.
*
* @return the token's {@link Tok}
*/
@Override
public Tok getTok() {
return tok;
}
/**
* Get the earlier {@link Tok}s assigned to this {@code Token}.
*
* @return the earlier {@link Tok}s assigned to this {@code Token}
*/
@Override
public ImmutableList extends Input.Tok> getToksBefore() {
return toksBefore;
}
/**
* Get the later {@link Tok}s assigned to this {@code Token}.
*
* @return the later {@link Tok}s assigned to this {@code Token}
*/
@Override
public ImmutableList extends Input.Tok> getToksAfter() {
return toksAfter;
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("tok", tok)
.add("toksBefore", toksBefore)
.add("toksAfter", toksAfter)
.toString();
}
}
private final String text; // The input.
// Line separator guessed from the input text; computed on first use and memoized.
private final Supplier<String> lineSeparator = Suppliers.memoize(() -> Newlines.guessLineSeparator(getText()));
private int kN; // The number of numbered toks (tokens or comments), excluding the EOF.
/*
 * The following lists record the sequential indices of the {@code Tok}s on each input line. (Only
 * tokens and comments have sequential indices.) Tokens and {@code //} comments lie on just one
 * line; {@code /*} comments can lie on multiple lines. These data structures (along with
 * equivalent ones for the formatted output) let us compute correspondences between the input and
 * output.
 */
private final ImmutableMap<Integer, Integer> positionToColumnMap; // Map Tok position to column.
private final ImmutableList<Token> tokens; // The Tokens for this input.
private final ImmutableRangeMap<Integer, Token> positionTokenMap; // Map position to Token.
/** Map from Tok index to the associated Token. */
private final Token[] kToToken;
/**
 * Input constructor. Lexes the text into toks, groups them into {@code Token}s, and builds the lookup structures
 * (position to column, position range to {@code Token}, and tok index to {@code Token}).
 *
 * @param text the input text
 * @throws FormatterException if the input cannot be parsed
 */
public JavaInput(String text) throws FormatterException {
    this.text = checkNotNull(text);
    setLines(ImmutableList.copyOf(Newlines.lineIterator(text)));
    ImmutableList<Tok> toks = buildToks(text);
    positionToColumnMap = makePositionToColumnMap(toks);
    tokens = buildTokens(toks);
    ImmutableRangeMap.Builder<Integer, Token> tokenLocations = ImmutableRangeMap.builder();
    for (Token token : tokens) {
        Input.Tok end = JavaOutput.endTok(token);
        int upper = end.getPosition();
        // The range is closed, so a non-empty end tok extends it to its last character; an empty one (EOF)
        // contributes only its position.
        if (!end.getText().isEmpty()) {
            upper += end.length() - 1;
        }
        tokenLocations.put(Range.closed(JavaOutput.startTok(token).getPosition(), upper), token);
    }
    positionTokenMap = tokenLocations.build();
    // adjust kN for EOF
    kToToken = new Token[kN + 1];
    for (Token token : tokens) {
        // Unnumbered toks (index < 0, i.e. whitespace) have no slot in the index table.
        for (Input.Tok tok : token.getToksBefore()) {
            if (tok.getIndex() < 0) {
                continue;
            }
            kToToken[tok.getIndex()] = token;
        }
        kToToken[token.getTok().getIndex()] = token;
        for (Input.Tok tok : token.getToksAfter()) {
            if (tok.getIndex() < 0) {
                continue;
            }
            kToToken[tok.getIndex()] = token;
        }
    }
}
/**
 * Build the map from each tok's position to its column.
 *
 * @param toks the toks of the input
 * @return an immutable map keyed by tok position; building fails if two toks report the same position
 */
private static ImmutableMap<Integer, Integer> makePositionToColumnMap(List<Tok> toks) {
    ImmutableMap.Builder<Integer, Integer> builder = ImmutableMap.builder();
    for (Tok tok : toks) {
        builder.put(tok.getPosition(), tok.getColumn());
    }
    return builder.buildOrThrow();
}
/**
* Get the input text.
*
* @return the input text, as passed to the constructor
*/
@Override
public String getText() {
return text;
}
/**
 * Get the map from tok position to column.
 *
 * @return the immutable map from each tok's position to its column
 */
@Override
public ImmutableMap<Integer, Integer> getPositionToColumnMap() {
    return positionToColumnMap;
}
/**
* Get the line separator of the input.
*
* @return the result of {@code Newlines.guessLineSeparator} on the input text, computed on first use and
* memoized
*/
public String getLineSeparator() {
return lineSeparator.get();
}
/**
 * Lex the whole input and build the list of toks. Also records {@code kN} (the index of the final EOF tok) and
 * passes the toks to {@code computeRanges}.
 *
 * @param text the text to be lexed
 * @return the toks covering the input, ending with the EOF tok
 * @throws FormatterException if lexing fails
 */
private ImmutableList<Tok> buildToks(String text) throws FormatterException {
    ImmutableList<Tok> toks = buildToks(text, ImmutableSet.of());
    kN = getLast(toks).getIndex();
    computeRanges(toks);
    return toks;
}
/**
 * Lex the input and build the list of toks.
 *
 * <p>Whitespace is split into one tok per newline and per run of other whitespace; operators and punctuation are
 * split into one-character toks; the newline ending a {@code //} comment becomes its own tok. The returned list
 * always ends with an empty-text EOF tok. If lexing produced any error diagnostic, the result is only that EOF tok.
 *
 * @param text the text to be lexed.
 * @param stopTokens a set of tokens which should cause lexing to stop. If one of these is found, the returned list
 *     will include tokens up to but not including that token.
 * @return the toks for the lexed text, terminated by an EOF tok
 * @throws FormatterException if whitespace or a multi-character operator contains Unicode escapes
 */
static ImmutableList<Tok> buildToks(String text, ImmutableSet<TokenKind> stopTokens) throws FormatterException {
    // EOF always ends lexing, in addition to whatever the caller asked for.
    stopTokens = ImmutableSet.<TokenKind>builder()
            .addAll(stopTokens)
            .add(TokenKind.EOF)
            .build();
    Context context = new Context();
    Options.instance(context).put("--enable-preview", "true");
    // Result intentionally unused; presumably the file manager registers itself in the context.
    new JavacFileManager(context, true, UTF_8);
    DiagnosticCollector<JavaFileObject> diagnosticCollector = new DiagnosticCollector<>();
    context.put(DiagnosticListener.class, diagnosticCollector);
    Log log = Log.instance(context);
    log.useSource(new SimpleJavaFileObject(URI.create("Source.java"), Kind.SOURCE) {
        @Override
        public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException {
            return text;
        }
    });
    DeferredDiagnosticHandler diagnostics = new DeferredDiagnosticHandler(log);
    ImmutableList<RawTok> rawToks = JavacTokens.getTokens(text, context, stopTokens);
    if (diagnostics.getDiagnostics().stream().anyMatch(d -> d.getKind() == Diagnostic.Kind.ERROR)) {
        return ImmutableList.of(new Tok(0, "", "", 0, 0, true, null)); // EOF
    }
    int kN = 0;
    List<Tok> toks = new ArrayList<>();
    int charI = 0;
    int columnI = 0;
    for (RawTok t : rawToks) {
        if (stopTokens.contains(t.kind())) {
            break;
        }
        int charI0 = t.pos();
        // Get string, possibly with Unicode escapes.
        String originalTokText = text.substring(charI0, t.endPos());
        String tokText = t.kind() == TokenKind.STRINGLITERAL
                ? t.stringVal() // Unicode escapes removed.
                : originalTokText;
        char tokText0 = tokText.charAt(0); // The token's first character.
        final boolean isToken; // Is this tok a token?
        final boolean isNumbered; // Is this tok numbered? (tokens and comments)
        String extraNewline = null; // Extra newline at end?
        List<String> strings = new ArrayList<>();
        if (Character.isWhitespace(tokText0)) {
            // Whitespace: emit each newline and each run of other whitespace as a separate piece.
            isToken = false;
            isNumbered = false;
            Iterator<String> it = Newlines.lineIterator(originalTokText);
            while (it.hasNext()) {
                String line = it.next();
                String newline = Newlines.getLineEnding(line);
                if (newline != null) {
                    String spaces = line.substring(0, line.length() - newline.length());
                    if (!spaces.isEmpty()) {
                        strings.add(spaces);
                    }
                    strings.add(newline);
                } else if (!line.isEmpty()) {
                    strings.add(line);
                }
            }
        } else if (tokText.startsWith("'") || tokText.startsWith("\"")) {
            isToken = true;
            isNumbered = true;
            strings.add(originalTokText);
        } else if (tokText.startsWith("//") || tokText.startsWith("/*")) {
            // For compatibility with an earlier lexer, the newline after a // comment is its own tok.
            if (tokText.startsWith("//") && (originalTokText.endsWith("\n") || originalTokText.endsWith("\r"))) {
                extraNewline = Newlines.getLineEnding(originalTokText);
                tokText = tokText.substring(0, tokText.length() - extraNewline.length());
                originalTokText = originalTokText.substring(0, originalTokText.length() - extraNewline.length());
            }
            isToken = false;
            isNumbered = true;
            strings.add(originalTokText);
        } else if (Character.isJavaIdentifierStart(tokText0)
                || Character.isDigit(tokText0)
                || (tokText0 == '.' && tokText.length() > 1 && Character.isDigit(tokText.charAt(1)))) {
            // Identifier, keyword, or numeric literal (a dot may begin a number, as in .2D).
            isToken = true;
            isNumbered = true;
            strings.add(tokText);
        } else {
            // Other tokens ("+" or "++" or ">>") are broken into one-character toks, because ">>"
            // cannot be lexed without syntactic knowledge. This implementation fails if the token
            // contains Unicode escapes.
            isToken = true;
            isNumbered = true;
            for (int i = 0; i < tokText.length(); i++) {
                char c = tokText.charAt(i);
                strings.add(String.valueOf(c));
            }
        }
        if (strings.size() == 1) {
            toks.add(new Tok(isNumbered ? kN++ : -1, originalTokText, tokText, charI, columnI, isToken, t.kind()));
            charI += originalTokText.length();
            columnI = updateColumn(columnI, originalTokText);
        } else {
            // Split toks reuse each piece as both original and processed text, so the two must agree.
            if (!tokText.equals(originalTokText)) {
                throw new FormatterException(
                        "Unicode escapes not allowed in whitespace or multi-character operators");
            }
            for (String str : strings) {
                toks.add(new Tok(isNumbered ? kN++ : -1, str, str, charI, columnI, isToken, null));
                charI += str.length();
                // Advance by this piece only, so each split tok gets its own column and the column does not
                // drift by the full length of the original text for every piece.
                columnI = updateColumn(columnI, str);
            }
        }
        if (extraNewline != null) {
            toks.add(new Tok(-1, extraNewline, extraNewline, charI, columnI, false, null));
            columnI = 0;
            charI += extraNewline.length();
        }
    }
    toks.add(new Tok(kN, "", "", charI, columnI, true, null)); // EOF tok.
    return ImmutableList.copyOf(toks);
}
/**
 * Compute the column that follows a piece of text.
 *
 * @param columnI the column at which the text starts
 * @param originalTokText the text being stepped over
 * @return if the last line offset reported by {@code Newlines.lineOffsetIterator} is positive, the number of
 *     characters after that offset; otherwise {@code columnI} advanced by the length of the text
 */
private static int updateColumn(int columnI, String originalTokText) {
    int lastLineOffset = Iterators.getLast(Newlines.lineOffsetIterator(originalTokText));
    int textLength = originalTokText.length();
    return lastLineOffset > 0 ? textLength - lastLineOffset : columnI + textLength;
}
/**
 * Group the toks into {@link Token}s: each token tok together with the non-token toks (comments and whitespace)
 * assigned to it, before and after.
 *
 * <p>Non-tokens preceding a token go into its {@code toksBefore}. Non-tokens following it go into its
 * {@code toksAfter} up to and including the first one that contains a line break, with exceptions for certain
 * inline comments (see the comments in the body). The loops rely on the list ending with a token tok.
 *
 * @param toks the toks of the input, ending with the EOF tok
 * @return the {@link Token}s, in input order
 */
private static ImmutableList<Token> buildTokens(List<Tok> toks) {
    ImmutableList.Builder<Token> tokens = ImmutableList.builder();
    int k = 0;
    int kN = toks.size();
    // Remaining non-tokens before the token go here.
    ImmutableList.Builder<Tok> toksBefore = ImmutableList.builder();
    OUTERMOST:
    while (k < kN) {
        while (!toks.get(k).isToken()) {
            Tok tok = toks.get(k++);
            toksBefore.add(tok);
            if (isParamComment(tok)) {
                while (toks.get(k).isNewline()) {
                    // drop newlines after parameter comments
                    k++;
                }
            }
        }
        Tok tok = toks.get(k++);
        // Non-tokens starting on the same line go here too.
        ImmutableList.Builder<Tok> toksAfter = ImmutableList.builder();
        OUTER:
        while (k < kN && !toks.get(k).isToken()) {
            // Don't attach inline comments to certain leading tokens, e.g. for `f(/*flag1=*/true).
            //
            // Attaching inline comments to the right token is hard, and this barely
            // scratches the surface. But it's enough to do a better job with parameter
            // name comments.
            //
            // TODO(cushon): find a better strategy.
            if (toks.get(k).isSlashStarComment()) {
                switch (tok.getText()) {
                    case "(":
                    case "<":
                    case ".":
                        break OUTER;
                    default:
                        break;
                }
            }
            if (toks.get(k).isJavadocComment()) {
                switch (tok.getText()) {
                    case ";":
                        break OUTER;
                    default:
                        break;
                }
            }
            if (isParamComment(toks.get(k))) {
                // Close the current Token and let the parameter comment lead the next one.
                tokens.add(new Token(toksBefore.build(), tok, toksAfter.build()));
                toksBefore = ImmutableList.<Tok>builder().add(toks.get(k++));
                // drop newlines after parameter comments
                while (toks.get(k).isNewline()) {
                    k++;
                }
                continue OUTERMOST;
            }
            Tok nonTokenAfter = toks.get(k++);
            toksAfter.add(nonTokenAfter);
            if (Newlines.containsBreaks(nonTokenAfter.getText())) {
                break;
            }
        }
        tokens.add(new Token(toksBefore.build(), tok, toksAfter.build()));
        toksBefore = ImmutableList.builder();
    }
    return tokens.build();
}
/**
 * Shape of a parameter-name comment: a slash-star comment whose whole body is one or more letters, digits,
 * whitespace, underscores or hyphens, followed by {@code =} and optional whitespace. Compiled once rather than
 * on every call.
 */
private static final Pattern PARAM_COMMENT = Pattern.compile("\\/\\*[A-Za-z0-9\\s_\\-]+=\\s*\\*\\/");

/**
 * Returns whether the tok is a parameter-name comment, such as the comment in {@code f(flag=true)} written with
 * {@code flag=} inside a slash-star comment.
 *
 * @param tok the tok to test
 * @return {@code true} if the tok is a slash-star comment whose entire text matches the parameter-comment shape
 */
private static boolean isParamComment(Tok tok) {
    return tok.isSlashStarComment() && PARAM_COMMENT.matcher(tok.getText()).matches();
}
/**
 * Convert from an offset and length flag pair to a token range.
 *
 * @param offset the {@code 0}-based offset in characters
 * @param length the length in characters; {@code 0} is treated as a length of {@code 1}, and a negative length
 *     yields the empty range
 * @return the {@code 0}-based {@link Range} of tokens, or the empty range if no token lies in the character range
 * @throws FormatterException if {@code offset + length} lies beyond the end of the input
 */
Range<Integer> characterRangeToTokenRange(int offset, int length) throws FormatterException {
    // Sum in long so that a very large offset/length pair cannot wrap around and slip past the bounds check.
    long requiredLength = (long) offset + length;
    if (requiredLength > text.length()) {
        throw new FormatterException(String.format(
                "error: invalid length %d, offset + length (%d) is outside the file", length, requiredLength));
    }
    if (length < 0) {
        return EMPTY_RANGE;
    }
    if (length == 0) {
        // 0 stands for "format the line under the cursor"
        length = 1;
    }
    ImmutableCollection<Token> enclosed = getPositionTokenMap()
            .subRangeMap(Range.closedOpen(offset, offset + length))
            .asMapOfRanges()
            .values();
    if (enclosed.isEmpty()) {
        return EMPTY_RANGE;
    }
    // Token indices form a half-open range: first enclosed token up to one past the last enclosed token.
    return Range.closedOpen(
            enclosed.iterator().next().getTok().getIndex(),
            getLast(enclosed).getTok().getIndex() + 1);
}
/**
* Get the number of numbered toks.
*
* @return the number of numbered toks (tokens and comments), excluding the EOF tok; this equals the index of
* the EOF tok
*/
int getkN() {
return kN;
}
/**
* Get the Token by index.
*
* @param k the index of a numbered tok (a token or comment)
* @return the {@code Token} to which the tok with index {@code k} is assigned
*/
Token getToken(int k) {
return kToToken[k];
}
/**
* Get the input tokens.
*
* @return the input tokens
*/
@Override
public ImmutableList extends Input.Token> getTokens() {
return tokens;
}
/**
 * Get the range map from position to {@link Token}. Used to look for tokens following a given one, and to
 * implement the --offset and --length flags to reformat a character range in the input file.
 *
 * @return the range map from character position to {@link Token}
 */
@Override
public ImmutableRangeMap<Integer, Token> getPositionTokenMap() {
    return positionTokenMap;
}
/** Returns a debug representation listing the tokens followed by the superclass representation. */
@Override
public String toString() {
    MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this);
    helper.add("tokens", tokens);
    helper.add("super", super.toString());
    return helper.toString();
}
// Compilation unit used for line/column lookups; remains null until setCompilationUnit is called.
private JCCompilationUnit unit;
/**
 * Get the line number of an input position, as reported by the compilation unit's line map.
 *
 * @param inputPosition the position in the input
 * @return the line number of that position
 */
@Override
public int getLineNumber(int inputPosition) {
    // verifyNotNull fails if no compilation unit was set, and otherwise hands the unit back.
    JCCompilationUnit checkedUnit = Verify.verifyNotNull(unit, "Expected compilation unit to be set.");
    return checkedUnit.getLineMap().getLineNumber(inputPosition);
}
/**
 * Get the column number of an input position, as reported by the compilation unit's line map.
 *
 * @param inputPosition the position in the input
 * @return the column number of that position
 */
@Override
public int getColumnNumber(int inputPosition) {
    // verifyNotNull fails if no compilation unit was set, and otherwise hands the unit back.
    JCCompilationUnit checkedUnit = Verify.verifyNotNull(unit, "Expected compilation unit to be set.");
    return checkedUnit.getLineMap().getColumnNumber(inputPosition);
}
// TODO(cushon): refactor JavaInput so the CompilationUnit can be passed into
// the constructor.
/**
* Set the compilation unit whose line map is used by {@code getLineNumber} and {@code getColumnNumber}. Those
* methods fail verification until this has been called with a non-null unit.
*
* @param unit the compilation unit to use
*/
public void setCompilationUnit(JCCompilationUnit unit) {
this.unit = unit;
}
public RangeSet characterRangesToTokenRanges(Collection> characterRanges)
throws FormatterException {
RangeSet tokenRangeSet = TreeRangeSet.create();
for (Range characterRange0 : characterRanges) {
Range characterRange = characterRange0.canonical(DiscreteDomain.integers());
tokenRangeSet.add(characterRangeToTokenRange(
characterRange.lowerEndpoint(), characterRange.upperEndpoint() - characterRange.lowerEndpoint()));
}
return tokenRangeSet;
}
}