// net.sourceforge.pmd.lang.cs.cpd.CsCpdLexer (Maven / Gradle / Ivy)
/*
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.lang.cs.cpd;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.Lexer;
import net.sourceforge.pmd.cpd.CpdLanguageProperties;
import net.sourceforge.pmd.cpd.impl.AntlrCpdLexer;
import net.sourceforge.pmd.cpd.impl.AntlrTokenFilter;
import net.sourceforge.pmd.cpd.impl.BaseTokenFilter;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.antlr4.AntlrToken;
import net.sourceforge.pmd.lang.cs.ast.CSharpLexer;
/**
* The C# tokenizer.
*
* <p>Note: This class was called CsTokenizer in PMD 6.</p>
*/
public class CsCpdLexer extends AntlrCpdLexer {

    private final boolean ignoreUsings;
    private final boolean ignoreLiteralSequences;
    private final boolean ignoreAttributes;

    /**
     * Creates a lexer whose filtering behaviour is read from the given properties.
     *
     * @param properties bundle queried for {@link CpdLanguageProperties#CPD_IGNORE_IMPORTS},
     *                   {@link CpdLanguageProperties#CPD_IGNORE_LITERAL_SEQUENCES} and
     *                   {@link CpdLanguageProperties#CPD_IGNORE_METADATA}
     */
    public CsCpdLexer(LanguagePropertyBundle properties) {
        ignoreUsings = properties.getProperty(CpdLanguageProperties.CPD_IGNORE_IMPORTS);
        ignoreLiteralSequences = properties.getProperty(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES);
        ignoreAttributes = properties.getProperty(CpdLanguageProperties.CPD_IGNORE_METADATA);
    }

    /**
     * Builds the ANTLR lexer used to tokenize C# source.
     *
     * @param charStream the characters to tokenize
     * @return a new {@link CSharpLexer} reading from {@code charStream}
     */
    @Override
    protected Lexer getLexerForSource(final CharStream charStream) {
        return new CSharpLexer(charStream);
    }

    /**
     * Wraps the raw token stream in a {@link CsTokenFilter} configured with this lexer's options.
     *
     * @param tokenManager the unfiltered token source
     * @return the filtering token manager
     */
    @Override
    protected TokenManager<AntlrToken> filterTokenStream(TokenManager<AntlrToken> tokenManager) {
        return new CsTokenFilter(tokenManager, ignoreUsings, ignoreLiteralSequences, ignoreAttributes);
    }

    /**
     * Token filter that discards C#-specific tokens. It extends {@link BaseTokenFilter}
     * directly rather than {@link AntlrTokenFilter}.
     *
     * <p>Newline tokens are always discarded. Depending on the constructor flags, the filter
     * additionally discards using directives, brace-delimited sequences of literals and
     * bracket-delimited attributes.
     *
     * <p>NOTE(review): earlier documentation stated that annotation-based CPD suppression is
     * enabled by default. Nothing in this class implements suppression, so it is presumably
     * inherited from the base filter - confirm.
     */
    private static class CsTokenFilter extends BaseTokenFilter<AntlrToken> {

        /** Look-ahead state used while classifying a {@code using} keyword. */
        private enum UsingState {
            KEYWORD, // just encountered the using keyword (or a dot inside a qualified name)
            IDENTIFIER, // just encountered an identifier
        }

        private final boolean ignoreUsings;
        private final boolean ignoreLiteralSequences;
        private final boolean ignoreAttributes;

        /** True from a {@code using} directive keyword up to (and excluding) its semicolon. */
        private boolean discardingUsings = false;
        /** True while the current token is a newline token. */
        private boolean discardingNL = false;
        /** True from an opening bracket up to (and excluding) the next closing bracket. */
        private boolean isDiscardingAttribute = false;
        /** Closing brace that ends the literal sequence being discarded, or null if none. */
        private AntlrToken discardingLiteralsUntil = null;
        /** Discards only the current token; used for the terminator of a discarded region. */
        private boolean discardCurrent = false;

        CsTokenFilter(final TokenManager<AntlrToken> tokenManager,
                      boolean ignoreUsings,
                      boolean ignoreLiteralSequences,
                      boolean ignoreAttributes) {
            super(tokenManager);
            this.ignoreUsings = ignoreUsings;
            this.ignoreLiteralSequences = ignoreLiteralSequences;
            this.ignoreAttributes = ignoreAttributes;
        }

        @Override
        protected void analyzeToken(final AntlrToken currentToken) {
            skipNewLines(currentToken);
        }

        @Override
        protected void analyzeTokens(final AntlrToken currentToken, final Iterable<AntlrToken> remainingTokens) {
            // The one-shot flag applies to a single token only, so reset it before each analysis.
            discardCurrent = false;
            skipUsingDirectives(currentToken, remainingTokens);
            skipLiteralSequences(currentToken, remainingTokens);
            skipAttributes(currentToken);
        }

        /**
         * Starts discarding at a {@code using} keyword that introduces a directive and stops
         * at the terminating semicolon, which is itself discarded.
         */
        private void skipUsingDirectives(final AntlrToken currentToken, final Iterable<AntlrToken> remainingTokens) {
            if (ignoreUsings) {
                final int type = currentToken.getKind();
                if (type == CSharpLexer.USING && isUsingDirective(remainingTokens)) {
                    discardingUsings = true;
                } else if (type == CSharpLexer.SEMICOLON && discardingUsings) {
                    discardingUsings = false;
                    // The range flag is already cleared, so drop the semicolon explicitly.
                    discardCurrent = true;
                }
            }
        }

        /**
         * Looks ahead past a {@code using} keyword to decide whether it starts a using
         * directive (to be discarded) or a using statement (to be kept).
         *
         * @param remainingTokens the tokens following the {@code using} keyword
         * @return true if the upcoming tokens form a using directive
         */
        private boolean isUsingDirective(final Iterable<AntlrToken> remainingTokens) {
            UsingState usingState = UsingState.KEYWORD;
            for (final AntlrToken token : remainingTokens) {
                final int type = token.getKind();
                if (usingState == UsingState.KEYWORD) {
                    // The previous token was a using keyword.
                    switch (type) {
                    case CSharpLexer.STATIC:
                        // Definitely a using directive.
                        // Example: using static System.Math;
                        return true;
                    case CSharpLexer.VAR:
                        // Definitely a using statement.
                        // Example: using var font1 = new Font("Arial", 10.0f);
                        return false;
                    case CSharpLexer.OPEN_PARENS:
                        // Definitely a using statement.
                        // Example: using (var font1 = new Font("Arial", 10.0f))
                        return false;
                    case CSharpLexer.IDENTIFIER:
                        // This is either a type for a using statement or an alias for a using directive.
                        // Example (directive): using Project = PC.MyCompany.Project;
                        // Example (statement): using Font font1 = new Font("Arial", 10.0f);
                        usingState = UsingState.IDENTIFIER;
                        break;
                    default:
                        // Some unknown construct?
                        return false;
                    }
                } else if (usingState == UsingState.IDENTIFIER) {
                    // The previous token was an identifier.
                    switch (type) {
                    case CSharpLexer.ASSIGNMENT:
                        // Definitely a using directive.
                        // Example: using Project = PC.MyCompany.Project;
                        return true;
                    case CSharpLexer.IDENTIFIER:
                        // Definitely a using statement.
                        // Example: using Font font1 = new Font("Arial", 10.0f);
                        return false;
                    case CSharpLexer.DOT:
                        // This should be considered part of the same type; revert to previous state.
                        // Example (directive): using System.Text;
                        // Example (statement): using System.Drawing.Font font1 = new Font("Arial", 10.0f);
                        usingState = UsingState.KEYWORD;
                        break;
                    case CSharpLexer.SEMICOLON:
                        // End of using directive.
                        return true;
                    default:
                        // Some unknown construct?
                        return false;
                    }
                }
            }
            // Ran out of tokens without reaching a decision.
            return false;
        }

        /** Marks the current token for discarding if, and only if, it is a newline token. */
        private void skipNewLines(final AntlrToken currentToken) {
            discardingNL = currentToken.getKind() == CSharpLexer.NL;
        }

        /**
         * Discards everything from an opening bracket through the next closing bracket.
         *
         * <p>Only the token kind is inspected: every bracket pair is treated as an attribute,
         * and the first closing bracket ends the discarded region regardless of nesting.
         */
        private void skipAttributes(final AntlrToken currentToken) {
            if (ignoreAttributes) {
                switch (currentToken.getKind()) {
                case CSharpLexer.OPEN_BRACKET:
                    // Start of an attribute.
                    isDiscardingAttribute = true;
                    break;
                case CSharpLexer.CLOSE_BRACKET:
                    // End of an attribute; the bracket itself is dropped via the one-shot flag.
                    isDiscardingAttribute = false;
                    discardCurrent = true;
                    break;
                default:
                    // Any other token leaves the attribute state untouched.
                    break;
                }
            }
        }

        /**
         * Discards the contents of a brace-delimited list consisting solely of literals.
         * The opening brace is kept; the matching closing brace is discarded.
         */
        private void skipLiteralSequences(final AntlrToken currentToken, final Iterable<AntlrToken> remainingTokens) {
            if (ignoreLiteralSequences) {
                if (isDiscardingLiterals()) {
                    if (currentToken == discardingLiteralsUntil) { // NOPMD - intentional check for reference equality
                        discardingLiteralsUntil = null;
                        discardCurrent = true;
                    }
                } else if (currentToken.getKind() == CSharpLexer.OPEN_BRACE) {
                    // A null result means "not a literal sequence", which keeps discarding off.
                    discardingLiteralsUntil = findEndOfSequenceOfLiterals(remainingTokens);
                }
            }
        }

        /**
         * Scans ahead from just after an opening brace for the closing brace of a list that
         * contains only numeric/character literals, commas and balanced nested braces.
         *
         * @param remainingTokens the tokens following the opening brace
         * @return the closing brace token if the list qualifies and contains at least one
         *         literal; null otherwise
         */
        private AntlrToken findEndOfSequenceOfLiterals(final Iterable<AntlrToken> remainingTokens) {
            boolean seenLiteral = false;
            int braceCount = 0;
            for (final AntlrToken token : remainingTokens) {
                switch (token.getKind()) {
                case CSharpLexer.BIN_INTEGER_LITERAL:
                case CSharpLexer.CHARACTER_LITERAL:
                case CSharpLexer.HEX_INTEGER_LITERAL:
                case CSharpLexer.INTEGER_LITERAL:
                case CSharpLexer.REAL_LITERAL:
                    seenLiteral = true;
                    break; // can be skipped; continue to the next token
                case CSharpLexer.COMMA:
                    break; // can be skipped; continue to the next token
                case CSharpLexer.OPEN_BRACE:
                    braceCount++;
                    break; // curly braces are allowed, as long as they're balanced
                case CSharpLexer.CLOSE_BRACE:
                    braceCount--;
                    if (braceCount < 0) {
                        // end of the list; skip all contents
                        return seenLiteral ? token : null;
                    } else {
                        // curly braces are not yet balanced; continue to the next token
                        break;
                    }
                default:
                    // some other token than the expected ones; this is not a sequence of literals
                    return null;
                }
            }
            // Input ended before the list was closed.
            return null;
        }

        /** Returns true while inside a literal sequence that is being discarded. */
        private boolean isDiscardingLiterals() {
            return discardingLiteralsUntil != null;
        }

        @Override
        protected boolean isLanguageSpecificDiscarding() {
            return discardingUsings || discardingNL || isDiscardingAttribute || isDiscardingLiterals() || discardCurrent;
        }
    }
}