
org.sonarsource.analyzer.commons.regex.finders.VerboseRegexFinder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sonar-regex-parsing Show documentation
Show all versions of sonar-regex-parsing Show documentation
Logic useful to read and analyze regular expressions
The newest version!
/*
* SonarSource Analyzers Regex Parsing Commons
* Copyright (C) 2009-2024 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonarsource.analyzer.commons.regex.finders;
import java.util.Collections;
import java.util.regex.Pattern;
import org.sonarsource.analyzer.commons.regex.RegexIssueReporter;
import org.sonarsource.analyzer.commons.regex.ast.CharacterClassElementTree;
import org.sonarsource.analyzer.commons.regex.ast.CharacterClassTree;
import org.sonarsource.analyzer.commons.regex.ast.CharacterClassUnionTree;
import org.sonarsource.analyzer.commons.regex.ast.EscapedCharacterClassTree;
import org.sonarsource.analyzer.commons.regex.ast.Quantifier;
import org.sonarsource.analyzer.commons.regex.ast.RegexBaseVisitor;
import org.sonarsource.analyzer.commons.regex.ast.RepetitionTree;
import static org.sonarsource.analyzer.commons.regex.ast.CharacterClassElementTree.Kind.CHARACTER_RANGE;
import static org.sonarsource.analyzer.commons.regex.ast.CharacterClassElementTree.Kind.ESCAPED_CHARACTER_CLASS;
import static org.sonarsource.analyzer.commons.regex.ast.CharacterClassElementTree.Kind.PLAIN_CHARACTER;
import static org.sonarsource.analyzer.commons.regex.ast.CharacterClassElementTree.Kind.UNION;
public class VerboseRegexFinder extends RegexBaseVisitor {
private static final String VERBOSE_QUANTIFIER_MESSAGE = "Use concise quantifier syntax '%s' instead of '%s'.";
private static final String VERBOSE_CHARACTER_CLASS_MESSAGE = "Use concise character class syntax '%s' instead of '%s'.";
private final RegexIssueReporter.ElementIssue regexElementIssueReporter;
public VerboseRegexFinder(RegexIssueReporter.ElementIssue regexElementIssueReporter) {
this.regexElementIssueReporter = regexElementIssueReporter;
}
@Override
public void visitCharacterClass(CharacterClassTree tree) {
checkBulkyAlphaNumericCharacterClass(tree);
checkBulkyNumericCharacterClass(tree);
checkBulkyAnyCharacterClass(tree);
super.visitCharacterClass(tree);
}
@Override
public void visitRepetition(RepetitionTree tree) {
checkBulkyQuantifier(tree.getQuantifier());
super.visitRepetition(tree);
}
private void checkBulkyAlphaNumericCharacterClass(CharacterClassTree tree) {
CharacterClassElementTree element = tree.getContents();
if (element.is(UNION) && ((CharacterClassUnionTree) element).getCharacterClasses().size() == 4) {
boolean hasDigit = false;
boolean hasLowerCase = false;
boolean hasUpperCase = false;
boolean hasUnderscore = false;
for (CharacterClassElementTree subElement : ((CharacterClassUnionTree) element).getCharacterClasses()) {
String raw = subElement.getText();
if (subElement.is(CHARACTER_RANGE)) {
hasDigit |= "0-9".equals(raw);
hasLowerCase |= "a-z".equals(raw);
hasUpperCase |= "A-Z".equals(raw);
} else if (subElement.is(PLAIN_CHARACTER)) {
hasUnderscore |= "_".equals(raw);
}
}
if (hasDigit && hasLowerCase && hasUpperCase && hasUnderscore) {
String expected = backslash(tree) + (tree.isNegated() ? "W" : "w");
reportVerboseCharacterClass(expected, tree);
}
}
}
private void checkBulkyNumericCharacterClass(CharacterClassTree tree) {
CharacterClassElementTree element = tree.getContents();
if (element.is(CHARACTER_RANGE) && "0-9".equals(element.getText())) {
String expected = backslash(tree) + (tree.isNegated() ? "D" : "d");
reportVerboseCharacterClass(expected, tree);
}
}
private void checkBulkyAnyCharacterClass(CharacterClassTree tree) {
CharacterClassElementTree element = tree.getContents();
if (tree.isNegated() || !element.is(UNION) || ((CharacterClassUnionTree) element).getCharacterClasses().size() != 2) {
return;
}
boolean hasLowerEscapeW = false;
boolean hasUpperEscapeW = false;
boolean hasLowerEscapeD = false;
boolean hasUpperEscapeD = false;
boolean hasLowerEscapeS = false;
boolean hasUpperEscapeS = false;
for (CharacterClassElementTree subElement : ((CharacterClassUnionTree) element).getCharacterClasses()) {
if (subElement.is(ESCAPED_CHARACTER_CLASS)) {
char type = ((EscapedCharacterClassTree) subElement).getType();
hasLowerEscapeW |= 'w' == type;
hasUpperEscapeW |= 'W' == type;
hasLowerEscapeD |= 'd' == type;
hasUpperEscapeD |= 'D' == type;
hasLowerEscapeS |= 's' == type;
hasUpperEscapeS |= 'S' == type;
}
}
boolean isBulkyAnyCharacterClass = (hasLowerEscapeW && hasUpperEscapeW)
|| (hasLowerEscapeD && hasUpperEscapeD)
|| (hasLowerEscapeS && hasUpperEscapeS && tree.activeFlags().contains(Pattern.DOTALL));
if (isBulkyAnyCharacterClass) {
reportVerboseCharacterClass(".", tree);
}
}
private void reportVerboseCharacterClass(String expected, CharacterClassTree tree) {
String message = String.format(VERBOSE_CHARACTER_CLASS_MESSAGE, expected, tree.getText());
regexElementIssueReporter.report(tree, message, null, Collections.emptyList());
}
private void checkBulkyQuantifier(Quantifier quantifier) {
String raw = quantifier.getText();
BulkyQuantifier bulkyQuantifier = null;
if (Pattern.matches("\\{0,1}\\??$", raw)) {
bulkyQuantifier = new BulkyQuantifier("?", "{0,1}");
} else if (Pattern.matches("\\{0,}\\??", raw)) {
bulkyQuantifier = new BulkyQuantifier("*", "{0,}");
} else if (Pattern.matches("\\{1,}\\??$", raw)) {
bulkyQuantifier = new BulkyQuantifier("+", "{1,}");
} else if (Pattern.matches("\\{(\\d+),\\1}\\??$", raw)) {
int min = quantifier.getMinimumRepetitions();
bulkyQuantifier = new BulkyQuantifier(String.format("{%d}", min), String.format("{%d,%d}", min, min));
}
if (bulkyQuantifier != null) {
regexElementIssueReporter.report(quantifier, bulkyQuantifier.getMessage(), null, Collections.emptyList());
}
}
private static class BulkyQuantifier{
private final String concise;
private final String verbose;
protected BulkyQuantifier(String concise, String verbose) {
this.concise = concise;
this.verbose = verbose;
}
protected String getMessage() {
return String.format(VERBOSE_QUANTIFIER_MESSAGE, concise, verbose);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy