// net.sourceforge.plantuml.regexdiagram.RegexExpression Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of plantuml-lgpl Show documentation
// Show all versions of plantuml-lgpl Show documentation
// PlantUML is a component that allows you to quickly write diagrams from text.
// THIS FILE HAS BEEN GENERATED BY A PREPROCESSOR.
/* +=======================================================================
* |
* | PlantUML : a free UML diagram generator
* |
* +=======================================================================
*
* (C) Copyright 2009-2024, Arnaud Roques
*
* Project Info: https://plantuml.com
*
* If you like this project or if you find it useful, you can support us at:
*
* https://plantuml.com/patreon (only 1$ per month!)
* https://plantuml.com/liberapay (only 1€ per month!)
* https://plantuml.com/paypal
*
*
* PlantUML is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* PlantUML distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library. If not, see <https://www.gnu.org/licenses/>.
*
* PlantUML can occasionally display sponsored or advertising messages. Those
* messages are usually generated on welcome or error images and never on
* functional diagrams.
* See https://plantuml.com/professional if you want to remove them
*
* Images (whatever their format : PNG, SVG, EPS...) generated by running PlantUML
* are owned by the author of their corresponding sources code (that is, their
* textual description in PlantUML language). Those images are not covered by
* this LGPL license.
*
* The generated images can then be used without any reference to the LGPL license.
* It is not even necessary to stipulate that they have been generated with PlantUML,
* although this will be appreciated by the PlantUML team.
*
* There is an exception : if the textual description in PlantUML language is also covered
* by any license, then the generated images are logically covered
* by the very same license.
*
* This is the IGY distribution (Install GraphViz by Yourself).
* You have to install GraphViz and to setup the GRAPHVIZ_DOT environment variable
* (see https://plantuml.com/graphviz-dot )
*
* Icons provided by OpenIconic : https://useiconic.com/open
* Archimate sprites provided by Archi : http://www.archimatetool.com
* Stdlib AWS provided by https://github.com/milo-minderbinder/AWS-PlantUML
* Stdlib Icons provided https://github.com/tupadr3/plantuml-icon-font-sprites
* ASCIIMathML (c) Peter Jipsen http://www.chapman.edu/~jipsen
* ASCIIMathML (c) David Lippman http://www.pierce.ctc.edu/dlippman
* CafeUndZopfli ported by Eugene Klyuchnikov https://github.com/eustas/CafeUndZopfli
* Brotli (c) by the Brotli Authors https://github.com/google/brotli
* Themes (c) by Brett Schwarz https://github.com/bschwarz/puml-themes
* Twemoji (c) by Twitter at https://twemoji.twitter.com/
*
*/
package net.sourceforge.plantuml.regexdiagram;
import java.util.ArrayList;
import java.util.List;
import net.sourceforge.plantuml.utils.CharInspector;
public class RegexExpression {
// ::remove folder when __HAXE__
/**
 * Splits a regular expression into a flat list of tokens.
 *
 * The input is consumed from left to right until {@code it.peek(0)} returns
 * {@code '\0'}. At each position the recognizers are tried in a fixed order
 * and the first one that matches decides which token is produced. The order
 * matters: several constructs share a prefix (every group flavor starts with
 * '(' and every escape starts with a backslash), so the more specific
 * recognizers are tested before the generic ones.
 *
 * @param it the character source; it is advanced past everything that is read
 * @return the tokens in the order they appear in the input
 * @throws RegexParsingException if one of the readers rejects the input
 *                               (unclosed comment, bad quantifier, ...)
 */
public static List<ReToken> parse(CharInspector it) throws RegexParsingException {
    final List<ReToken> result = new ArrayList<>();
    char current;
    while ((current = it.peek(0)) != '\0') {
        if (isStartAnchor(it)) {
            result.add(new ReToken(ReTokenType.ANCHOR, readAnchor(it)));
        } else if (isEscapedChar(it)) {
            // Keep only the escaped character, then step over the backslash and the character.
            result.add(new ReToken(ReTokenType.ESCAPED_CHAR, "" + it.peek(1)));
            it.jump();
            it.jump();
        } else if (current == '|') {
            result.add(new ReToken(ReTokenType.ALTERNATIVE, "|"));
            it.jump();
        } else if (isStartPosixGroup(it)) {
            // "[[:" has to be tested before the plain '[' below.
            result.add(new ReToken(ReTokenType.CLASS, readGroupPosix(it)));
        } else if (current == '[') {
            result.add(new ReToken(ReTokenType.GROUP, readGroup(it)));
        } else if (isStartComment(it)) {
            // Comments produce no token.
            skipComment(it);
        } else if (isStartLookAhead(it)) {
            // Look-around and named groups emit their own token followed by a plain "(".
            result.add(readLookAhead(it));
            result.add(new ReToken(ReTokenType.PARENTHESIS_OPEN, "("));
        } else if (isStartLookBehind(it)) {
            result.add(readLookBehind(it));
            result.add(new ReToken(ReTokenType.PARENTHESIS_OPEN, "("));
        } else if (isStartNamedCapturingGroup(it)) {
            result.add(readNamedGroup(it));
            result.add(new ReToken(ReTokenType.PARENTHESIS_OPEN, "("));
        } else if (isStartOpenParenthesis(it)) {
            result.add(readOpenParenthesis(it));
        } else if (current == ')') {
            result.add(new ReToken(ReTokenType.PARENTHESIS_CLOSE, ")"));
            it.jump();
        } else if (isStartQuantifier(it)) {
            result.add(new ReToken(ReTokenType.QUANTIFIER, readQuantifier(it)));
        } else if (isStartOctalEscape(it)) {
            // Backslash plus the next 4 characters.
            result.add(new ReToken(ReTokenType.CLASS, readUnicodeOrOctalEscape(it, 4)));
        } else if (isStartUnicodeEscape(it)) {
            // Backslash plus the next 5 characters ('u' included).
            result.add(new ReToken(ReTokenType.CLASS, readUnicodeOrOctalEscape(it, 5)));
        } else if (isStartUnicodeClass(it)) {
            result.add(new ReToken(ReTokenType.CLASS, readUnicodeClass(it)));
        } else if (isStartClass(it)) {
            result.add(new ReToken(ReTokenType.CLASS, readClass(it)));
        } else if (isSimpleLetter(current)) {
            result.add(new ReToken(ReTokenType.SIMPLE_CHAR, "" + current));
            it.jump();
        } else {
            // Defensive: no recognizer accepted the current character.
            throw new IllegalStateException("Unexpected character '" + current + "'");
        }
    }
    return result;
}
/**
 * Tells whether the input at the current position starts with "(?=" or "(?!".
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when a look-ahead opener is found
 */
private static boolean isStartLookAhead(CharInspector it) {
    if (it.peek(0) != '(' || it.peek(1) != '?') {
        return false;
    }
    final char kind = it.peek(2);
    return kind == '=' || kind == '!';
}
/**
 * Tells whether the input at the current position starts with "(?<=" or "(?<!".
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when a look-behind opener is found
 */
private static boolean isStartLookBehind(CharInspector it) {
    final boolean hasPrefix = it.peek(0) == '(' && it.peek(1) == '?' && it.peek(2) == '<';
    if (!hasPrefix) {
        return false;
    }
    final char kind = it.peek(3);
    return kind == '=' || kind == '!';
}
/**
 * Tells whether the current character is an opening parenthesis.
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when the current character is '('
 */
private static boolean isStartOpenParenthesis(CharInspector it) {
    return it.peek(0) == '(';
}
/**
 * Tells whether the input at the current position starts with "[[:".
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when the three next characters are '[', '[' and ':'
 */
private static boolean isStartPosixGroup(CharInspector it) {
    return it.peek(0) == '[' && it.peek(1) == '[' && it.peek(2) == ':';
}
/**
 * Tells whether the input at the current position starts a named capturing
 * group, i.e. "(?<" followed by a non-empty name and a closing '>'.
 *
 * The name must start with a letter or an underscore and may continue with
 * letters, digits or underscores. Look-behind openers ("(?<=" and "(?<!")
 * are rejected here because '=' and '!' are not valid name characters.
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when a well-formed "(?<name>" prefix is found
 */
private static boolean isStartNamedCapturingGroup(CharInspector it) {
    if (it.peek(0) != '(' || it.peek(1) != '?' || it.peek(2) != '<') {
        return false;
    }
    final int nameStart = 3;
    for (int i = nameStart; it.peek(i) != 0; i++) {
        final char ch = it.peek(i);
        if (ch == '>') {
            // "(?<>" has an empty name and is not a named group.
            return i > nameStart;
        }
        final boolean valid;
        if (i == nameStart) {
            valid = Character.isLetter(ch) || ch == '_';
        } else {
            // Digits are allowed, but not as the first character of the name.
            valid = Character.isLetterOrDigit(ch) || ch == '_';
        }
        if (!valid) {
            return false;
        }
    }
    // End of input reached before the closing '>'.
    return false;
}
/**
 * Tells whether the input at the current position starts with "(?#".
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when a comment opener is found
 */
private static boolean isStartComment(CharInspector it) {
    return it.peek(0) == '(' && it.peek(1) == '?' && it.peek(2) == '#';
}
/**
 * Consumes an inline comment, from its three-character opener up to and
 * including the first ')'. The comment text is discarded.
 *
 * @param it the character source, positioned on the first character of the opener
 * @throws RegexParsingException if the end of input is reached before a ')'
 */
private static void skipComment(CharInspector it) throws RegexParsingException {
    // Step over the three-character opener.
    it.jump();
    it.jump();
    it.jump();
    while (true) {
        final char ch = it.peek(0);
        if (ch == 0) {
            throw new RegexParsingException("Unclosed comment");
        }
        it.jump();
        if (ch == ')') {
            return;
        }
    }
}
/**
 * Consumes three characters and builds a LOOK_AHEAD token whose text is "?"
 * followed by the third character consumed.
 *
 * @param it the character source, positioned on the first character of the opener
 * @return the LOOK_AHEAD token
 */
private static ReToken readLookAhead(CharInspector it) throws RegexParsingException {
    // Step over the first two characters of the opener.
    for (int skipped = 0; skipped < 2; skipped++) {
        it.jump();
    }
    final char kind = it.peek(0);
    it.jump();
    return new ReToken(ReTokenType.LOOK_AHEAD, "?" + kind);
}
/**
 * Consumes four characters and builds a LOOK_BEHIND token whose text is "?<"
 * followed by the fourth character consumed.
 *
 * @param it the character source, positioned on the first character of the opener
 * @return the LOOK_BEHIND token
 */
private static ReToken readLookBehind(CharInspector it) throws RegexParsingException {
    // Step over the first three characters of the opener.
    for (int skipped = 0; skipped < 3; skipped++) {
        it.jump();
    }
    final char kind = it.peek(0);
    it.jump();
    return new ReToken(ReTokenType.LOOK_BEHIND, "?<" + kind);
}
/**
 * Consumes a three-character opener, then everything up to and including the
 * next '>', and builds a NAMED_GROUP token carrying the text found before
 * that '>'.
 *
 * @param it the character source, positioned on the first character of the opener
 * @return the NAMED_GROUP token
 * @throws RegexParsingException if the end of input is reached before '>'
 */
private static ReToken readNamedGroup(CharInspector it) throws RegexParsingException {
    for (int skipped = 0; skipped < 3; skipped++) {
        it.jump();
    }
    final StringBuilder name = new StringBuilder();
    for (char ch = it.peek(0); ch != '>'; ch = it.peek(0)) {
        if (ch == 0) {
            throw new RegexParsingException("Unclosed named capturing group");
        }
        name.append(ch);
        it.jump();
    }
    // Step over the closing '>'.
    it.jump();
    return new ReToken(ReTokenType.NAMED_GROUP, name.toString());
}
/**
 * Consumes the current character and builds a PARENTHESIS_OPEN token from it.
 * A following "?:" and then a following "?!" are each consumed and appended
 * to the token text when present; the two checks run one after the other.
 *
 * @param it the character source, positioned on the character to consume
 * @return the PARENTHESIS_OPEN token
 */
private static ReToken readOpenParenthesis(CharInspector it) {
    final StringBuilder text = new StringBuilder();
    text.append(it.peek(0));
    it.jump();
    // Test ':' first and '!' second, exactly in that order.
    for (final char modifier : new char[] { ':', '!' }) {
        if (it.peek(0) == '?' && it.peek(1) == modifier) {
            it.jump();
            it.jump();
            text.append('?').append(modifier);
        }
    }
    return new ReToken(ReTokenType.PARENTHESIS_OPEN, text.toString());
}
/**
 * Tells whether the current character is one of '*', '+', '?' or '{'.
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when the current character opens a quantifier
 */
private static boolean isStartQuantifier(CharInspector it) {
    switch (it.peek(0)) {
    case '*':
    case '+':
    case '?':
    case '{':
        return true;
    default:
        return false;
    }
}
/**
 * Consumes a quantifier and returns its text.
 *
 * The first character is always consumed. When it is '{', the following
 * characters are consumed up to and including the first '}' (or up to the end
 * of input). A '?' that directly follows is consumed as well and appended,
 * which covers lazy forms such as "*?" or "{2,3}?".
 *
 * @param it the character source, positioned on the first quantifier character
 * @return the quantifier text, including the trailing '?' when present
 * @throws RegexParsingException if a quantifier starting with '{' is not made
 *                               of digits and commas between braces
 */
private static String readQuantifier(CharInspector it) throws RegexParsingException {
    final char first = it.peek(0);
    it.jump();
    final StringBuilder tmp = new StringBuilder();
    tmp.append(first);
    if (first == '{') {
        while (it.peek(0) != 0) {
            final char ch = it.peek(0);
            tmp.append(ch);
            it.jump();
            if (ch == '}') {
                break;
            }
        }
    }
    if (it.peek(0) == '?') {
        tmp.append('?');
        it.jump();
    }
    final String result = tmp.toString();
    // The optional trailing '?' must be allowed here: it was appended just above,
    // so a pattern ending on '}' would reject every lazy bounded quantifier.
    if (first == '{' && result.matches("^\\{[0-9,]+\\}\\??$") == false) {
        throw new RegexParsingException("Bad quantifier " + result);
    }
    return result;
}
/**
 * Tells whether the input at the current position is a backslash followed by
 * one of the characters {@code . * + \ ? ^ $ | ( ) [ ] { } < >}.
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when an escaped character is found
 */
private static boolean isEscapedChar(CharInspector it) {
    if (it.peek(0) != '\\') {
        return false;
    }
    // '+' belongs here next to the other quantifier characters '*' and '?'.
    final String escapable = ".*+\\?^$|()[]{}<>";
    return escapable.indexOf(it.peek(1)) >= 0;
}
/**
 * Consumes three characters, then everything up to and including the next
 * ':', then two more characters.
 *
 * The returned text is ":" followed by the characters read up to and
 * including that second ':' (or up to the end of input if there is none).
 * The two trailing characters are skipped without being checked.
 *
 * @param it the character source, positioned on the first of the three opener characters
 * @return the text described above, e.g. ":alpha:" for the input "[[:alpha:]]"
 */
private static String readGroupPosix(CharInspector it) {
    for (int skipped = 0; skipped < 3; skipped++) {
        it.jump();
    }
    final StringBuilder name = new StringBuilder(":");
    char ch;
    while ((ch = it.peek(0)) != 0) {
        it.jump();
        name.append(ch);
        if (ch == ':') {
            break;
        }
    }
    // Skip two more characters unconditionally.
    it.jump();
    it.jump();
    return name.toString();
}
/**
 * Consumes a bracket expression and returns the text found between the
 * opening '[' and the first unescaped ']'.
 *
 * A backslash is copied together with the character that follows it, so an
 * escaped ']' does not end the group. If the end of input is reached first,
 * whatever was read so far is returned.
 *
 * @param it the character source, positioned on '['
 * @return the content of the group, without the surrounding brackets
 * @throws IllegalStateException if the current character is not '['
 */
private static String readGroup(CharInspector it) {
    if (it.peek(0) != '[') {
        throw new IllegalStateException();
    }
    it.jump();
    final StringBuilder content = new StringBuilder();
    char ch;
    while ((ch = it.peek(0)) != 0) {
        it.jump();
        if (ch == ']') {
            break;
        }
        content.append(ch);
        if (ch == '\\') {
            // Copy the escaped character verbatim so it is never taken as the terminator.
            content.append(it.peek(0));
            it.jump();
        }
    }
    return content.toString();
}
/**
 * Consumes a backslash and everything up to and including the next '}', and
 * returns the whole text (backslash and '}' included).
 *
 * @param it the character source, positioned on the backslash
 * @return the text read
 * @throws IllegalStateException  if the current character is not a backslash
 * @throws RegexParsingException if the end of input is reached before '}'
 */
private static String readUnicodeClass(CharInspector it) throws RegexParsingException {
    if (it.peek(0) != '\\') {
        throw new IllegalStateException();
    }
    it.jump();
    final StringBuilder text = new StringBuilder("\\");
    for (char ch = it.peek(0); ch != 0; ch = it.peek(0)) {
        it.jump();
        text.append(ch);
        if (ch == '}') {
            return text.toString();
        }
    }
    throw new RegexParsingException("Unexpected end of data");
}
/**
 * Consumes a backslash followed by exactly {@code nb} characters and returns
 * the whole text. The characters are copied as they are, without validation.
 *
 * @param it the character source, positioned on the backslash
 * @param nb the number of characters to read after the backslash
 * @return the backslash followed by the {@code nb} characters read
 * @throws IllegalStateException  if the current character is not a backslash
 * @throws RegexParsingException if the end of input is reached too early
 */
private static String readUnicodeOrOctalEscape(CharInspector it, int nb) throws RegexParsingException {
    if (it.peek(0) != '\\') {
        throw new IllegalStateException();
    }
    it.jump();
    final StringBuilder text = new StringBuilder("\\");
    int remaining = nb;
    while (remaining-- > 0) {
        final char ch = it.peek(0);
        if (ch == 0) {
            throw new RegexParsingException("Unexpected end of data");
        }
        text.append(ch);
        it.jump();
    }
    return text.toString();
}
/**
 * Consumes either a single '.' or a backslash plus the character after it,
 * and returns the text consumed.
 *
 * @param it the character source, positioned on '.' or on a backslash
 * @return "." or the two-character escape
 * @throws IllegalStateException if the current character is neither '.' nor a backslash
 */
private static String readClass(CharInspector it) {
    switch (it.peek(0)) {
    case '.': {
        it.jump();
        return ".";
    }
    case '\\': {
        it.jump();
        final char designator = it.peek(0);
        it.jump();
        return "\\" + designator;
    }
    default:
        throw new IllegalStateException();
    }
}
/**
 * Tells whether the current character is '.' or a backslash.
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when the current character is '.' or a backslash
 */
private static boolean isStartClass(CharInspector it) {
    final char first = it.peek(0);
    return first == '.' || first == '\\';
}
/**
 * Tells whether the input at the current position starts a brace-delimited
 * escape: a backslash, then 'p', 'P' or 'x', then '{'.
 *
 * The upper-case 'P' form is accepted as well; without it the input
 * {@code \P{L}} would not be read as a single brace-delimited unit.
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when such an escape is found
 */
private static boolean isStartUnicodeClass(CharInspector it) {
    if (it.peek(0) != '\\' || it.peek(2) != '{') {
        return false;
    }
    final char kind = it.peek(1);
    return kind == 'p' || kind == 'P' || kind == 'x';
}
/**
 * Tells whether the input at the current position is a backslash followed by 'u'.
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when the two next characters are a backslash and 'u'
 */
private static boolean isStartUnicodeEscape(CharInspector it) {
    return it.peek(0) == '\\' && it.peek(1) == 'u';
}
/**
 * Tells whether the input at the current position is a backslash followed by '0'.
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when the two next characters are a backslash and '0'
 */
private static boolean isStartOctalEscape(CharInspector it) {
    return it.peek(0) == '\\' && it.peek(1) == '0';
}
/**
 * Tells whether a character can be emitted as a plain literal, which is the
 * case for every character except the backslash and '.'.
 *
 * @param ch the character to test
 * @return {@code false} for a backslash or '.', {@code true} otherwise
 */
private static boolean isSimpleLetter(char ch) {
    return ch != '\\' && ch != '.';
}
/**
 * Tells whether the input at the current position is an anchor: '^', '$', or
 * a backslash followed by one of 'A', 'Z', 'z', 'G', 'b', 'B'.
 *
 * @param it the character source; only peeked, never advanced
 * @return {@code true} when an anchor is found
 */
private static boolean isStartAnchor(CharInspector it) {
    final char first = it.peek(0);
    if (first == '^' || first == '$') {
        return true;
    }
    if (first != '\\') {
        return false;
    }
    return "AZzGbB".indexOf(it.peek(1)) >= 0;
}
/**
 * Consumes an anchor and returns its text: a single '^' or '$', or a
 * backslash plus the character after it.
 *
 * @param it the character source, positioned on '^', '$' or a backslash
 * @return the one- or two-character anchor text
 * @throws IllegalStateException if the current character is none of those
 */
private static String readAnchor(CharInspector it) {
    final char first = it.peek(0);
    switch (first) {
    case '^':
    case '$': {
        it.jump();
        return String.valueOf(first);
    }
    case '\\': {
        it.jump();
        final char designator = it.peek(0);
        it.jump();
        return "\\" + designator;
    }
    default:
        throw new IllegalStateException();
    }
}
}