com.vladsch.flexmark.ast.util.Parsing Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of flexmark Show documentation
Show all versions of flexmark Show documentation
Core of flexmark-java (implementation of CommonMark for parsing markdown and rendering to HTML)
The newest version!
package com.vladsch.flexmark.ast.util;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.data.DataHolder;
import com.vladsch.flexmark.util.format.TableFormatOptions;
import com.vladsch.flexmark.util.misc.CharPredicate;
import com.vladsch.flexmark.util.sequence.Escaping;
import com.vladsch.flexmark.util.sequence.SequenceUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.util.HashMap;
import java.util.function.Function;
import java.util.regex.Pattern;
import static com.vladsch.flexmark.parser.Parser.*;
public class Parsing {
final public static char INTELLIJ_DUMMY_IDENTIFIER_CHAR = TableFormatOptions.INTELLIJ_DUMMY_IDENTIFIER_CHAR;
final public static String INTELLIJ_DUMMY_IDENTIFIER = TableFormatOptions.INTELLIJ_DUMMY_IDENTIFIER;
// final public static String XML_NAMESPACE_START = "[_A-Za-z]";
// final public static String XML_NAMESPACE_CHAR = XML_NAME_SPACE_START + "|-|.|[0-9]";
final public static String XML_NAMESPACE_START = "[_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]"; // excluded [#x10000-#xEFFFF]
final public static String XML_NAMESPACE_CHAR = XML_NAMESPACE_START + "|[.0-9\u00B7\u0300-\u036F\u203F-\u2040-]";
final public static String XML_NAMESPACE = "(?:(?:" + XML_NAMESPACE_START + ")(?:" + XML_NAMESPACE_CHAR + ")*:)?";
// save options for others to use when only parsing instance is available
final public DataHolder options;
final private static String ST_EOL = "(?:\r\n|\r|\n)";
final private static String ST_ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE;
final private static Pattern ST_LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|" + ST_ESCAPED_CHAR + "|\\\\){0,999}\\]");
final private static String ST_LINK_TITLE_STRING = "(?:\"(" + ST_ESCAPED_CHAR + "|[^\"\\x00])*\"" +
'|' +
"'(" + ST_ESCAPED_CHAR + "|[^'\\x00])*'" +
'|' +
"\\((" + ST_ESCAPED_CHAR + "|[^)\\x00])*\\))";
final private static Pattern ST_LINK_TITLE = Pattern.compile("^" + ST_LINK_TITLE_STRING);
final public String EOL = ST_EOL;
final public String ESCAPED_CHAR = ST_ESCAPED_CHAR;
final public Pattern LINK_LABEL = ST_LINK_LABEL;
final public Pattern LINK_DESTINATION_ANGLES;
final public String LINK_TITLE_STRING = ST_LINK_TITLE_STRING;
final public Pattern LINK_TITLE = ST_LINK_TITLE;
final public Pattern LINK_DESTINATION;
final public Pattern LINK_DESTINATION_MATCHED_PARENS;
final public Pattern LINK_DESTINATION_MATCHED_PARENS_NOSP;
final private static String ST_EXCLUDED_0_TO_SPACE_IDI = "\u0000-\u001e\u0020";
final private static String ST_EXCLUDED_0_TO_SPACE_NO_IDI = "\u0000-\u0020";
final private static String ST_ADDITIONAL_CHARS_IDI = "\u001f";
final private static String ST_ADDITIONAL_CHARS_NO_IDI = "";
final private static String ST_ADDITIONAL_CHARS_SET_IDI = "[\u001f]";
final private static String ST_ADDITIONAL_CHARS_SET_NO_IDI = "";
final public static String ST_HTMLCOMMENT = "|";
final public static String ST_PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";
final public static String ST_CDATA = "";
final public static String ST_SINGLEQUOTEDVALUE = "'[^']*'";
final public static String ST_DOUBLEQUOTEDVALUE = "\"[^\"]*\"";
final public String HTMLCOMMENT = ST_HTMLCOMMENT;
final public String PROCESSINGINSTRUCTION = ST_PROCESSINGINSTRUCTION;
final public String CDATA = ST_CDATA;
final public String SINGLEQUOTEDVALUE = ST_SINGLEQUOTEDVALUE;
final public String DOUBLEQUOTEDVALUE = ST_DOUBLEQUOTEDVALUE;
final private static String ST_ASCII_PUNCTUATION = "'!\"#\\$%&\\*\\+,\\-\\./:;=\\?@\\\\\\^_`\\|~";
final private static String ST_ASCII_OPEN_PUNCTUATION = "\\(<\\[\\{";
final private static String ST_ASCII_CLOSE_PUNCTUATION = "\\)>\\]\\}";
final private static Pattern ST_PUNCTUATION = Pattern.compile(
"^[" + ST_ASCII_PUNCTUATION + ST_ASCII_OPEN_PUNCTUATION + ST_ASCII_CLOSE_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]");
final private static Pattern ST_PUNCTUATION_OPEN = Pattern.compile(
"^[" + ST_ASCII_PUNCTUATION + ST_ASCII_OPEN_PUNCTUATION + "]|[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]&&[^" + ST_ASCII_CLOSE_PUNCTUATION + "]");
final private static Pattern ST_PUNCTUATION_CLOSE = Pattern.compile(
"^[" + ST_ASCII_PUNCTUATION + ST_ASCII_CLOSE_PUNCTUATION + "]|[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]&&[^" + ST_ASCII_OPEN_PUNCTUATION + "]");
final private static Pattern ST_PUNCTUATION_ONLY = Pattern.compile(
"^[" + ST_ASCII_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]&&[^" + ST_ASCII_OPEN_PUNCTUATION + ST_ASCII_CLOSE_PUNCTUATION + "]");
final private static Pattern ST_PUNCTUATION_OPEN_ONLY = Pattern.compile(
"^[" + ST_ASCII_OPEN_PUNCTUATION + "]");
final private static Pattern ST_PUNCTUATION_CLOSE_ONLY = Pattern.compile(
"^[" + ST_ASCII_CLOSE_PUNCTUATION + "]");
final public String ASCII_PUNCTUATION = ST_ASCII_PUNCTUATION;
final public String ASCII_OPEN_PUNCTUATION = ST_ASCII_OPEN_PUNCTUATION;
final public String ASCII_CLOSE_PUNCTUATION = ST_ASCII_CLOSE_PUNCTUATION;
final public Pattern PUNCTUATION = ST_PUNCTUATION;
final public Pattern PUNCTUATION_OPEN = ST_PUNCTUATION_OPEN;
final public Pattern PUNCTUATION_CLOSE = ST_PUNCTUATION_CLOSE;
final public Pattern PUNCTUATION_ONLY = ST_PUNCTUATION_ONLY;
final public Pattern PUNCTUATION_OPEN_ONLY = ST_PUNCTUATION_OPEN_ONLY;
final public Pattern PUNCTUATION_CLOSE_ONLY = ST_PUNCTUATION_CLOSE_ONLY;
final private static Pattern ST_ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE);
final private static Pattern ST_TICKS = Pattern.compile("`+");
final private static Pattern ST_TICKS_HERE = Pattern.compile("^`+");
final private static Pattern ST_SPNL = Pattern.compile("^(?:[ \t])*(?:" + ST_EOL + "(?:[ \t])*)?");
final private static Pattern ST_SPNL_URL = Pattern.compile("^(?:[ \t])*" + ST_EOL);
final private static Pattern ST_SPNI = Pattern.compile("^ {0,3}");
final private static Pattern ST_SP = Pattern.compile("^(?:[ \t])*");
final private static Pattern ST_REST_OF_LINE = Pattern.compile("^.*" + ST_EOL);
final private static Pattern ST_UNICODE_WHITESPACE_CHAR = Pattern.compile("^[\\p{Zs}\t\r\n\f]");
final private static Pattern ST_WHITESPACE = Pattern.compile("\\s+");
final private static Pattern ST_FINAL_SPACE = Pattern.compile(" *$");
final private static Pattern ST_LINE_END = Pattern.compile("^[ \t]*(?:" + ST_EOL + "|$)");
final private static Pattern ST_LINK_DESTINATION_ANGLES_SPC = Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\\\x00]" + '|' + ST_ESCAPED_CHAR + '|' + "\\\\| (?![\"']))*[>])");
final private static Pattern ST_LINK_DESTINATION_ANGLES_NO_SPC = Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\\\x00]" + '|' + ST_ESCAPED_CHAR + '|' + "\\\\)*[>])");
final public Pattern ESCAPABLE = ST_ESCAPABLE;
final public Pattern TICKS = ST_TICKS;
final public Pattern TICKS_HERE = ST_TICKS_HERE;
final public Pattern EMAIL_AUTOLINK;
final public Pattern AUTOLINK;
final public Pattern WWW_AUTOLINK;
final public Pattern SPNL = ST_SPNL;
final public Pattern SPNL_URL = ST_SPNL_URL;
final public Pattern SPNI = ST_SPNI;
final public Pattern SP = ST_SP;
final public Pattern REST_OF_LINE = ST_REST_OF_LINE;
final public Pattern UNICODE_WHITESPACE_CHAR = ST_UNICODE_WHITESPACE_CHAR;
final public Pattern WHITESPACE = ST_WHITESPACE;
final public Pattern FINAL_SPACE = ST_FINAL_SPACE;
final public Pattern LINE_END = ST_LINE_END;
// IntelliJDummyIdentifier dependent
final private static String ST_TAGNAME_IDI = "[A-Za-z" + ST_ADDITIONAL_CHARS_IDI + "][A-Za-z0-9" + ST_ADDITIONAL_CHARS_IDI + "-]*";
final private static String ST_TAGNAME_NO_IDI = "[A-Za-z" + ST_ADDITIONAL_CHARS_NO_IDI + "][A-Za-z0-9" + ST_ADDITIONAL_CHARS_NO_IDI + "-]*";
final private static String ST_UNQUOTEDVALUE_IDI = "[^\"'=<>{}`" + ST_EXCLUDED_0_TO_SPACE_IDI + "]+";
final private static String ST_UNQUOTEDVALUE_NO_IDI = "[^\"'=<>{}`" + ST_EXCLUDED_0_TO_SPACE_NO_IDI + "]+";
final private static String ST_ATTRIBUTENAME_IDI = "[a-zA-Z" + ST_ADDITIONAL_CHARS_IDI + "_:][a-zA-Z0-9" + ST_ADDITIONAL_CHARS_IDI + ":._-]*";
final private static String ST_ATTRIBUTENAME_NO_IDI = "[a-zA-Z" + ST_ADDITIONAL_CHARS_NO_IDI + "_:][a-zA-Z0-9" + ST_ADDITIONAL_CHARS_NO_IDI + ":._-]*";
final private static String ST_ATTRIBUTEVALUE_IDI = "(?:" + ST_UNQUOTEDVALUE_IDI + "|" + ST_SINGLEQUOTEDVALUE + "|" + ST_DOUBLEQUOTEDVALUE + ")";
final private static String ST_ATTRIBUTEVALUE_NO_IDI = "(?:" + ST_UNQUOTEDVALUE_NO_IDI + "|" + ST_SINGLEQUOTEDVALUE + "|" + ST_DOUBLEQUOTEDVALUE + ")";
final private static String ST_ATTRIBUTEVALUESPEC_IDI = "(?:" + "\\s*=" + "\\s*" + ST_ATTRIBUTEVALUE_IDI + ")";
final private static String ST_ATTRIBUTEVALUESPEC_NO_IDI = "(?:" + "\\s*=" + "\\s*" + ST_ATTRIBUTEVALUE_NO_IDI + ")";
final private static String ST_CLOSETAG_IDI = "" + ST_TAGNAME_IDI + "\\s*[>]";
final private static String ST_CLOSETAG_NO_IDI = "" + ST_TAGNAME_NO_IDI + "\\s*[>]";
final private static String ST_NS_CLOSETAG_IDI = "" + XML_NAMESPACE + ST_TAGNAME_IDI + "\\s*[>]";
final private static String ST_NS_CLOSETAG_NO_IDI = "" + XML_NAMESPACE + ST_TAGNAME_NO_IDI + "\\s*[>]";
final private static String ST_ATTRIBUTE_IDI = "(?:" + "\\s+" + ST_ATTRIBUTENAME_IDI + ST_ATTRIBUTEVALUESPEC_IDI + "?)";
final private static String ST_ATTRIBUTE_NO_IDI = "(?:" + "\\s+" + ST_ATTRIBUTENAME_NO_IDI + ST_ATTRIBUTEVALUESPEC_NO_IDI + "?)";
final private static String ST_DECLARATION_IDI = "]*>";
final private static String ST_DECLARATION_NO_IDI = "]*>";
final private static String ST_ENTITY_IDI = "&(?:#x[a-f0-9" + ST_ADDITIONAL_CHARS_IDI + "]{1,8}|#[0-9]{1,8}|[a-z" + ST_ADDITIONAL_CHARS_IDI + "][a-z0-9" + ST_ADDITIONAL_CHARS_IDI + "]{1,31});";
final private static String ST_ENTITY_NO_IDI = "&(?:#x[a-f0-9" + ST_ADDITIONAL_CHARS_NO_IDI + "]{1,8}|#[0-9]{1,8}|[a-z" + ST_ADDITIONAL_CHARS_NO_IDI + "][a-z0-9" + ST_ADDITIONAL_CHARS_NO_IDI + "]{1,31});";
final private static String ST_IN_BRACES_W_SP_IDI = "\\{\\{(?:[^{}\\\\" + ST_EXCLUDED_0_TO_SPACE_IDI + "]| |\t)*\\}\\}";
final private static String ST_IN_BRACES_W_SP_NO_IDI = "\\{\\{(?:[^{}\\\\" + ST_EXCLUDED_0_TO_SPACE_NO_IDI + "]| |\t)*\\}\\}";
final private static String ST_REG_CHAR_IDI = "[^\\\\()" + ST_EXCLUDED_0_TO_SPACE_IDI + "]";
final private static String ST_REG_CHAR_NO_IDI = "[^\\\\()" + ST_EXCLUDED_0_TO_SPACE_NO_IDI + "]";
final private static String ST_IN_MATCHED_PARENS_NOSP_IDI = "\\((" + ST_REG_CHAR_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
final private static String ST_IN_MATCHED_PARENS_NOSP_NO_IDI = "\\((" + ST_REG_CHAR_NO_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
final private static String ST_REG_CHAR_SP_IDI = "[^\\\\()" + ST_EXCLUDED_0_TO_SPACE_IDI + "]| (?![\"'])";
final private static String ST_REG_CHAR_SP_NO_IDI = "[^\\\\()" + ST_EXCLUDED_0_TO_SPACE_NO_IDI + "]| (?![\"'])";
final private static String ST_IN_MATCHED_PARENS_W_SP_IDI = "\\((" + ST_REG_CHAR_SP_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
final private static String ST_IN_MATCHED_PARENS_W_SP_NO_IDI = "\\((" + ST_REG_CHAR_SP_NO_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
final private static String ST_IN_PARENS_NOSP_IDI = "\\((" + ST_REG_CHAR_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
final private static String ST_IN_PARENS_NOSP_NO_IDI = "\\((" + ST_REG_CHAR_NO_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
final private static String ST_IN_PARENS_W_SP_IDI = "\\((" + ST_REG_CHAR_SP_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
final private static String ST_IN_PARENS_W_SP_NO_IDI = "\\((" + ST_REG_CHAR_SP_NO_IDI + '|' + ST_ESCAPED_CHAR + ")*\\)";
final private static String ST_OPENTAG_IDI = "<" + ST_TAGNAME_IDI + ST_ATTRIBUTE_IDI + "*" + "\\s*/?>";
final private static String ST_OPENTAG_NO_IDI = "<" + ST_TAGNAME_NO_IDI + ST_ATTRIBUTE_NO_IDI + "*" + "\\s*/?>";
final private static String ST_NS_OPENTAG_IDI = "<" + XML_NAMESPACE + ST_TAGNAME_IDI + ST_ATTRIBUTE_IDI + "*" + "\\s*/?>";
final private static String ST_NS_OPENTAG_NO_IDI = "<" + XML_NAMESPACE + ST_TAGNAME_NO_IDI + ST_ATTRIBUTE_NO_IDI + "*" + "\\s*/?>";
final private static String ST_REG_CHAR_PARENS_IDI = "[^\\\\" + ST_EXCLUDED_0_TO_SPACE_IDI + "]";
final private static String ST_REG_CHAR_PARENS_NO_IDI = "[^\\\\" + ST_EXCLUDED_0_TO_SPACE_NO_IDI + "]";
final private static String ST_REG_CHAR_SP_PARENS_IDI = "[^\\\\" + ST_EXCLUDED_0_TO_SPACE_IDI + "]| (?![\"'])";
final private static String ST_REG_CHAR_SP_PARENS_NO_IDI = "[^\\\\" + ST_EXCLUDED_0_TO_SPACE_NO_IDI + "]| (?![\"'])";
final private static Pattern ST_ENTITY_HERE_IDI = Pattern.compile('^' + ST_ENTITY_IDI, Pattern.CASE_INSENSITIVE);
final private static Pattern ST_ENTITY_HERE_NO_IDI = Pattern.compile('^' + ST_ENTITY_NO_IDI, Pattern.CASE_INSENSITIVE);
final public String ADDITIONAL_CHARS;
final public String EXCLUDED_0_TO_SPACE;
final public String REG_CHAR;
final public String REG_CHAR_PARENS;
final public String REG_CHAR_SP;
final public String REG_CHAR_SP_PARENS;
final public String IN_PARENS_NOSP;
final public String IN_PARENS_W_SP;
final public String IN_MATCHED_PARENS_NOSP;
final public String IN_MATCHED_PARENS_W_SP;
final public String IN_BRACES_W_SP;
final public String DECLARATION;
final public String ENTITY;
final public String TAGNAME;
final public String ATTRIBUTENAME;
final public String UNQUOTEDVALUE;
final public String ATTRIBUTEVALUE;
final public String ATTRIBUTEVALUESPEC;
final public String ATTRIBUTE;
final public String OPENTAG;
final public String CLOSETAG;
final public String HTMLTAG;
final public Pattern ENTITY_HERE;
final public Pattern HTML_TAG;
final public Pattern LIST_ITEM_MARKER;
final public int CODE_BLOCK_INDENT;
final public boolean intellijDummyIdentifier;
final public boolean htmlForTranslator;
final public String translationHtmlInlineTagPattern;
final public String translationAutolinkTagPattern;
final public boolean spaceInLinkUrl;
final public boolean parseJekyllMacroInLinkUrl;
final public String itemPrefixChars;
final public boolean listsItemMarkerSpace;
final public boolean listsOrderedItemDotOnly;
final public boolean allowNameSpace;
private static class PatternTypeFlags {
final @Nullable Boolean intellijDummyIdentifier;
final @Nullable Boolean htmlForTranslator;
final @Nullable String translationHtmlInlineTagPattern;
final @Nullable String translationAutolinkTagPattern;
final @Nullable Boolean spaceInLinkUrl;
final @Nullable Boolean parseJekyllMacroInLinkUrl;
final @Nullable String itemPrefixChars;
final @Nullable Boolean listsItemMarkerSpace;
final @Nullable Boolean listsOrderedItemDotOnly;
final @Nullable Boolean allowNameSpace;
PatternTypeFlags(DataHolder options) {
this.intellijDummyIdentifier = Parser.INTELLIJ_DUMMY_IDENTIFIER.get(options);
this.htmlForTranslator = Parser.HTML_FOR_TRANSLATOR.get(options);
this.translationHtmlInlineTagPattern = Parser.TRANSLATION_HTML_INLINE_TAG_PATTERN.get(options);
this.translationAutolinkTagPattern = Parser.TRANSLATION_AUTOLINK_TAG_PATTERN.get(options);
this.spaceInLinkUrl = Parser.SPACE_IN_LINK_URLS.get(options);
this.parseJekyllMacroInLinkUrl = Parser.PARSE_JEKYLL_MACROS_IN_URLS.get(options);
this.itemPrefixChars = LISTS_ITEM_PREFIX_CHARS.get(options);
this.listsItemMarkerSpace = LISTS_ITEM_MARKER_SPACE.get(options);
this.listsOrderedItemDotOnly = LISTS_ORDERED_ITEM_DOT_ONLY.get(options);
this.allowNameSpace = HTML_ALLOW_NAME_SPACE.get(options);
}
public PatternTypeFlags(
@Nullable Boolean intellijDummyIdentifier
, @Nullable Boolean htmlForTranslator
, @Nullable String translationHtmlInlineTagPattern
, @Nullable String translationAutolinkTagPattern
, @Nullable Boolean spaceInLinkUrl
, @Nullable Boolean parseJekyllMacroInLinkUrl
, @Nullable String itemPrefixChars
, @Nullable Boolean listsItemMarkerSpace
, @Nullable Boolean listsOrderedItemDotOnly
, @Nullable Boolean allowNameSpace
) {
this.intellijDummyIdentifier = intellijDummyIdentifier;
this.htmlForTranslator = htmlForTranslator;
this.translationHtmlInlineTagPattern = translationHtmlInlineTagPattern;
this.translationAutolinkTagPattern = translationAutolinkTagPattern;
this.spaceInLinkUrl = spaceInLinkUrl;
this.parseJekyllMacroInLinkUrl = parseJekyllMacroInLinkUrl;
this.itemPrefixChars = itemPrefixChars;
this.listsItemMarkerSpace = listsItemMarkerSpace;
this.listsOrderedItemDotOnly = listsOrderedItemDotOnly;
this.allowNameSpace = allowNameSpace;
}
PatternTypeFlags withJekyllMacroInLinkUrl() { return new PatternTypeFlags(intellijDummyIdentifier, null, null, null, null, parseJekyllMacroInLinkUrl, null, null, null, null); }
PatternTypeFlags withJekyllMacroSpaceInLinkUrl() { return new PatternTypeFlags(intellijDummyIdentifier, null, null, null, spaceInLinkUrl, parseJekyllMacroInLinkUrl, null, null, null, null); }
PatternTypeFlags withHtmlTranslator() { return new PatternTypeFlags(intellijDummyIdentifier, htmlForTranslator, translationHtmlInlineTagPattern, translationAutolinkTagPattern, null, null, null, null, null, null); }
PatternTypeFlags withItemPrefixChars() { return new PatternTypeFlags(null, null, null, null, null, null, itemPrefixChars, listsItemMarkerSpace, listsOrderedItemDotOnly, null); }
PatternTypeFlags withAllowNameSpace() { return new PatternTypeFlags(null, null, null, null, null, null, null, null, null, allowNameSpace); }
/**
* Compare where null entry equals any other value
*
* @param o other
* @return true if equal
*/
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
PatternTypeFlags that = (PatternTypeFlags) o;
if (intellijDummyIdentifier != null && !intellijDummyIdentifier.equals(that.intellijDummyIdentifier)) return false;
if (htmlForTranslator != null && !htmlForTranslator.equals(that.htmlForTranslator)) return false;
if (translationHtmlInlineTagPattern != null && !translationHtmlInlineTagPattern.equals(that.translationHtmlInlineTagPattern)) return false;
if (translationAutolinkTagPattern != null && !translationAutolinkTagPattern.equals(that.translationAutolinkTagPattern)) return false;
if (spaceInLinkUrl != null && !spaceInLinkUrl.equals(that.spaceInLinkUrl)) return false;
if (parseJekyllMacroInLinkUrl != null && !parseJekyllMacroInLinkUrl.equals(that.parseJekyllMacroInLinkUrl)) return false;
if (itemPrefixChars != null && !itemPrefixChars.equals(that.itemPrefixChars)) return false;
if (listsItemMarkerSpace != null && !listsItemMarkerSpace.equals(that.listsItemMarkerSpace)) return false;
if (allowNameSpace != null && !allowNameSpace.equals(that.allowNameSpace)) return false;
return listsOrderedItemDotOnly == null || listsOrderedItemDotOnly.equals(that.listsOrderedItemDotOnly);
}
@Override
public int hashCode() {
int result = intellijDummyIdentifier != null ? intellijDummyIdentifier.hashCode() : 0;
result = 31 * result + (htmlForTranslator != null ? htmlForTranslator.hashCode() : 0);
result = 31 * result + (translationHtmlInlineTagPattern != null ? translationHtmlInlineTagPattern.hashCode() : 0);
result = 31 * result + (translationAutolinkTagPattern != null ? translationAutolinkTagPattern.hashCode() : 0);
result = 31 * result + (spaceInLinkUrl != null ? spaceInLinkUrl.hashCode() : 0);
result = 31 * result + (parseJekyllMacroInLinkUrl != null ? parseJekyllMacroInLinkUrl.hashCode() : 0);
result = 31 * result + (itemPrefixChars != null ? itemPrefixChars.hashCode() : 0);
result = 31 * result + (listsItemMarkerSpace != null ? listsItemMarkerSpace.hashCode() : 0);
result = 31 * result + (listsOrderedItemDotOnly != null ? listsOrderedItemDotOnly.hashCode() : 0);
result = 31 * result + (allowNameSpace != null ? allowNameSpace.hashCode() : 0);
return result;
}
}
final static HashMap> cachedPatterns = new HashMap<>();
static Pattern getCachedPattern(@NotNull String patternName, @NotNull PatternTypeFlags cachedTypeFlags, @NotNull Function factory) {
HashMap patternMap = cachedPatterns.computeIfAbsent(patternName, (key) -> new HashMap<>());
return patternMap.computeIfAbsent(cachedTypeFlags, factory);
}
public Parsing(DataHolder options) {
this.options = options;
this.CODE_BLOCK_INDENT = Parser.CODE_BLOCK_INDENT.get(options); // make sure this is consistent with lists settings
PatternTypeFlags patternTypeFlags = new PatternTypeFlags(options);
this.intellijDummyIdentifier = Boolean.TRUE.equals(patternTypeFlags.intellijDummyIdentifier);
this.htmlForTranslator = Boolean.TRUE.equals(patternTypeFlags.htmlForTranslator);
this.translationHtmlInlineTagPattern = patternTypeFlags.translationHtmlInlineTagPattern;
this.translationAutolinkTagPattern = patternTypeFlags.translationAutolinkTagPattern;
this.spaceInLinkUrl = Boolean.TRUE.equals(patternTypeFlags.spaceInLinkUrl);
this.parseJekyllMacroInLinkUrl = Boolean.TRUE.equals(patternTypeFlags.parseJekyllMacroInLinkUrl);
this.itemPrefixChars = patternTypeFlags.itemPrefixChars;
this.listsItemMarkerSpace = Boolean.TRUE.equals(patternTypeFlags.listsItemMarkerSpace);
this.listsOrderedItemDotOnly = Boolean.TRUE.equals(patternTypeFlags.listsOrderedItemDotOnly);
this.allowNameSpace = Boolean.TRUE.equals(patternTypeFlags.allowNameSpace);
if (intellijDummyIdentifier) {
this.ADDITIONAL_CHARS = ST_ADDITIONAL_CHARS_IDI;
this.EXCLUDED_0_TO_SPACE = ST_EXCLUDED_0_TO_SPACE_IDI;
this.REG_CHAR = ST_REG_CHAR_IDI;
this.REG_CHAR_PARENS = ST_REG_CHAR_PARENS_IDI;
this.REG_CHAR_SP = ST_REG_CHAR_SP_IDI;
this.REG_CHAR_SP_PARENS = ST_REG_CHAR_SP_PARENS_IDI;
this.IN_PARENS_NOSP = ST_IN_PARENS_NOSP_IDI;
this.IN_PARENS_W_SP = ST_IN_PARENS_W_SP_IDI;
this.IN_MATCHED_PARENS_NOSP = ST_IN_MATCHED_PARENS_NOSP_IDI;
this.IN_MATCHED_PARENS_W_SP = ST_IN_MATCHED_PARENS_W_SP_IDI;
this.IN_BRACES_W_SP = ST_IN_BRACES_W_SP_IDI;
this.DECLARATION = ST_DECLARATION_IDI;
this.ENTITY = ST_ENTITY_IDI;
this.TAGNAME = ST_TAGNAME_IDI;
this.ATTRIBUTENAME = ST_ATTRIBUTENAME_IDI;
this.UNQUOTEDVALUE = ST_UNQUOTEDVALUE_IDI;
this.ATTRIBUTEVALUE = ST_ATTRIBUTEVALUE_IDI;
this.ATTRIBUTEVALUESPEC = ST_ATTRIBUTEVALUESPEC_IDI;
this.ATTRIBUTE = ST_ATTRIBUTE_IDI;
this.OPENTAG = allowNameSpace ? ST_NS_OPENTAG_IDI : ST_OPENTAG_IDI;
this.CLOSETAG = allowNameSpace ? ST_NS_CLOSETAG_IDI : ST_CLOSETAG_IDI;
} else {
this.ADDITIONAL_CHARS = ST_ADDITIONAL_CHARS_NO_IDI;
this.EXCLUDED_0_TO_SPACE = ST_EXCLUDED_0_TO_SPACE_NO_IDI;
this.REG_CHAR = ST_REG_CHAR_NO_IDI;
this.REG_CHAR_PARENS = ST_REG_CHAR_PARENS_NO_IDI;
this.REG_CHAR_SP = ST_REG_CHAR_SP_NO_IDI;
this.REG_CHAR_SP_PARENS = ST_REG_CHAR_SP_PARENS_NO_IDI;
this.IN_PARENS_NOSP = ST_IN_PARENS_NOSP_NO_IDI;
this.IN_PARENS_W_SP = ST_IN_PARENS_W_SP_NO_IDI;
this.IN_MATCHED_PARENS_NOSP = ST_IN_MATCHED_PARENS_NOSP_NO_IDI;
this.IN_MATCHED_PARENS_W_SP = ST_IN_MATCHED_PARENS_W_SP_NO_IDI;
this.IN_BRACES_W_SP = ST_IN_BRACES_W_SP_NO_IDI;
this.DECLARATION = ST_DECLARATION_NO_IDI;
this.ENTITY = ST_ENTITY_NO_IDI;
this.TAGNAME = ST_TAGNAME_NO_IDI;
this.ATTRIBUTENAME = ST_ATTRIBUTENAME_NO_IDI;
this.UNQUOTEDVALUE = ST_UNQUOTEDVALUE_NO_IDI;
this.ATTRIBUTEVALUE = ST_ATTRIBUTEVALUE_NO_IDI;
this.ATTRIBUTEVALUESPEC = ST_ATTRIBUTEVALUESPEC_NO_IDI;
this.ATTRIBUTE = ST_ATTRIBUTE_NO_IDI;
this.OPENTAG = allowNameSpace ? ST_NS_OPENTAG_NO_IDI : ST_OPENTAG_NO_IDI;
this.CLOSETAG = allowNameSpace ? ST_NS_CLOSETAG_NO_IDI : ST_CLOSETAG_NO_IDI;
}
// init flag based patterns
this.LINK_DESTINATION_ANGLES = spaceInLinkUrl ? ST_LINK_DESTINATION_ANGLES_SPC : ST_LINK_DESTINATION_ANGLES_NO_SPC;
this.ENTITY_HERE = intellijDummyIdentifier ? ST_ENTITY_HERE_IDI : ST_ENTITY_HERE_NO_IDI;
// init dynamic patterns
synchronized (cachedPatterns) {
this.LINK_DESTINATION_MATCHED_PARENS_NOSP = getCachedPattern("LINK_DESTINATION_MATCHED_PARENS_NOSP", patternTypeFlags.withJekyllMacroInLinkUrl(), entry -> Pattern.compile(
"^(?:" + (parseJekyllMacroInLinkUrl ? IN_BRACES_W_SP + "|" : "")
+ (REG_CHAR + "|") +
ESCAPED_CHAR + "|\\\\|\\(|\\))*"));
this.LINK_DESTINATION = getCachedPattern("LINK_DESTINATION", patternTypeFlags.withJekyllMacroSpaceInLinkUrl(), entry -> Pattern.compile(
"^(?:" + (parseJekyllMacroInLinkUrl ? IN_BRACES_W_SP + "|" : "") +
(spaceInLinkUrl ? "(?:" + REG_CHAR_SP + ")|" : REG_CHAR + "|") +
ESCAPED_CHAR + "|\\\\|" + (spaceInLinkUrl ? IN_PARENS_W_SP : IN_PARENS_NOSP) + ")*"));
this.LINK_DESTINATION_MATCHED_PARENS = getCachedPattern("LINK_DESTINATION_MATCHED_PARENS", patternTypeFlags.withJekyllMacroSpaceInLinkUrl(), entry -> Pattern.compile(
"^(?:" + (parseJekyllMacroInLinkUrl ? IN_BRACES_W_SP + "|" : "")
+ (spaceInLinkUrl ? "(?:" + REG_CHAR_SP + ")|" : REG_CHAR + "|") +
ESCAPED_CHAR + "|\\\\|\\(|\\))*"));
this.EMAIL_AUTOLINK = getCachedPattern("EMAIL_AUTOLINK", patternTypeFlags.withHtmlTranslator(), entry -> Pattern.compile(
"^<(" +
"(?:[a-zA-Z0-9" + ADDITIONAL_CHARS + ".!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9" + ADDITIONAL_CHARS + "](?:[a-zA-Z0-9" + ADDITIONAL_CHARS + "-]{0,61}[a-zA-Z0-9" + ADDITIONAL_CHARS + "])?(?:\\.[a-zA-Z0-9" + ADDITIONAL_CHARS + "](?:[a-zA-Z0-9" + ADDITIONAL_CHARS + "-]{0,61}[a-zA-Z0-9" + ADDITIONAL_CHARS + "])?)*)" +
(htmlForTranslator ? "|(?:" + translationAutolinkTagPattern + ")" : "") +
")>"));
this.AUTOLINK = getCachedPattern("AUTOLINK", patternTypeFlags.withHtmlTranslator(), entry -> Pattern.compile(
"^<(" +
"(?:[a-zA-Z][a-zA-Z0-9" + ADDITIONAL_CHARS + ".+-]{1,31}:[^<>" + EXCLUDED_0_TO_SPACE + "]*)" +
(htmlForTranslator ? "|(?:" + translationAutolinkTagPattern + ")" : "") +
")>"));
this.WWW_AUTOLINK = getCachedPattern("WWW_AUTOLINK", patternTypeFlags.withHtmlTranslator(), entry -> Pattern.compile(
"^<(" +
"(?:w" + ADDITIONAL_CHARS + "?){3,3}\\.[^<>" + EXCLUDED_0_TO_SPACE + "]*" +
(htmlForTranslator ? "|(?:" + translationAutolinkTagPattern + ")" : "") +
")>"));
this.HTML_TAG = getCachedPattern("HTML_TAG", patternTypeFlags.withHtmlTranslator(), entry -> Pattern.compile('^' + ("(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT
+ "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA +
(htmlForTranslator ? "|<(?:" + translationHtmlInlineTagPattern + ")>|(?:" + translationHtmlInlineTagPattern + ")>" : "") + ")"), Pattern.CASE_INSENSITIVE));
this.LIST_ITEM_MARKER = getCachedPattern("LIST_ITEM_MARKER", patternTypeFlags.withItemPrefixChars(), entry -> {
if (listsItemMarkerSpace) {
if (listsOrderedItemDotOnly) {
return Pattern.compile("^([\\Q" + itemPrefixChars + "\\E])(?=[ \t])|^(\\d{1,9})([.])(?=[ \t])");
} else {
return Pattern.compile("^([\\Q" + itemPrefixChars + "\\E])(?=[ \t])|^(\\d{1,9})([.)])(?=[ \t])");
}
} else {
if (listsOrderedItemDotOnly) {
return Pattern.compile("^([\\Q" + itemPrefixChars + "\\E])(?= |\t|$)|^(\\d{1,9})([.])(?= |\t|$)");
} else {
return Pattern.compile("^([\\Q" + itemPrefixChars + "\\E])(?= |\t|$)|^(\\d{1,9})([.)])(?= |\t|$)");
}
}
});
}
this.HTMLTAG = this.HTML_TAG.pattern();
}
/**
* @deprecated in version (0.62.2), to be removed
*/
@Deprecated
public String EXCLUDED_0_TO_SPACE() {
return intellijDummyIdentifier ? ST_EXCLUDED_0_TO_SPACE_IDI : ST_EXCLUDED_0_TO_SPACE_NO_IDI;
}
/**
* @deprecated in version (0.62.2), to be removed
*/
@Deprecated
public String ADDITIONAL_CHARS() {
return intellijDummyIdentifier ? ST_ADDITIONAL_CHARS_IDI : ST_ADDITIONAL_CHARS_NO_IDI;
}
/**
* @deprecated in version (0.62.2), to be removed
*/
@Deprecated
public String ADDITIONAL_CHARS_SET(String quantifier) {
return intellijDummyIdentifier ? ST_ADDITIONAL_CHARS_SET_IDI + quantifier : ST_ADDITIONAL_CHARS_SET_NO_IDI;
}
public static int columnsToNextTabStop(int column) {
// Tab stop is 4
return 4 - (column % 4);
}
public static int findLineBreak(CharSequence s, int startIndex) {
return SequenceUtils.indexOfAny(s, CharPredicate.ANY_EOL, startIndex);
}
public static boolean isBlank(CharSequence s) {
return SequenceUtils.indexOfAnyNot(s, CharPredicate.BLANKSPACE) == -1;
}
public static boolean isLetter(CharSequence s, int index) {
int codePoint = Character.codePointAt(s, index);
return Character.isLetter(codePoint);
}
public static boolean isSpaceOrTab(CharSequence s, int index) {
return CharPredicate.SPACE_TAB.test(SequenceUtils.safeCharAt(s, index));
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy