
// com.greenpepper.shaded.com.vladsch.flexmark.internal.InlineParserImpl (Maven / Gradle / Ivy)
package com.greenpepper.shaded.com.vladsch.flexmark.internal;
import com.greenpepper.shaded.com.vladsch.flexmark.ast.*;
import com.greenpepper.shaded.com.vladsch.flexmark.ast.util.Parsing;
import com.greenpepper.shaded.com.vladsch.flexmark.ast.util.ReferenceRepository;
import com.greenpepper.shaded.com.vladsch.flexmark.ast.util.TextNodeConverter;
import com.greenpepper.shaded.com.vladsch.flexmark.internal.inline.AsteriskDelimiterProcessor;
import com.greenpepper.shaded.com.vladsch.flexmark.internal.inline.UnderscoreDelimiterProcessor;
import com.greenpepper.shaded.com.vladsch.flexmark.parser.*;
import com.greenpepper.shaded.com.vladsch.flexmark.parser.block.CharacterNodeFactory;
import com.greenpepper.shaded.com.vladsch.flexmark.parser.block.ParagraphPreProcessor;
import com.greenpepper.shaded.com.vladsch.flexmark.parser.block.ParserState;
import com.greenpepper.shaded.com.vladsch.flexmark.parser.delimiter.DelimiterProcessor;
import com.greenpepper.shaded.com.vladsch.flexmark.util.dependency.DependencyHandler;
import com.greenpepper.shaded.com.vladsch.flexmark.util.dependency.ResolvedDependencies;
import com.greenpepper.shaded.com.vladsch.flexmark.util.html.Escaping;
import com.greenpepper.shaded.com.vladsch.flexmark.util.options.DataHolder;
import com.greenpepper.shaded.com.vladsch.flexmark.util.sequence.BasedSequence;
import com.greenpepper.shaded.com.vladsch.flexmark.util.sequence.SegmentedSequence;
import java.util.*;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class InlineParserImpl implements InlineParser, ParagraphPreProcessor {
/**
 * Resolved dependency result for inline parser extension factories: a list of
 * {@link InlineParserDependencyStage} instances handed to the base class.
 */
static class InlineParserExtensionDependencies extends ResolvedDependencies<InlineParserDependencyStage> {
    /**
     * @param dependentStages stages produced by the dependency resolver
     */
    public InlineParserExtensionDependencies(List<InlineParserDependencyStage> dependentStages) {
        super(dependentStages);
    }
}
static class InlineParserDependencyStage {
private final List dependents;
public InlineParserDependencyStage(List dependents) {
// compute mappings
this.dependents = dependents;
}
}
static class InlineParserExtensionDependencyHandler extends DependencyHandler {
@Override
protected Class extends InlineParserExtensionFactory> getDependentClass(InlineParserExtensionFactory dependent) {
return dependent.getClass();
}
@Override
protected InlineParserExtensionDependencies createResolvedDependencies(List stages) {
return new InlineParserExtensionDependencies(stages);
}
@Override
protected InlineParserDependencyStage createStage(List dependents) {
return new InlineParserDependencyStage(dependents);
}
}
public static Map> calculateInlineParserExtensions(DataHolder options, List extensionFactories) {
Map> extensionMap = new HashMap>();
for (InlineParserExtensionFactory factory : extensionFactories) {
CharSequence chars = factory.getCharacters();
for (int i = 0; i < chars.length(); i++) {
char c = chars.charAt(i);
List list = extensionMap.get(c);
if (list == null) {
list = new ArrayList();
extensionMap.put(c, list);
}
list.add(factory);
}
}
InlineParserExtensionDependencyHandler resolver = new InlineParserExtensionDependencyHandler();
Map> extensions = new HashMap>();
for (Character c : extensionMap.keySet()) {
List list = extensionMap.get(c);
List resolvedList = list;
if (list.size() > 1) {
InlineParserExtensionDependencies dependencies = resolver.resolveDependencies(list);
resolvedList = new ArrayList(list.size());
for (InlineParserDependencyStage stage : dependencies.getDependentStages()) {
resolvedList.addAll(stage.dependents);
}
}
extensions.put(c, resolvedList);
}
return extensions;
}
protected final BitSet originalSpecialCharacters;
protected final BitSet delimiterCharacters;
protected final Map delimiterProcessors;
protected final LinkRefProcessorData linkRefProcessorsData;
protected List linkRefProcessors = null;
protected Map> inlineParserExtensions = null;
protected List inlineParserExtensionFactories = null;
// used to temporarily override handling of special characters by custom ParagraphPreProcessors
protected BitSet specialCharacters;
protected BitSet customCharacters = null;
protected Map customSpecialCharacterFactoryMap = null;
protected ArrayList customSpecialCharacterNodes = null;
/**
* Link references by ID, needs to be built up using parseReference before calling parse.
*/
protected ReferenceRepository referenceRepository;
protected Node block;
protected BasedSequence input;
protected int index;
/**
* Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different
* from the algorithm described in the spec.)
*/
protected Delimiter lastDelimiter;
/**
* Top opening bracket ([
or ![)
).
*/
private Bracket lastBracket;
protected ArrayList currentText;
protected Document document;
protected final InlineParserOptions options;
@Override
public void initializeDocument(Parsing parsing, Document document) {
this.document = document;
this.referenceRepository = document.get(Parser.REFERENCES);
this.myParsing = parsing;
linkRefProcessors = new ArrayList(linkRefProcessorsData.processors.size());
for (LinkRefProcessorFactory factory : linkRefProcessorsData.processors) {
linkRefProcessors.add(factory.create(document));
}
// create custom processors
if (inlineParserExtensionFactories != null) {
Map> extensions = calculateInlineParserExtensions(document, inlineParserExtensionFactories);
inlineParserExtensions = new HashMap>(extensions.size());
for (Map.Entry> entry : extensions.entrySet()) {
List extensionList = new ArrayList(entry.getValue().size());
for (InlineParserExtensionFactory factory : entry.getValue()) {
extensionList.add(factory.create(this));
}
inlineParserExtensions.put(entry.getKey(), extensionList);
// set it as special character
specialCharacters.set(entry.getKey());
}
}
}
/**
 * Gives every inline parser extension a chance to finish its work for the document.
 *
 * @param document document that was parsed; asserted to own the current reference repository
 */
@Override
public void finalizeDocument(Document document) {
    assert this.referenceRepository == document.get(Parser.REFERENCES);

    if (inlineParserExtensions != null) {
        for (List<InlineParserExtension> extensionList : inlineParserExtensions.values()) {
            for (InlineParserExtension extension : extensionList) {
                extension.finalizeDocument(this);
            }
        }
    }
}
/**
 * Returns the list of pending text segments, creating and storing it on first use.
 *
 * @return the pending segment list, never null
 */
public ArrayList getCurrentText() {
    ArrayList pending = currentText;
    if (pending == null) {
        pending = new ArrayList();
        currentText = pending;
    }
    return pending;
}
// parsing helper supplying the patterns used by the matching methods (e.g. LINE_END, SPNL);
// created in the constructor and replaced in initializeDocument()
protected Parsing myParsing;
/**
 * Creates an inline parser from pre-computed configuration.
 *
 * @param options                        options used to build the parsing helper and the inline parser options
 * @param specialCharacters              special characters; stored both as the original and the active set
 * @param delimiterCharacters            characters handled by delimiter processors
 * @param delimiterProcessors            delimiter processors by character
 * @param linkRefProcessorsData          link reference processor configuration
 * @param inlineParserExtensionFactories extension factories; an empty list is stored as null
 */
public InlineParserImpl(
        DataHolder options,
        BitSet specialCharacters,
        BitSet delimiterCharacters,
        Map delimiterProcessors,
        LinkRefProcessorData linkRefProcessorsData,
        List inlineParserExtensionFactories
) {
    this.myParsing = new Parsing(options);
    this.options = new InlineParserOptions(options);

    this.originalSpecialCharacters = specialCharacters;
    this.specialCharacters = specialCharacters;
    this.delimiterCharacters = delimiterCharacters;
    this.delimiterProcessors = delimiterProcessors;
    this.linkRefProcessorsData = linkRefProcessorsData;

    if (inlineParserExtensionFactories.isEmpty()) {
        this.inlineParserExtensionFactories = null;
    } else {
        this.inlineParserExtensionFactories = inlineParserExtensionFactories;
    }
}
/**
 * Builds a bit set with one bit set per given delimiter character.
 *
 * @param options    not used by this method
 * @param characters delimiter characters
 * @return new bit set indexed by character code
 */
public static BitSet calculateDelimiterCharacters(DataHolder options, Set<Character> characters) {
    BitSet bitSet = new BitSet();
    for (Character character : characters) {
        bitSet.set(character);
    }
    return bitSet;
}
/**
 * Builds the set of characters that interrupt plain text: all delimiter characters plus
 * the built-in ones (newline, backtick, brackets, backslash, bang, less-than, ampersand).
 *
 * @param options             not used by this method
 * @param delimiterCharacters delimiter characters to include
 * @return new bit set; the argument is not modified
 */
public static BitSet calculateSpecialCharacters(DataHolder options, BitSet delimiterCharacters) {
    final char[] builtIns = { '\n', '`', '[', ']', '\\', '!', '<', '&' };

    BitSet special = new BitSet();
    special.or(delimiterCharacters);
    for (char builtIn : builtIns) {
        special.set(builtIn);
    }
    return special;
}
/**
 * Builds the character-to-processor map: the asterisk and underscore processors when
 * enabled by their options, followed by the supplied custom processors.
 *
 * @param options             source of the enable flags and the strong-wraps-emphasis setting
 * @param delimiterProcessors custom delimiter processors to register
 * @return map from delimiter character to its processor
 */
public static Map calculateDelimiterProcessors(DataHolder options, List delimiterProcessors) {
    Map processorsByChar = new HashMap();

    if (options.get(Parser.ASTERISK_DELIMITER_PROCESSOR)) {
        DelimiterProcessor asterisk = new AsteriskDelimiterProcessor(Parser.STRONG_WRAPS_EMPHASIS.getFrom(options));
        addDelimiterProcessors(Collections.singletonList(asterisk), processorsByChar);
    }

    if (options.get(Parser.UNDERSCORE_DELIMITER_PROCESSOR)) {
        DelimiterProcessor underscore = new UnderscoreDelimiterProcessor(Parser.STRONG_WRAPS_EMPHASIS.getFrom(options));
        addDelimiterProcessors(Collections.singletonList(underscore), processorsByChar);
    }

    // custom processors go last, so a clash with a built-in character is reported for the custom one
    addDelimiterProcessors(delimiterProcessors, processorsByChar);
    return processorsByChar;
}
// nothing to add, this is for extensions.
/**
 * Prepares link reference processor factories for lookup by bracket nesting level.
 * With more than one factory, a sorted copy is made (ascending nesting level; at equal
 * level, factories wanting a {@code !} prefix come first) and a lookup table records, for
 * each nesting level present, the index of the first factory at that level.
 *
 * @param options           options passed to the factories' query methods
 * @param linkRefProcessors factories to prepare; the list itself is not reordered
 * @return processor data holding the factory list, the maximum nesting level and the lookup table
 */
public static LinkRefProcessorData calculateLinkRefProcessors(final DataHolder options, List<LinkRefProcessorFactory> linkRefProcessors) {
    if (linkRefProcessors.size() > 1) {
        List<LinkRefProcessorFactory> sortedLinkProcessors = new ArrayList<LinkRefProcessorFactory>(linkRefProcessors.size());
        sortedLinkProcessors.addAll(linkRefProcessors);

        // highest nesting level requested by any factory (never below 0); computed up front
        // so the comparator below stays free of side effects
        int maxReferenceLinkNesting = 0;
        for (LinkRefProcessorFactory factory : sortedLinkProcessors) {
            int level = factory.getBracketNestingLevel(options);
            if (maxReferenceLinkNesting < level) maxReferenceLinkNesting = level;
        }

        Collections.sort(sortedLinkProcessors, new Comparator<LinkRefProcessorFactory>() {
            @Override
            public int compare(LinkRefProcessorFactory p1, LinkRefProcessorFactory p2) {
                int lv1 = p1.getBracketNestingLevel(options);
                int lv2 = p2.getBracketNestingLevel(options);

                if (lv1 == lv2) {
                    // processors that want exclamation before the [ have higher priority
                    if (!p1.getWantExclamationPrefix(options)) lv1++;
                    if (!p2.getWantExclamationPrefix(options)) lv2++;
                }

                // explicit comparison instead of subtraction, which can overflow
                return lv1 < lv2 ? -1 : (lv1 == lv2 ? 0 : 1);
            }
        });

        int[] nestingLookup = new int[maxReferenceLinkNesting + 1];
        int maxNestingLevel = -1;
        int index = 0;

        for (LinkRefProcessorFactory linkProcessor : sortedLinkProcessors) {
            int level = linkProcessor.getBracketNestingLevel(options);
            if (maxNestingLevel < level) {
                // first factory seen at this level
                maxNestingLevel = level;
                nestingLookup[maxNestingLevel] = index;
                if (maxNestingLevel == maxReferenceLinkNesting) break;
            }
            index++;
        }

        return new LinkRefProcessorData(sortedLinkProcessors, maxReferenceLinkNesting, nestingLookup);
    } else if (linkRefProcessors.size() > 0) {
        // single factory: every lookup slot stays 0 and so points at it
        int maxNesting = linkRefProcessors.get(0).getBracketNestingLevel(options);
        int[] nestingLookup = new int[maxNesting + 1];
        return new LinkRefProcessorData(linkRefProcessors, maxNesting, nestingLookup);
    } else {
        return new LinkRefProcessorData(linkRefProcessors, 0, new int[0]);
    }
}
private static void addDelimiterProcessors(List extends DelimiterProcessor> delimiterProcessors, Map map) {
for (DelimiterProcessor delimiterProcessor : delimiterProcessors) {
char opening = delimiterProcessor.getOpeningCharacter();
addDelimiterProcessorForChar(opening, delimiterProcessor, map);
char closing = delimiterProcessor.getClosingCharacter();
if (opening != closing) {
addDelimiterProcessorForChar(closing, delimiterProcessor, map);
}
}
}
/**
 * Registers a processor for one delimiter character.
 *
 * @param delimiterChar       character to register
 * @param toAdd               processor handling the character
 * @param delimiterProcessors target map
 * @throws IllegalArgumentException if the character already had a processor; the map then
 *                                  already holds {@code toAdd} for that character
 */
private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterProcessor toAdd, Map<Character, DelimiterProcessor> delimiterProcessors) {
    DelimiterProcessor existing = delimiterProcessors.put(delimiterChar, toAdd);
    if (existing != null) {
        throw new IllegalArgumentException("Delimiter processor conflict with delimiter char '" + delimiterChar + "'");
    }
}
/**
 * @return the sequence currently being parsed, as set by parse() or parseReference()
 */
@Override
public BasedSequence getInput() {
return input;
}
/**
 * @return the current position within the input
 */
@Override
public int getIndex() {
return index;
}
/**
 * Moves the current position within the input; no bounds check is performed here.
 *
 * @param index new position
 */
@Override
public void setIndex(final int index) {
this.index = index;
}
/**
 * @return the top of the delimiter stack, or null when it is empty
 */
@Override
public Delimiter getLastDelimiter() {
return lastDelimiter;
}
/**
 * @return the top of the bracket stack, or null when it is empty
 */
@Override
public Bracket getLastBracket() {
return lastBracket;
}
/**
 * @return the document set by initializeDocument()
 */
@Override
public Document getDocument() {
return document;
}
/**
 * @return the inline parser options built in the constructor
 */
@Override
public InlineParserOptions getOptions() {
return options;
}
/**
 * @return the parsing helper currently in use
 */
@Override
public Parsing getParsing() {
return myParsing;
}
/**
 * @return the node whose inline children are being built, as set by parse()
 */
@Override
public Node getBlock() {
return block;
}
/**
 * Parses {@code input} into {@code node} while additionally treating the given custom
 * characters as special, creating a node from {@code nodeFactoryMap} for each one found.
 *
 * The custom characters are merged into a copy of the active special-character set. The
 * constructor stores one BitSet instance as both the original and the active set, so
 * or-ing into it directly would permanently add the custom characters to the "original"
 * set and make the reset below a no-op.
 *
 * @param input            text to parse
 * @param node             node receiving the parsed children
 * @param customCharacters characters to treat as special for this call only
 * @param nodeFactoryMap   node factory for each custom character
 * @return nodes created for custom characters, or null when none were created
 */
@Override
public List parseCustom(BasedSequence input, Node node, BitSet customCharacters, Map nodeFactoryMap) {
    this.customCharacters = customCharacters;

    BitSet withCustom = (BitSet) this.specialCharacters.clone();
    withCustom.or(customCharacters);
    this.specialCharacters = withCustom;

    this.customSpecialCharacterFactoryMap = nodeFactoryMap;
    this.customSpecialCharacterNodes = null;

    try {
        parse(input, node);
    } finally {
        // always drop the temporary configuration, even when parsing throws
        this.specialCharacters = this.originalSpecialCharacters;
        this.customSpecialCharacterFactoryMap = null;
        this.customCharacters = null;
    }

    return this.customSpecialCharacterNodes;
}
/**
 * Parse content in block into inline children, using reference map to resolve references.
 *
 * @param content text to parse; it is trimmed before parsing
 * @param block   node that receives the parsed inline nodes as children
 */
@Override
public void parse(BasedSequence content, Node block) {
    this.block = block;
    this.input = content.trim();
    this.index = 0;
    this.lastDelimiter = null;
    this.lastBracket = null;

    boolean moreToParse;
    do {
        moreToParse = parseInline();
    } while (moreToParse);

    processDelimiters(null);
    flushTextNode();

    if (inlineParserExtensions != null) {
        for (List<InlineParserExtension> extensionList : inlineParserExtensions.values()) {
            for (InlineParserExtension extension : extensionList) {
                extension.finalizeBlock(this);
            }
        }
    }

    // merge nodes if needed
    mergeTextNodes(block.getFirstChild(), block.getLastChild());
}
/**
 * Walks the siblings from {@code fromNode} through {@code toNode} and collapses every run
 * of consecutive Text nodes into the first node of the run.
 *
 * @param fromNode first node to inspect, may be null
 * @param toNode   last node to inspect; null means continue to the last sibling
 */
@Override
public void mergeTextNodes(Node fromNode, Node toNode) {
    Text runStart = null;
    Text runEnd = null;

    for (Node current = fromNode; current != null; current = current.getNext()) {
        if (!(current instanceof Text)) {
            // a non-text node ends the current run
            mergeIfNeeded(runStart, runEnd);
            runStart = null;
            runEnd = null;
        } else {
            runEnd = (Text) current;
            if (runStart == null) {
                runStart = runEnd;
            }
        }

        if (current == toNode) {
            break;
        }
    }

    // merge the run that was still open when the walk stopped
    mergeIfNeeded(runStart, runEnd);
}
/**
 * Merges the nodes from {@code first} through {@code last} into {@code first}: the
 * characters of all of them become the characters of {@code first}, and every node after
 * {@code first} up to and including {@code last} is unlinked. Does nothing when either
 * argument is null or both are the same node.
 *
 * @param first first text node of the run
 * @param last  last text node of the run
 */
@Override
public void mergeIfNeeded(Text first, Text last) {
    if (first == null || last == null || first == last) {
        return;
    }

    ArrayList segments = new ArrayList();
    segments.add(first.getChars());

    final Node end = last.getNext();
    Node cursor = first.getNext();
    while (cursor != end) {
        // remember the successor before the node is detached
        Node following = cursor.getNext();
        segments.add(cursor.getChars());
        cursor.unlink();
        cursor = following;
    }

    first.setChars(SegmentedSequence.of(segments, first.getChars()));
}
/*
* ParagraphPreProcessor implementation
*/
/**
 * Consumes link reference definitions from the start of a paragraph by calling
 * parseReference() repeatedly until it reports that nothing was parsed.
 *
 * @param block paragraph whose characters are scanned
 * @param state parser state; not used by this method
 * @return offset of the remaining content relative to the start of the paragraph's characters
 */
@Override
public int preProcessBlock(Paragraph block, ParserState state) {
BasedSequence contentChars = block.getChars();
// try parsing the beginning as link reference definitions:
int leadingSpaces = contentChars.countLeading(BasedSequence.WHITESPACE_NO_EOL_CHARS);
int length = contentChars.length();
// a candidate has at most 3 leading spaces, more than 3 characters after them, and starts with '['
while (leadingSpaces <= 3 && length > 3 + leadingSpaces && contentChars.charAt(leadingSpaces) == '[') {
if (leadingSpaces > 0) {
// drop the leading spaces so parseReference() sees the '[' at position 0
contentChars = contentChars.subSequence(leadingSpaces, length);
length -= leadingSpaces;
}
int pos = parseReference(block, contentChars);
// 0 means nothing was parsed as a reference definition
if (pos == 0) break;
contentChars = contentChars.subSequence(pos, length);
length = contentChars.length();
leadingSpaces = contentChars.countLeading(BasedSequence.WHITESPACE_NO_EOL_CHARS);
}
return contentChars.getStartOffset() - block.getChars().getStartOffset();
}
/**
 * Re-parents the siblings that follow {@code fromNode}, up to and including
 * {@code toNode}, as children of {@code fromNode}, then recomputes the characters of
 * {@code fromNode} from its content.
 *
 * @param fromNode node that becomes the parent
 * @param toNode   last sibling to move; if it is never reached, all following siblings are moved
 */
@Override
public void moveNodes(Node fromNode, Node toNode) {
    for (Node sibling = fromNode.getNext(); sibling != null; ) {
        // capture the successor first: unlinking detaches the node from its siblings
        final Node after = sibling.getNext();
        sibling.unlink();
        fromNode.appendChild(sibling);
        if (sibling == toNode) {
            break;
        }
        sibling = after;
    }

    fromNode.setCharsFromContent();
}
/**
 * Attempt to parse a reference definition, modifying the internal reference map.
 * On success the reference is stored in the reference repository under its normalized
 * label and a Reference node is inserted before {@code block}.
 * Note that this replaces the parser's current input and index.
 *
 * @param block the block whose text is being parsed for references
 * @param s     sequence of the block's characters
 * @return number of characters that were parsed as a reference from the start of the sequence, {@code 0} if none
 */
protected int parseReference(Block block, BasedSequence s) {
this.input = s;
this.index = 0;
BasedSequence dest;
BasedSequence title;
int matchChars;
int startIndex = index;
// label:
matchChars = parseLinkLabel();
if (matchChars == 0) {
return 0;
}
// colon:
if (peek() != ':') {
return 0;
}
// the raw label spans the matched label characters plus the colon that follows
BasedSequence rawLabel = input.subSequence(0, matchChars + 1);
index++;
// link url
spnl();
dest = parseLinkDestination();
if (dest == null || dest.length() == 0) {
return 0;
}
int beforeTitle = index;
spnl();
title = parseLinkTitle();
if (title == null) {
// rewind before spaces
index = beforeTitle;
}
boolean atLineEnd = true;
if (index != input.length() && match(myParsing.LINE_END) == null) {
if (title == null) {
atLineEnd = false;
} else {
// the potential title we found is not at the line end,
// but it could still be a legal link reference if we
// discard the title
title = null;
// rewind before spaces
index = beforeTitle;
// and instead check if the link URL is at the line end
atLineEnd = match(myParsing.LINE_END) != null;
}
}
if (!atLineEnd) {
return 0;
}
String normalizedLabel = Escaping.normalizeReferenceChars(rawLabel, true);
// a label that normalizes to nothing does not define a reference
if (normalizedLabel.isEmpty()) {
return 0;
}
Reference reference = new Reference(rawLabel, dest, title);
// NOTE: whether first or last reference is kept is defined by the repository modify behavior setting
// for CommonMark this is set in the initializeDocument() function of the inline parser
referenceRepository.put(normalizedLabel, reference);
block.insertBefore(reference);
return index - startIndex;
}
/**
 * Queues a text segment; it becomes part of a Text node on the next flushTextNode().
 *
 * @param text segment to queue
 */
public void appendText(BasedSequence text) {
getCurrentText().add(text);
}
/**
 * Queues the sub-sequence {@code [beginIndex, endIndex)} of {@code text} as a text segment.
 *
 * @param text       sequence to take the segment from
 * @param beginIndex start of the segment, inclusive
 * @param endIndex   end of the segment, exclusive
 */
@Override
public void appendText(BasedSequence text, int beginIndex, int endIndex) {
getCurrentText().add(text.subSequence(beginIndex, endIndex));
}
/**
 * Appends a node to the current block, flushing any queued text first so that the text
 * precedes the node.
 *
 * @param node node to append
 */
@Override
public void appendNode(Node node) {
flushTextNode();
block.appendChild(node);
}
// In some cases, we don't want the text to be appended to an existing node, we need it separate
/**
 * Appends the text as its own Text node instead of queueing it with pending text.
 *
 * @param text characters of the new node
 * @return the Text node that was appended
 */
@Override
public Text appendSeparateText(BasedSequence text) {
Text node = new Text(text);
appendNode(node);
return node;
}
/**
 * Turns the queued text segments, if any, into a single Text node appended to the
 * current block, and clears the queue.
 */
@Override
public void flushTextNode() {
    if (currentText == null) {
        return;
    }

    Text pending = new Text(SegmentedSequence.of(currentText, BasedSequence.NULL));
    block.appendChild(pending);
    currentText = null;
}
/**
 * Parse the next inline element in subject, advancing input index.
 * On success, add the result to block's children and return true.
 * On failure, return false.
 *
 * Dispatch order: inline parser extensions registered for the character, then custom
 * characters (only during parseCustom()), then the built-in handlers. A character that no
 * handler accepts is appended as one character of plain text.
 *
 * @return false only at the end of the input, true otherwise
 */
protected boolean parseInline() {
    boolean res;
    char c = peek();
    if (c == '\0') {
        return false;
    }

    if (inlineParserExtensions != null) {
        List<InlineParserExtension> extensions = inlineParserExtensions.get(c);
        if (extensions != null) {
            for (InlineParserExtension extension : extensions) {
                res = extension.parse(this);
                if (res) return true;
            }
        }
    }

    if (customCharacters != null && customCharacters.get(c)) {
        res = processCustomCharacters();
        if (!res) {
            index++;
            // When we get here, it's only for a single special character that turned out to not have a special meaning.
            // So we shouldn't have a single surrogate here, hence it should be ok to turn it into a String.
            appendText(input.subSequence(index - 1, index));
        }
        return true;
    }

    switch (c) {
        case '\n':
            res = parseNewline();
            break;
        case '\\':
            res = parseBackslash();
            break;
        case '`':
            res = parseBackticks();
            break;
        case '[':
            res = parseOpenBracket();
            break;
        case '!':
            res = parseBang();
            break;
        case ']':
            res = parseCloseBracket();
            break;
        case '<': {
            // first we check custom special characters for < delimiters and only allow 2 consecutive ones to allow anchor links and HTML processing
            boolean isDelimiter = delimiterCharacters.get(c);
            if (isDelimiter && peek(1) == '<') {
                DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c);
                res = parseDelimiters(delimiterProcessor, c);
            } else {
                res = parseAutolink() || parseHtmlInline();
            }
        }
        break;
        case '&':
            res = parseEntity();
            break;
        default: {
            // first we check custom special characters
            boolean isDelimiter = delimiterCharacters.get(c);
            if (isDelimiter) {
                DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c);
                res = parseDelimiters(delimiterProcessor, c);
            } else {
                res = parseString();
            }
        }
        break;
    }

    if (!res) {
        index++;
        // When we get here, it's only for a single special character that turned out to not have a special meaning.
        // So we shouldn't have a single surrogate here, hence it should be ok to turn it into a String.
        appendText(input.subSequence(index - 1, index));
    }

    return true;
}
/**
 * Handles the custom special character at the current index: creates its node through
 * the registered factory, optionally splits off text before it and characters after it
 * that the factory asks to skip, and records the node in customSpecialCharacterNodes.
 *
 * @return false when no factory is registered for the character (nothing is consumed), true otherwise
 */
private boolean processCustomCharacters() {
char c = peek();
CharacterNodeFactory factory = customSpecialCharacterFactoryMap.get(c);
if (factory == null) return false;
Node node = factory.create();
node.setChars(input.subSequence(index, index + 1));
if (currentText != null) {
// take over the queued text here instead of letting appendNode() flush it unchanged
BasedSequence prevText = SegmentedSequence.of(currentText, BasedSequence.NULL);
currentText = null;
// see if need to trim some off the end
int pos = prevText.length();
BasedSequence skipped = null;
while (pos > 0 && factory.skipPrev(prevText.charAt(pos - 1))) pos--;
if (pos < prevText.length()) {
skipped = prevText.subSequence(pos);
prevText = prevText.subSequence(0, pos);
}
block.appendChild(new Text(prevText));
// the trimmed tail is kept as a WhiteSpace node only when the factory asks for it
if (skipped != null && factory.wantSkippedWhitespace()) {
block.appendChild(new WhiteSpace(skipped));
}
}
appendNode(node);
if (customSpecialCharacterNodes == null) customSpecialCharacterNodes = new ArrayList();
customSpecialCharacterNodes.add(node);
// pos marks the first character after the custom character
int pos = index + 1;
// always consume the custom character, then any following characters the factory skips
do {
index++;
c = peek();
}
while (c != '\0' && factory.skipNext(c));
if (pos < index && factory.wantSkippedWhitespace()) {
block.appendChild(new WhiteSpace(input.subSequence(pos, index)));
}
return true;
}
/**
 * Searches for the pattern in the input from the current index to the end. On success the
 * index moves to the end of the match and the matched characters are returned.
 * NOTE(review): the search uses find() on the region, so whether a match must begin
 * exactly at the current index depends on the pattern itself; confirm against the
 * patterns in Parsing.
 *
 * @param re pattern to match
 * @return sequence matched, or null when at the end of input or nothing matched
 */
@Override
public BasedSequence match(Pattern re) {
    final int end = input.length();
    if (index >= end) {
        return null;
    }

    Matcher found = re.matcher(input);
    found.region(index, end);
    if (!found.find()) {
        return null;
    }

    index = found.end();
    return input.subSequence(found.start(), found.end());
}
/**
 * Like {@link #match(Pattern)}, but returns the whole match together with its capture
 * groups. On success the index moves to the end of the match.
 *
 * @param re pattern to match
 * @return array whose element 0 is the whole match and element {@code i} is capture group
 *         {@code i} (null for a group that did not participate), or null when at the end
 *         of input or nothing matched
 */
@Override
public BasedSequence[] matchWithGroups(Pattern re) {
    final int end = input.length();
    if (index >= end) {
        return null;
    }

    Matcher found = re.matcher(input);
    found.region(index, end);
    if (!found.find()) {
        return null;
    }

    index = found.end();

    final int slots = found.groupCount() + 1;
    BasedSequence[] groups = new BasedSequence[slots];
    groups[0] = input.subSequence(found.start(), found.end());
    for (int g = 1; g < slots; g++) {
        groups[g] = found.group(g) == null ? null : input.subSequence(found.start(g), found.end(g));
    }
    return groups;
}
/**
 * Like {@link #match(Pattern)}, but returns the matcher itself so the caller can inspect
 * groups and positions. On success the index moves to the end of the match.
 *
 * @param re pattern to match
 * @return the matcher positioned on the match, or null when at the end of input or nothing matched
 */
@Override
public Matcher matcher(Pattern re) {
    final int end = input.length();
    if (index >= end) {
        return null;
    }

    Matcher found = re.matcher(input);
    found.region(index, end);
    if (!found.find()) {
        return null;
    }

    index = found.end();
    return found;
}
/**
 * Looks at the current character without consuming it.
 *
 * @return the char at the current input index, or {@code '\0'} in case there are no more characters.
 */
@Override
public char peek() {
    return index < input.length() ? input.charAt(index) : '\0';
}
/**
 * Looks at a character relative to the current index without consuming anything.
 * Only the upper bound is checked.
 *
 * @param ahead offset from the current index
 * @return the char at {@code index + ahead}, or {@code '\0'} when that position is at or past the end of input
 */
@Override
public char peek(int ahead) {
    final int at = index + ahead;
    return at < input.length() ? input.charAt(at) : '\0';
}
/**
 * Parse zero or more space characters, including at most one newline and zero or more spaces.
 * The result of the underlying match is ignored.
 *
 * @return always true
 */
@Override
public boolean spnl() {
match(myParsing.SPNL);
return true;
}
/**
 * Parse zero or more non-indent spaces.
 * The result of the underlying match is ignored.
 *
 * @return always true
 */
@Override
public boolean nonIndentSp() {
match(myParsing.SPNI);
return true;
}
/**
 * Parse zero or more spaces.
 * The result of the underlying match is ignored.
 *
 * @return always true
 */
@Override
public boolean sp() {
match(myParsing.SP);
return true;
}
/**
 * Parse space characters including a newline, as used between the lines of a multi-line URL.
 * Unlike spnl(), the result of the match is reported.
 *
 * @return true if the SPNL_URL pattern matched, false otherwise
 */
@Override
public boolean spnlUrl() {
return match(myParsing.SPNL_URL) != null;
}
/**
 * Parse to end of line, including EOL.
 *
 * @return characters parsed, or null if the REST_OF_LINE pattern did not match
 */
@Override
public BasedSequence toEOL() {
return match(myParsing.REST_OF_LINE);
}
/**
 * Parse a newline. If it was preceded by two spaces, append a hard line break; otherwise a soft line break.
 * A {@code \r} directly before the newline is treated as part of the line ending, and
 * spaces at the start of the next line are skipped.
 *
 * @return true
 */
@Override
public boolean parseNewline() {
// crLf: the newline is the second half of a \r\n pair
boolean crLf = index > 0 && input.charAt(index - 1) == '\r';
int crLfDelta = crLf ? 1 : 0;
index++; // assume we're at a \n
// We're gonna add a new node in any case and we need to check the last text node, so flush outstanding text.
flushTextNode();
Node lastChild = block.getLastChild();
// Check previous text for trailing spaces.
// The "endsWith" is an optimization to avoid an RE match in the common case.
if (lastChild != null && lastChild instanceof Text && (lastChild.getChars().endsWith(" ") || crLf && lastChild.getChars().endsWith(" \r"))) {
Text text = (Text) lastChild;
BasedSequence literal = text.getChars();
Matcher matcher = myParsing.FINAL_SPACE.matcher(literal);
// length of the FINAL_SPACE match, less the \r when present
int spaces = matcher.find() ? matcher.end() - matcher.start() - crLfDelta : 0;
// two or more trailing spaces make a hard break; with hardLineBreakLimit the break node covers 3 + crLfDelta characters, otherwise all trailing spaces plus the line ending
appendNode(spaces >= 2 ? new HardLineBreak(input.subSequence(index - (options.hardLineBreakLimit ? 3 + crLfDelta : spaces + 1 + crLfDelta), index)) : new SoftLineBreak(input.subSequence(index - 1 - crLfDelta, index)));
if (spaces + crLfDelta > 0) {
// strip the trailing spaces (and \r) from the preceding text, or drop the node when nothing else is left
if (literal.length() > spaces) {
lastChild.setChars(literal.subSequence(0, literal.length() - spaces - crLfDelta).trimEnd());
} else {
lastChild.unlink();
}
}
} else {
if (crLf && lastChild != null && lastChild instanceof Text) {
// no trailing spaces, but the \r still has to be removed from the preceding text
Text text = (Text) lastChild;
BasedSequence literal = text.getChars();
if (literal.length() > 1) {
lastChild.setChars(literal.subSequence(0, literal.length() - crLfDelta).trimEnd());
} else {
lastChild.unlink();
}
}
appendNode(new SoftLineBreak(input.subSequence(index - 1 - crLfDelta, index)));
}
// gobble leading spaces in next line
while (peek() == ' ') {
index++;
}
return true;
}
/**
 * Parse a backslash-escaped special character, adding either the escaped character, a hard line break
 * (if the backslash is followed by a newline), or a literal backslash to the block's children.
 *
 * A line ending after the backslash is one character ({@code \n} or {@code \r}) or the
 * two-character pair {@code \r\n}. Two consecutive {@code \n} are two line endings, so only
 * the first is consumed by the hard line break.
 *
 * @return true
 */
protected boolean parseBackslash() {
    index++;
    final char next = peek();

    if (next == '\n' || next == '\r') {
        // only "\r\n" is a two-character line ending
        int charsMatched = next == '\r' && peek(1) == '\n' ? 2 : 1;
        appendNode(new HardLineBreak(input.subSequence(index - 1, index + charsMatched)));
        index += charsMatched;
    } else if (index < input.length() && myParsing.ESCAPABLE.matcher(input.subSequence(index, index + 1)).matches()) {
        // keep both the backslash and the escaped character in the text
        appendText(input, index - 1, index + 1);
        index++;
    } else {
        // not an escape: the backslash is literal text
        appendText(input.subSequence(index - 1, index));
    }

    return true;
}
/**
 * Attempt to parse backticks, adding either a backtick code span or a literal sequence of backticks.
 *
 * After the opening run, later backtick runs are examined until one equals the opening
 * run; the text between them becomes a Code node. With {@code options.codeSoftLineBreaks}
 * the code text is split into Text children separated by SoftLineBreak children; a line
 * ending at the very end of the code text does not get a SoftLineBreak child.
 *
 * @return false if no backtick run matched at the current position, true otherwise
 *         (including when the run had no closing match and was added as literal text)
 */
protected boolean parseBackticks() {
    BasedSequence ticks = match(myParsing.TICKS_HERE);
    if (ticks == null) {
        return false;
    }

    int afterOpenTicks = index;
    BasedSequence matched;

    while ((matched = match(myParsing.TICKS)) != null) {
        if (matched.equals(ticks)) {
            int ticksLength = ticks.length();
            final BasedSequence codeText = input.subSequence(afterOpenTicks, index - ticksLength);
            Code node = new Code(input.subSequence(afterOpenTicks - ticksLength, afterOpenTicks), codeText, input.subSequence(index - ticksLength, index));

            if (options.codeSoftLineBreaks) {
                // add softbreaks to code ast
                final int length = codeText.length();
                int lastPos = 0;

                while (lastPos < length) {
                    int softBreak = codeText.indexOfAny("\n\r", lastPos);
                    int pos = softBreak == -1 ? length : softBreak;

                    final Text textNode = new Text(codeText.subSequence(lastPos, pos));
                    node.appendChild(textNode);

                    lastPos = pos;
                    if (lastPos >= length) break;

                    // step over the line ending: "\r\n" counts as one ending
                    if (codeText.charAt(lastPos) == '\r') {
                        lastPos++;
                        if (lastPos >= length) break;
                        if (codeText.charAt(lastPos) == '\n') lastPos++;
                    } else {
                        lastPos++;
                    }

                    if (lastPos >= length) break;

                    // lastPos has advanced past softBreak by at least one character here
                    SoftLineBreak softLineBreak = new SoftLineBreak(codeText.subSequence(softBreak, lastPos));
                    node.appendChild(softLineBreak);
                }
            } else {
                final Text textNode = new Text(codeText);
                node.appendChild(textNode);
            }

            appendNode(node);
            return true;
        }
    }

    // If we got here, we didn't match a closing backtick sequence.
    index = afterOpenTicks;
    appendText(ticks);
    return true;
}
/**
 * Result of scanning a delimiter run: how many delimiter characters it contains and
 * whether the run can open and/or close.
 */
private static class DelimiterData {
// number of delimiter characters in the run
final int count;
final boolean canClose;
final boolean canOpen;
// note the parameter order: canOpen comes before canClose
DelimiterData(int count, boolean canOpen, boolean canClose) {
this.count = count;
this.canOpen = canOpen;
this.canClose = canClose;
}
}
/**
 * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters.
 * The delimiter run is appended as its own Text node and pushed onto the delimiter stack.
 *
 * @param delimiterProcessor delimiter processor instance
 * @param delimiterChar      delimiter character being processed
 * @return true if processed characters, false if the scan produced no delimiter run
 */
protected boolean parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) {
    final DelimiterData scanned = scanDelimiters(delimiterProcessor, delimiterChar);
    if (scanned == null) {
        return false;
    }

    final int from = index;
    final int to = from + scanned.count;
    index = to;

    Text delimiterText = appendSeparateText(input.subSequence(from, to));

    // Add entry to stack for this opener
    Delimiter pushed = new Delimiter(input, delimiterText, delimiterChar, scanned.canOpen, scanned.canClose, this.lastDelimiter, from);
    pushed.numDelims = scanned.count;
    this.lastDelimiter = pushed;

    // link the previous top forward to the new entry
    if (pushed.previous != null) {
        pushed.previous.next = pushed;
    }

    return true;
}
/**
 * Add open bracket to the bracket stack and add a text node to block's children.
 *
 * @return true
 */
protected boolean parseOpenBracket() {
    final int bracketAt = index;
    index = bracketAt + 1;

    Text bracketText = appendSeparateText(input.subSequence(bracketAt, index));

    // Add entry to stack for this opener
    addBracket(Bracket.link(input, bracketText, bracketAt, lastBracket, lastDelimiter));
    return true;
}
/**
 * If the next character is {@code [}, add the {@code ![} opener to the bracket stack and
 * add a text node for it to block's children. Otherwise just queue the {@code !} as text.
 *
 * @return true
 */
protected boolean parseBang() {
    final int bangAt = index;
    index++;

    if (peek() != '[') {
        // a lone bang is plain text
        appendText(input.subSequence(bangAt, index));
        return true;
    }

    index++;
    Text openerText = appendSeparateText(input.subSequence(bangAt, index));

    // Add entry to stack for this opener; the recorded position is that of the '['
    addBracket(Bracket.image(input, openerText, bangAt + 1, lastBracket, lastDelimiter));
    return true;
}
/**
 * Pushes a bracket onto the bracket stack, first flagging the current top (if any) as
 * having a bracket after it.
 *
 * @param bracket bracket to push
 */
private void addBracket(Bracket bracket) {
if (lastBracket != null) {
lastBracket.bracketAfter = true;
}
lastBracket = bracket;
}
/**
 * Pops the top of the bracket stack. There is no null check, so the stack must not be
 * empty when this is called.
 */
private void removeLastBracket() {
lastBracket = lastBracket.previous;
}
/**
 * Outcome of a successful link reference processor lookup: the processor that matched,
 * whether it wants a leading exclamation mark, and the characters it matched on.
 */
static class ReferenceProcessorMatch {
public final LinkRefProcessor processor;
public final BasedSequence nodeChars;
public final boolean wantExclamation;
// note the parameter order differs from the field order above
public ReferenceProcessorMatch(LinkRefProcessor processor, boolean wantExclamation, BasedSequence nodeChars) {
this.processor = processor;
this.nodeChars = nodeChars;
this.wantExclamation = wantExclamation;
}
}
/**
 * Finds the first link reference processor, starting from the one indexed for nesting
 * level {@code lookAhead + nesting}, whose isMatch() accepts the bracketed text.
 * NOTE(review): {@code lookAhead + nesting} is used as an index into nestingIndex without
 * a bounds check here; presumably callers keep it within range.
 *
 * @param opener     opening bracket of the candidate
 * @param startIndex input index at which the candidate text ends, before lookAhead is added
 * @param lookAhead  number of extra characters to include on each side of the candidate
 * @param nesting    nesting level, added to lookAhead when selecting processors
 * @return the match, or null when no processors are configured or none matched
 */
private ReferenceProcessorMatch matchLinkRef(Bracket opener, int startIndex, int lookAhead, int nesting) {
if (linkRefProcessorsData.nestingIndex.length == 0) return null;
ReferenceProcessorMatch match = null;
// candidate texts without and with the leading '!', created on first use and reused across processors
BasedSequence textNoBang = null;
BasedSequence textWithBang = null;
boolean wantBang;
int iMax = linkRefProcessorsData.processors.size();
int startProc = linkRefProcessorsData.nestingIndex[lookAhead + nesting];
for (int i = startProc; i < iMax; i++) {
LinkRefProcessor linkProcessor = linkRefProcessors.get(i);
BasedSequence nodeChars;
// stop at the first processor that needs a deeper nesting level than is available
if (lookAhead + nesting < linkProcessor.getBracketNestingLevel()) break;
wantBang = linkProcessor.getWantExclamationPrefix();
// preview the link ref
if (opener.image && wantBang) {
// this one has index off by one for the leading !
if (textWithBang == null) textWithBang = input.subSequence(opener.index - 1 - lookAhead, startIndex + lookAhead);
nodeChars = textWithBang;
} else {
// not an image opener: include a '!' only when the processor wants one and one precedes the candidate
if (wantBang && opener.index >= lookAhead + 1 && input.charAt(opener.index - 1 - lookAhead) == '!') {
if (textWithBang == null) textWithBang = input.subSequence(opener.index - 1 - lookAhead, startIndex + lookAhead);
nodeChars = textWithBang;
} else {
if (textNoBang == null) textNoBang = input.subSequence(opener.index - lookAhead, startIndex + lookAhead);
nodeChars = textNoBang;
}
}
if (linkProcessor.isMatch(nodeChars)) {
match = new ReferenceProcessorMatch(linkProcessor, wantBang, nodeChars);
break;
}
}
return match;
}
/**
 * Try to match a close bracket against the most recent bracket opener ({@code lastBracket}). Adds either a link or
 * image node, or the just-consumed bracket character as literal text, to the block's children. Whenever an opener
 * exists, {@code removeLastBracket()} is called for it before returning.
 *
 * The candidates are tried in this order:
 * 1. inline link/image: {@code (destination "title")} directly after the bracket,
 * 2. custom link ref processors, optionally looking ahead over nested brackets,
 * 3. reference link/image: an explicit {@code [label]}, or the bracket text itself used as the label.
 *
 * @return true, always; the close bracket character is consumed in every case
 */
protected boolean parseCloseBracket() {
index++;
int startIndex = index;
int nestedBrackets;
// NOTE(review): hadBang is assigned here but never read in this method
boolean hadBang = false;
// look through stack of delimiters for a [ or ![
Bracket opener = this.lastBracket;
if (opener == null) {
// No matching opener, just return a literal.
appendText(input.subSequence(index - 1, index));
return true;
}
if (!opener.allowed) {
// Matching opener but it's not allowed, just return a literal.
appendText(input.subSequence(index - 1, index));
removeLastBracket();
return true;
}
nestedBrackets = 0;
// Check to see if we have a link/image
BasedSequence dest = null;
BasedSequence title = null;
BasedSequence ref = null;
boolean isLinkOrImage = false;
boolean refIsBare = false;
ReferenceProcessorMatch linkRefProcessorMatch = null;
boolean refIsDefined = false;
BasedSequence linkOpener = BasedSequence.NULL;
BasedSequence linkCloser = BasedSequence.NULL;
BasedSequence bareRef = BasedSequence.NULL;
BasedSequence imageUrlContent = null;
// Inline link?
int preSpaceIndex = index;
// May need to skip spaces
if (options.spaceInLinkElements && peek() == ' ') {
sp();
}
if (peek() == '(') {
int savedIndex = index;
linkOpener = input.subSequence(index, index + 1);
index++;
spnl();
if ((dest = parseLinkDestination()) != null) {
if (options.parseMultiLineImageUrls && opener.image && !dest.startsWith("<") && dest.endsWith("?") && spnlUrl()) {
// possible multi-line image url
int contentStart = index;
int contentEnd = contentStart;
BasedSequence multiLineTitle;
// consume whole lines until one starts with an optional title followed by the closing ')';
// gives up (no link) when toEOL() returns null
while (true) {
sp();
multiLineTitle = parseLinkTitle();
if (multiLineTitle != null) sp();
if (peek() == ')') {
linkCloser = input.subSequence(index, index + 1);
index++;
imageUrlContent = input.subSequence(contentStart, contentEnd);
title = multiLineTitle;
isLinkOrImage = true;
break;
}
BasedSequence restOfLine = toEOL();
if (restOfLine == null) break;
contentEnd = index;
}
} else {
spnl();
// title needs a whitespace before
if (myParsing.WHITESPACE.matcher(input.subSequence(index - 1, index)).matches()) {
title = parseLinkTitle();
spnl();
}
// test for spaces in url making it invalid, otherwise anything else goes
if (peek() == ')') {
linkCloser = input.subSequence(index, index + 1);
index++;
isLinkOrImage = true;
} else {
// back out, no match
index = savedIndex;
}
}
} else {
// no destination after '(', back out
index = savedIndex;
}
} else {
// not followed by '(', undo any space skipping
index = preSpaceIndex;
}
if (!isLinkOrImage) {
// maybe reference link, need to see if it matches a custom processor or need to skip this reference because it will be processed on the next char
// as something else, like a wiki link
if (!options.matchLookaheadFirst) {
linkRefProcessorMatch = matchLinkRef(opener, startIndex, 0, nestedBrackets);
}
if (linkRefProcessorMatch != null) {
// have a match, then no look ahead for next matches
} else {
// need to figure out max nesting we should test based on what is max processor desire and max available
// nested inner ones are always only []
int maxWanted = linkRefProcessorsData.maxNesting;
int maxAvail = 0;
if (maxWanted > nestedBrackets) {
// need to see what is available
Bracket nested = opener;
// count immediately adjacent opening brackets that have a matching ']' ahead of the current position
while (nested.previous != null && nested.index == nested.previous.index + 1 && peek(maxAvail) == ']') {
nested = nested.previous;
maxAvail++;
if (maxAvail + nestedBrackets == maxWanted || nested.image) break;
}
}
// try the deepest available nesting first, down to no look-ahead
for (int nesting = maxAvail + 1; nesting-- > 0; ) {
linkRefProcessorMatch = matchLinkRef(opener, startIndex, nesting, nestedBrackets);
if (linkRefProcessorMatch != null) {
if (nesting > 0) {
// consume the extra closing brackets and drop the inner openers that are now part of the match
while (nesting-- > 0) {
index++;
lastBracket.node.unlink();
removeLastBracket();
}
opener = lastBracket;
}
break;
}
}
}
if (linkRefProcessorMatch == null) {
// See if there's a link label
int beforeLabel = index;
int labelLength = parseLinkLabel();
if (labelLength > 2) {
ref = input.subSequence(beforeLabel, beforeLabel + labelLength);
} else if (!opener.bracketAfter) {
// Empty or missing second label can only be a reference if there's no unescaped bracket in it.
bareRef = input.subSequence(beforeLabel, beforeLabel + labelLength);
if (opener.image) {
// this one has index off by one for the leading !
ref = input.subSequence(opener.index - 1, startIndex);
} else {
ref = input.subSequence(opener.index, startIndex);
}
refIsBare = true;
}
if (ref != null) {
String normalizedLabel = Escaping.normalizeReferenceChars(ref, true);
if (referenceRepository.containsKey(normalizedLabel)) {
BasedSequence sequence = input.subSequence(opener.index, startIndex);
boolean containsLinks = containsLinkRefs(refIsBare ? ref : sequence, opener.node.getNext(), true);
isLinkOrImage = !containsLinks;
refIsDefined = true;
} else {
// need to test if we are cutting in the middle of some other delimiters matching, if we are not then we will make this into a tentative
if (!opener.isStraddling(ref)) {
// link ref, otherwise we will break
// it is the innermost ref and is bare, if not bare then we treat it as a ref
if (!refIsBare && peek() == '[') {
// NOTE(review): beforeNext is never read in this method
int beforeNext = index;
int nextLength = parseLinkLabel();
if (nextLength > 0) {
// not bare and not defined and followed by another [], roll back to before the label and make it just text
index = beforeLabel;
} else {
// undefined ref, create a tentative one but only if does not contain any other link refs
boolean containsLinks = containsLinkRefs(ref, opener.node.getNext(), null);
if (!containsLinks) {
refIsBare = true;
isLinkOrImage = true;
}
}
} else {
// undefined ref, bare or followed by empty [], create a tentative link ref but only if does not contain any other link refs
boolean containsLinks = containsLinkRefs(ref, opener.node.getNext(), null);
if (!containsLinks) {
isLinkOrImage = true;
}
}
}
}
}
}
}
if (isLinkOrImage || linkRefProcessorMatch != null) {
// If we got here, open is a potential opener
// Flush text now. We don't need to worry about combining it with adjacent text nodes, as we'll wrap it in a
// link or image node.
flushTextNode();
Node insertNode;
boolean isImage = opener.image;
if (linkRefProcessorMatch != null) {
if (!linkRefProcessorMatch.wantExclamation && isImage) {
// processor does not take the leading !, emit it as text and strip it from the opener node
appendText(input.subSequence(opener.index - 1, opener.index));
opener.node.setChars(opener.node.getChars().subSequence(1));
//opener.image = false;
isImage = false;
}
insertNode = linkRefProcessorMatch.processor.createNode(linkRefProcessorMatch.nodeChars);
} else {
// a non-null ref means reference style, otherwise inline style
insertNode = ref != null ? isImage ? new ImageRef() : new LinkRef() : isImage ? new Image() : new Link();
}
{
// move every node following the opener node under the new node
Node node = opener.node.getNext();
while (node != null) {
Node next = node.getNext();
insertNode.appendChild(node);
node = next;
}
}
if (linkRefProcessorMatch != null) {
// may need to adjust children's text because some characters were part of the processor's opener/closer
if (insertNode.hasChildren()) {
final BasedSequence original = insertNode.getChildChars();
final BasedSequence text = linkRefProcessorMatch.processor.adjustInlineText(document, insertNode);
// may need to remove some delimiters if they span across original and changed text boundary or if now they are outside text boundary
Delimiter delimiter = lastDelimiter;
while (delimiter != null) {
Delimiter prevDelimiter = delimiter.previous;
final BasedSequence delimiterChars = delimiter.getInput().subSequence(delimiter.getStartIndex(), delimiter.getEndIndex());
if (original.containsAllOf(delimiterChars)) {
if (!text.containsAllOf(delimiterChars) || !linkRefProcessorMatch.processor.allowDelimiters(delimiterChars, document, insertNode)) {
// remove it
removeDelimiterKeepNode(delimiter);
}
}
delimiter = prevDelimiter;
}
if (!text.containsAllOf(original)) {
// now need to truncate child text
for (Node node : insertNode.getChildren()) {
final BasedSequence nodeChars = node.getChars();
if (text.containsSomeOf(nodeChars)) {
if (!text.containsAllOf(nodeChars)) {
// truncate the contents to intersection of node's chars and adjusted chars
BasedSequence chars = text.intersect(nodeChars);
node.setChars(chars);
}
} else {
// remove the node
node.unlink();
}
}
}
}
}
appendNode(insertNode);
if (insertNode instanceof RefNode) {
// set up the parts
RefNode refNode = (RefNode) insertNode;
refNode.setReferenceChars(ref);
if (refIsDefined) refNode.setDefined(true);
if (!refIsBare) {
refNode.setTextChars(input.subSequence(opener.index, startIndex));
} else if (!bareRef.isEmpty()) {
// bare ref followed by an empty label: first and last characters of that label are recorded as the text markers
refNode.setTextOpeningMarker(bareRef.subSequence(0, 1));
refNode.setTextClosingMarker(bareRef.endSequence(1));
}
insertNode.setCharsFromContent();
} else if (insertNode instanceof InlineLinkNode) {
// set dest and title
InlineLinkNode inlineLinkNode = (InlineLinkNode) insertNode;
inlineLinkNode.setUrlChars(dest);
inlineLinkNode.setTitleChars(title);
inlineLinkNode.setLinkOpeningMarker(linkOpener);
inlineLinkNode.setLinkClosingMarker(linkCloser);
inlineLinkNode.setTextChars(isImage ? input.subSequence(opener.index - 1, startIndex) : input.subSequence(opener.index, startIndex));
if (imageUrlContent != null) {
// only set on the multi-line image url path above, which requires opener.image
((Image) insertNode).setUrlContent(imageUrlContent);
}
insertNode.setCharsFromContent();
}
// Process delimiters such as emphasis inside link/image
processDelimiters(opener.previousDelimiter);
Node toRemove = opener.node;
removeLastBracket();
if (linkRefProcessorMatch != null) {
linkRefProcessorMatch.processor.updateNodeElements(document, insertNode);
}
// Links within links are not allowed. We found this link, so there can be no other link around it.
if (insertNode instanceof Link) {
Bracket bracket = this.lastBracket;
while (bracket != null) {
if (!bracket.image) {
// Disallow link opener. It will still get matched, but will not result in a link.
bracket.allowed = false;
}
bracket = bracket.previous;
}
// collapse any link refs contained in this link, they are duds, link takes precedence
// TODO: add a test to see if all link refs should be collapsed or just undefined ones
collapseLinkRefChildren(insertNode, null);
} else if (insertNode instanceof RefNode) {
// have a link ref, collapse to text any tentative ones contained in it, they are duds
collapseLinkRefChildren(insertNode, true);
}
toRemove.unlink();
return true;
} else { // no link or image
// rewind to just after the close bracket and emit it as literal text
index = startIndex;
appendText(input.subSequence(index - 1, index));
removeLastBracket();
return true;
}
}
/**
 * Test whether any {@link LinkRef} in a sibling chain overlaps the offset range of the given text.
 *
 * @param nodeChars text whose start/end offsets define the range to test
 * @param next      first node of the sibling chain to scan, may be null
 * @param isDefined null to consider every link ref, otherwise only link refs whose {@code isDefined()} equals this value
 * @return true if an overlapping link ref was found
 */
protected boolean containsLinkRefs(BasedSequence nodeChars, Node next, Boolean isDefined) {
    final int rangeStart = nodeChars.getStartOffset();
    final int rangeEnd = nodeChars.getEndOffset();

    for (Node sibling = next; sibling != null; sibling = sibling.getNext()) {
        if (!(sibling instanceof LinkRef)) {
            continue;
        }
        if (isDefined != null && !(((LinkRef) sibling).isDefined() == isDefined)) {
            continue;
        }
        // overlap: starts before the range ends and ends after the range starts
        if (sibling.getChars().getStartOffset() < rangeEnd && sibling.getChars().getEndOffset() > rangeStart) {
            return true;
        }
    }
    return false;
}
/**
 * Replace link-ref-derived children of a node with their text, recursively.
 *
 * @param node        parent whose direct children are examined
 * @param isTentative null to collapse every {@link LinkRefDerived} child, otherwise only those whose
 *                    {@code isTentative()} equals this value
 */
protected void collapseLinkRefChildren(Node node, Boolean isTentative) {
    boolean collapsedAny = false;
    Node following;

    for (Node current = node.getFirstChild(); current != null; current = following) {
        // grab the successor first, current may be unlinked below
        following = current.getNext();

        if (!(current instanceof LinkRefDerived)) {
            continue;
        }
        // NOTE(review): the cast assumes every LinkRefDerived reaching here with a non-null filter is a RefNode — confirm
        if (isTentative != null && !(isTentative == ((RefNode) current).isTentative())) {
            continue;
        }

        // collapse nested ones first, then move this child's contents to text in its place
        collapseLinkRefChildren(current, isTentative);
        current.unlink();

        final TextNodeConverter converter = new TextNodeConverter(current.getChars());
        converter.addChildrenOf(current);
        if (following == null) {
            converter.appendMergedTo(node);
        } else {
            converter.insertMergedBefore(following);
        }
        collapsedAny = true;
    }

    if (collapsedAny) {
        TextNodeConverter.mergeTextNodes(node);
    }
}
/**
 * Attempt to parse a link destination at the current position.
 *
 * The angle-bracket pattern is tried first. Otherwise, depending on
 * {@code options.linksAllowMatchedParentheses}, either the matched-parentheses pattern or the plain
 * destination pattern is used. With {@code options.spaceInLinkUrls} trailing spaces are trimmed from
 * non-angle destinations.
 *
 * @return the destination or null if no match.
 */
@Override
public BasedSequence parseLinkDestination() {
    final BasedSequence angled = match(myParsing.LINK_DESTINATION_ANGLES);
    if (angled != null) {
        return angled;
    }

    if (!options.linksAllowMatchedParentheses) {
        // spec 0.27 compatibility
        final BasedSequence plain = match(myParsing.LINK_DESTINATION);
        if (plain == null) {
            return null;
        }
        return options.spaceInLinkUrls ? plain.trimEnd(BasedSequence.SPACE) : plain;
    }

    // allow matched parenthesis
    BasedSequence candidate = match(myParsing.LINK_DESTINATION_MATCHED_PARENS);
    if (candidate == null) {
        return null;
    }

    final int candidateLength = candidate.length();
    int unclosed = 0;
    scan:
    for (int pos = 0; pos < candidateLength; pos++) {
        switch (candidate.charAt(pos)) {
            case '\\':
                // escape, skip the escaped character
                pos++;
                break;
            case '(':
                unclosed++;
                break;
            case ')':
                if (unclosed == 0) {
                    // unbalanced ')': truncate to here and rewind so the ')' is left to be parsed
                    index -= candidateLength - pos;
                    candidate = candidate.subSequence(0, pos);
                    break scan;
                }
                unclosed--;
                break;
            default:
                break;
        }
    }
    return options.spaceInLinkUrls ? candidate.trimEnd(BasedSequence.SPACE) : candidate;
}
/**
 * Attempt to parse a link title at the current position.
 *
 * The matched text is returned as is; nothing is chopped off or unescaped here.
 *
 * @return the matched title or null if no match.
 */
@Override
public BasedSequence parseLinkTitle() {
    // match() yields null when the pattern does not match, which is passed straight through
    return match(myParsing.LINK_TITLE);
}
/**
 * Attempt to parse a link label at the current position.
 *
 * @return number of characters matched, 0 if there is no label.
 */
@Override
public int parseLinkLabel() {
    final BasedSequence label = match(myParsing.LINK_LABEL);
    if (label != null) {
        return label.length();
    }
    return 0;
}
/**
 * Attempt to parse an autolink (URL or email in pointy brackets).
 *
 * The email pattern is tried before the general autolink pattern. The first and last matched characters are
 * passed to the node as separate sequences from the text between them.
 *
 * @return true if processed characters false otherwise
 */
@Override
public boolean parseAutolink() {
    BasedSequence matched = match(myParsing.EMAIL_AUTOLINK);
    if (matched != null) {
        final int last = matched.length() - 1;
        appendNode(new MailLink(matched.subSequence(0, 1), matched.subSequence(1, last), matched.subSequence(last, last + 1)));
        return true;
    }

    matched = match(myParsing.AUTOLINK);
    if (matched != null) {
        final int last = matched.length() - 1;
        appendNode(new AutoLink(matched.subSequence(0, 1), matched.subSequence(1, last), matched.subSequence(last, last + 1)));
        return true;
    }

    return false;
}
/**
 * Attempt to parse inline HTML.
 *
 * A match that starts with {@code <!--} and ends with {@code -->} is added as an {@link HtmlInlineComment},
 * any other match as an {@link HtmlInline}.
 *
 * @return true if processed characters false otherwise
 */
@Override
public boolean parseHtmlInline() {
    BasedSequence m = match(myParsing.HTML_TAG);
    if (m == null) {
        return false;
    }

    // separate HTML comments from the rest; testing startsWith("") would be true for every match and
    // turn all inline HTML into comment nodes
    HtmlInlineBase node;
    if (m.startsWith("<!--") && m.endsWith("-->")) {
        node = new HtmlInlineComment(m);
    } else {
        node = new HtmlInline(m);
    }
    appendNode(node);
    return true;
}
/**
 * Attempt to parse an entity at the current position and append it as an {@link HtmlEntity} node.
 *
 * @return true if processed characters false otherwise
 */
@Override
public boolean parseEntity() {
    final BasedSequence entityChars = match(myParsing.ENTITY_HERE);
    if (entityChars == null) {
        return false;
    }
    appendNode(new HtmlEntity(entityChars));
    return true;
}
/**
 * Parse a run of ordinary characters as a plain string.
 *
 * Advances up to, but not including, the next character flagged in {@code specialCharacters} or to the end of
 * input.
 *
 * @return true if processed characters false otherwise
 */
protected boolean parseString() {
    final int runStart = index;
    final int inputEnd = input.length();

    while (index != inputEnd && !specialCharacters.get(input.charAt(index))) {
        index++;
    }

    if (runStart == index) {
        // already at a special character or at the end, nothing consumed
        return false;
    }
    appendText(input, runStart, index);
    return true;
}
/**
 * Delegates to the superclass clone without any additional copying.
 *
 * @return the object returned by {@code super.clone()}
 * @throws CloneNotSupportedException if the superclass refuses to clone
 */
@Override
protected Object clone() throws CloneNotSupportedException {
    final Object copy = super.clone();
    return copy;
}
/**
 * Scan a run of {@code delimiterChar} at the current position and report how many there are and whether the run
 * is positioned such that it can open and/or close. The parser position is restored before returning.
 *
 * @param delimiterProcessor delimiter processor instance
 * @param delimiterChar      delimiter character being scanned
 * @return information about delimiter run, or {@code null} when the run is shorter than the processor's minimum
 *         length, or when it can neither open nor close and the processor skips such runs
 */
protected DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) {
    final int runStart = index;

    // consume the whole run of identical delimiter characters
    int runLength = 0;
    for (; peek() == delimiterChar; index++) {
        runLength++;
    }

    if (runLength < delimiterProcessor.getMinLength()) {
        index = runStart;
        return null;
    }

    // start and end of input are treated as a line break
    final String before = runStart == 0 ? "\n" : String.valueOf(input.charAt(runStart - 1));
    final char following = peek();
    final String after = following == '\0' ? "\n" : String.valueOf(following);

    // We could be more lazy here, in most cases we don't need to do every match case.
    final boolean beforeIsWhitespace = myParsing.UNICODE_WHITESPACE_CHAR.matcher(before).matches();
    final boolean afterIsWhitespace = myParsing.UNICODE_WHITESPACE_CHAR.matcher(after).matches();

    final boolean beforeIsPunctuation;
    final boolean afterIsPunctuation;
    if (options.inlineDelimiterDirectionalPunctuations) {
        beforeIsPunctuation = myParsing.PUNCTUATION_OPEN.matcher(before).matches();
        afterIsPunctuation = myParsing.PUNCTUATION_CLOSE.matcher(after).matches();
    } else {
        beforeIsPunctuation = myParsing.PUNCTUATION.matcher(before).matches();
        afterIsPunctuation = myParsing.PUNCTUATION.matcher(after).matches();
    }

    // same flanking rule for both punctuation modes, only the punctuation classes differ
    final boolean leftFlanking = !afterIsWhitespace
            && (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation);
    final boolean rightFlanking = !beforeIsWhitespace
            && (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation);

    final boolean canOpen = delimiterChar == delimiterProcessor.getOpeningCharacter()
            && delimiterProcessor.canBeOpener(before, after, leftFlanking, rightFlanking, beforeIsPunctuation, afterIsPunctuation, beforeIsWhitespace, afterIsWhitespace);
    final boolean canClose = delimiterChar == delimiterProcessor.getClosingCharacter()
            && delimiterProcessor.canBeCloser(before, after, leftFlanking, rightFlanking, beforeIsPunctuation, afterIsPunctuation, beforeIsWhitespace, afterIsWhitespace);

    // this was only a look, put the position back
    index = runStart;

    if (!canOpen && !canClose && delimiterProcessor.skipNonOpenerCloser()) {
        return null;
    }
    return new DelimiterData(runLength, canOpen, canClose);
}
/**
 * Match closers with openers on the delimiter stack above {@code stackBottom}, hand each matched pair to its
 * delimiter processor, and finally remove every delimiter above {@code stackBottom}, keeping the nodes of
 * unmatched ones.
 *
 * @param stackBottom delimiter below which nothing is touched, null to process the whole stack
 */
@Override
public void processDelimiters(Delimiter stackBottom) {
    // per delimiter character: lowest delimiter worth searching down to for an opener
    Map<Character, Delimiter> openersBottom = new HashMap<Character, Delimiter>();

    // find first closer above stackBottom:
    Delimiter closer = lastDelimiter;
    while (closer != null && closer.previous != stackBottom) {
        closer = closer.previous;
    }

    // move forward, looking for closers, and handling each
    while (closer != null) {
        char delimiterChar = closer.delimiterChar;
        DelimiterProcessor delimiterProcessor = delimiterProcessors.get(delimiterChar);
        if (!closer.canClose || delimiterProcessor == null) {
            closer = closer.next;
            continue;
        }

        char openingDelimiterChar = delimiterProcessor.getOpeningCharacter();

        // found delimiter closer. now look back for first matching opener:
        int useDelims = 0;
        boolean openerFound = false;
        boolean potentialOpenerFound = false;
        Delimiter opener = closer.previous;
        while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) {
            if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) {
                potentialOpenerFound = true;
                useDelims = delimiterProcessor.getDelimiterUse(opener, closer);
                if (useDelims > 0) {
                    openerFound = true;
                    break;
                }
            }
            opener = opener.previous;
        }

        if (!openerFound) {
            if (!potentialOpenerFound) {
                // Set lower bound for future searches for openers.
                // Only do this when we didn't even have a potential
                // opener (one that matches the character and can open).
                // If an opener was rejected because of the number of
                // delimiters (e.g. because of the "multiple of 3" rule),
                // we want to consider it next time because the number
                // of delimiters can change as we continue processing.
                openersBottom.put(delimiterChar, closer.previous);
                if (!closer.canOpen) {
                    // We can remove a closer that can't be an opener,
                    // once we've seen there's no matching opener:
                    removeDelimiterKeepNode(closer);
                }
            }
            closer = closer.next;
            continue;
        }

        // Remove number of used delimiters from stack and inline nodes.
        // The counts are reduced while the delimiters in between are removed, restored for the processor call,
        // then reduced again for good.
        opener.numDelims -= useDelims;
        closer.numDelims -= useDelims;

        removeDelimitersBetween(opener, closer);

        opener.numDelims += useDelims;
        closer.numDelims += useDelims;

        delimiterProcessor.process(opener, closer, useDelims);

        opener.numDelims -= useDelims;
        closer.numDelims -= useDelims;

        // No delimiter characters left to process, so we can remove delimiter and the now empty node.
        if (opener.numDelims == 0) {
            removeDelimiterAndNode(opener);
        } else {
            // adjust number of characters in the node by keeping outer of numDelims
            opener.node.setChars(opener.node.getChars().subSequence(0, opener.numDelims));
        }

        if (closer.numDelims == 0) {
            Delimiter next = closer.next;
            removeDelimiterAndNode(closer);
            closer = next;
        } else {
            // adjust number of characters in the node by keeping outer of numDelims;
            // closer is not advanced, the remaining delimiters get another pass
            BasedSequence chars = closer.node.getChars();
            int length = chars.length();
            closer.node.setChars(chars.subSequence(length - closer.numDelims, length));
            closer.setIndex(closer.getIndex() + useDelims);
        }
    }

    // remove all remaining delimiters above stackBottom
    while (lastDelimiter != null && lastDelimiter != stackBottom) {
        removeDelimiterKeepNode(lastDelimiter);
    }
}
/**
 * Remove every delimiter strictly between opener and closer from the stack, keeping their nodes.
 *
 * @param opener delimiter at which removal stops, not removed itself
 * @param closer delimiter below which removal starts, not removed itself
 */
@Override
public void removeDelimitersBetween(Delimiter opener, Delimiter closer) {
    Delimiter earlier;
    for (Delimiter current = closer.previous; current != null && current != opener; current = earlier) {
        // read the link before removal
        earlier = current.previous;
        removeDelimiterKeepNode(current);
    }
}
/**
 * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`.
 *
 * When the delimiter has non-delimiter text nodes on both sides, the previous one is extended to the end of
 * the next one and the next one is unlinked.
 *
 * @param delim delimiter to remove
 */
@Override
public void removeDelimiterAndNode(Delimiter delim) {
    final Text delimiterNode = delim.node;
    final Text textBefore = delim.getPreviousNonDelimiterTextNode();
    final Text textAfter = delim.getNextNonDelimiterTextNode();

    final boolean hasTextOnBothSides = textBefore != null && textAfter != null;
    if (hasTextOnBothSides) {
        // Merge adjacent text nodes
        final int mergedStart = textBefore.getStartOffset();
        final int mergedEnd = textAfter.getEndOffset();
        textBefore.setChars(input.baseSubSequence(mergedStart, mergedEnd));
        textAfter.unlink();
    }

    delimiterNode.unlink();
    removeDelimiter(delim);
}
/**
 * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`.
 *
 * The delimiter's processor, if any, may supply a replacement node for the unmatched delimiter. If the kept
 * node is a {@link Text}, adjacent non-delimiter text nodes are merged into it.
 *
 * @param delim delimiter being processed
 */
@Override
public void removeDelimiterKeepNode(Delimiter delim) {
    final DelimiterProcessor processor = delimiterProcessors.get(delim.delimiterChar);

    Node kept = processor == null ? null : processor.unmatchedDelimiterNode(this, delim);
    if (kept == null) {
        kept = delim.node;
    } else if (kept != delim.node) {
        // replace node
        delim.node.insertAfter(kept);
        delim.node.unlink();
    }

    final Text textBefore = delim.getPreviousNonDelimiterTextNode();
    final Text textAfter = delim.getNextNonDelimiterTextNode();

    if (kept instanceof Text && (textBefore != null || textAfter != null)) {
        // Merge adjacent text nodes into one: stretch the kept node over whichever neighbours exist
        final int mergedStart = textBefore != null ? textBefore.getStartOffset() : kept.getStartOffset();
        final int mergedEnd = textAfter != null ? textAfter.getEndOffset() : kept.getEndOffset();
        kept.setChars(input.baseSubSequence(mergedStart, mergedEnd));

        if (textBefore != null) {
            textBefore.unlink();
        }
        if (textAfter != null) {
            textAfter.unlink();
        }
    }

    removeDelimiter(delim);
}
/**
 * Unlink a delimiter from the doubly linked delimiter stack. The delimiter's node is not touched.
 *
 * @param delim delimiter to take off the stack
 */
@Override
public void removeDelimiter(Delimiter delim) {
    final Delimiter below = delim.previous;
    final Delimiter above = delim.next;

    if (below != null) {
        below.next = above;
    }

    if (above != null) {
        above.previous = below;
    } else {
        // top of stack
        this.lastDelimiter = below;
    }
}
}