org.aya.literate.parser.BaseMdParser Maven / Gradle / Ivy
The newest version!
// Copyright (c) 2020-2024 Tesla (Yinsen) Zhang.
// Use of this source code is governed by the MIT license that can be found in the LICENSE.md file.
package org.aya.literate.parser;
import com.intellij.openapi.util.TextRange;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.tree.TokenSet;
import kala.collection.Seq;
import kala.collection.SeqView;
import kala.collection.immutable.ImmutableSeq;
import kala.collection.mutable.MutableList;
import kala.control.Option;
import org.aya.literate.Literate;
import org.aya.literate.UnsupportedMarkdown;
import org.aya.pretty.backend.md.MdStyle;
import org.aya.pretty.doc.Doc;
import org.aya.pretty.doc.Style;
import org.aya.util.error.SourceFile;
import org.aya.util.error.SourcePos;
import org.aya.util.reporter.Reporter;
import org.intellij.markdown.MarkdownElementType;
import org.intellij.markdown.MarkdownElementTypes;
import org.intellij.markdown.MarkdownTokenTypes;
import org.intellij.markdown.ast.ASTNode;
import org.intellij.markdown.ast.ASTUtilKt;
import org.intellij.markdown.ext.blocks.frontmatter.FrontMatterHeaderProvider;
import org.intellij.markdown.flavours.gfm.*;
import org.intellij.markdown.parser.MarkdownParser;
import org.intellij.markdown.parser.MarkerProcessor;
import org.intellij.markdown.parser.MarkerProcessorFactory;
import org.intellij.markdown.parser.markerblocks.MarkerBlockProvider;
import org.intellij.markdown.parser.sequentialparsers.EmphasisLikeParser;
import org.intellij.markdown.parser.sequentialparsers.SequentialParser;
import org.intellij.markdown.parser.sequentialparsers.SequentialParserManager;
import org.intellij.markdown.parser.sequentialparsers.impl.*;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.util.ArrayList;
import java.util.Objects;
public class BaseMdParser {
// NOTE(review): the doc comment below talks about a per-line "index", but it is attached to `file`
// and no index field is declared in the visible part of this class — it looks stale; confirm and remove.
/** For empty line that end with \n, the index points to \n */
protected final @NotNull SourceFile file;
/** Reporter supplied by the caller; the visible part of the class only stores it. */
protected final @NotNull Reporter reporter;
// Value of the constructor parameter `lang`.
// NOTE(review): the generic type arguments on this declaration look truncated (`ImmutableSeq>`) — restore them from upstream.
protected final @NotNull ImmutableSeq> languages;
/**
 * Inline parsers returned, in this order, by the parser manager that {@code parseLiterate()} installs.
 * The list is mutable and protected — presumably so subclasses can register more parsers; confirm.
 */
protected final @NotNull MutableList sequentialParsers = MutableList.of(
new AutolinkParser(Seq.of(MarkdownTokenTypes.AUTOLINK, GFMTokenTypes.GFM_AUTOLINK)),
new BacktickParser(),
new MathParser(),
new ImageParser(),
new InlineLinkParser(),
new ReferenceLinkParser(),
new EmphasisLikeParser(new EmphStrongDelimiterParser(), new StrikeThroughDelimiterParser()));
/**
 * Stores the three collaborators; no parsing happens at construction time.
 *
 * @param file     source file whose text {@code parseLiterate()} reads
 * @param reporter stored in the {@code reporter} field
 * @param lang     stored in the {@code languages} field
 */
// NOTE(review): the type of `lang` looks truncated (`ImmutableSeq>`) — restore the generic arguments from upstream.
public BaseMdParser(@NotNull SourceFile file, @NotNull Reporter reporter, @NotNull ImmutableSeq> lang) {
this.file = file;
this.reporter = reporter;
this.languages = lang;
}
/// region Entry
/**
 * Hook called from the marker processor built in {@code parseLiterate()} to adjust the list of
 * marker block providers. This implementation puts a {@link FrontMatterHeaderProvider} at the
 * front of the list.
 *
 * @param providers provider list obtained from {@code super.initMarkerBlockProviders()}; mutated in place
 */
// NOTE(review): the element type of `providers` looks truncated (`ArrayList>`) — restore from upstream.
protected void addProviders(ArrayList> providers) {
providers.addFirst(new FrontMatterHeaderProvider());
}
/**
 * Parses the text of {@code file} as Markdown and converts the resulting tree into a {@link Literate}.
 * <p>
 * The parser uses a {@link GFMFlavourDescriptor} with two overrides: the inline parser sequence is
 * {@code sequentialParsers}, and the marker block providers are the defaults after being passed
 * through {@code addProviders}.
 *
 * @return the result of {@code mapNode} applied to the root of the parsed tree
 */
public @NotNull Literate parseLiterate() {
var flavour = new GFMFlavourDescriptor() {
// Inline parsing: hand out this parser's own list instead of the flavour's default.
@Override public @NotNull SequentialParserManager getSequentialParserManager() {
return new SequentialParserManager() {
@Override public @NotNull Seq getParserSequence() {
return sequentialParsers;
}
};
}
// Block parsing: start from the stock GFM marker processor and let addProviders() tweak its providers.
@Override public @NotNull MarkerProcessorFactory getMarkerProcessorFactory() {
return holder -> new GFMMarkerProcessor(holder, GFMConstraints.Companion.getBASE()) {
@Override protected @NotNull ArrayList> initMarkerBlockProviders() {
var providers = super.initMarkerBlockProviders();
addProviders(providers);
return providers;
}
};
}
};
var parser = new MarkdownParser(flavour);
// Build the tree from the whole source text, then convert it starting at the root node.
return mapNode(parser.buildMarkdownTreeFromString(file.sourceCode()));
}
/// endregion Entry
/// region Parsing
/**
 * Converts every child of {@code parent} by delegating to the {@code SeqView} overload.
 *
 * @param parent node whose children are converted
 * @return the converted children, as produced by the {@code SeqView} overload
 */
protected @NotNull ImmutableSeq mapChildren(@NotNull ASTNode parent) {
  var childNodes = parent.getChildren();
  return mapChildren(childNodes.view());
}
/**
 * Node types that make {@code mapChildren} drop an end-of-line token: once a child of one of these
 * types has been seen, the next {@code MarkdownTokenTypes.EOL} child is skipped instead of converted.
 */
public static final TokenSet NATURAL_EOL = TokenSet.create(
MarkdownElementTypes.PARAGRAPH, MarkdownElementTypes.BLOCK_QUOTE,
MarkdownElementTypes.CODE_FENCE, MarkdownElementTypes.CODE_BLOCK,
MarkdownElementTypes.ORDERED_LIST, MarkdownElementTypes.UNORDERED_LIST, MarkdownElementTypes.LIST_ITEM,
GFMElementTypes.TABLE, GFMElementTypes.BLOCK_MATH,
FrontMatterHeaderProvider.FRONT_MATTER_HEADER
);
/**
 * Converts the given nodes one by one with {@code mapNode}, leaving out the EOL token that
 * follows a node whose type is in {@link #NATURAL_EOL}.
 *
 * @param nodes nodes to convert, in document order
 * @return the converted nodes, without the skipped EOL tokens
 */
protected @NotNull ImmutableSeq mapChildren(@NotNull SeqView nodes) {
  var mapped = MutableList.create();
  // Set by a NATURAL_EOL node; cleared only when an EOL token has actually been dropped.
  var skipNextEol = false;
  for (var node : nodes) {
    if (NATURAL_EOL.contains(node.getType())) {
      skipNextEol = true;
    } else if (skipNextEol && node.getType() == MarkdownTokenTypes.EOL) {
      skipNextEol = false;
      continue;
    }
    mapped.append(mapNode(node));
  }
  return mapped.toImmutableSeq();
}
/**
 * ATX heading element types ordered by level, so that the position of a type in this sequence
 * plus one is its heading level (see the {@code i + 1} in {@code mapNode}).
 */
private static final @NotNull ImmutableSeq HEADINGS = ImmutableSeq.of(
MarkdownElementTypes.ATX_1,
MarkdownElementTypes.ATX_2,
MarkdownElementTypes.ATX_3,
MarkdownElementTypes.ATX_4,
MarkdownElementTypes.ATX_5,
MarkdownElementTypes.ATX_6
);
/**
 * Looks for a child of {@code node} with the given type via {@code findChildOfType}.
 *
 * @param node node whose children are searched
 * @param type element type to look for
 * @return the lookup result wrapped with {@code Option.ofNullable}
 */
private static Option peekChild(@NotNull ASTNode node, @NotNull IElementType type) {
  var found = node.findChildOfType(type);
  return Option.ofNullable(found);
}
/**
 * Returns the text that {@code ASTUtilKt.getTextInNode} yields for {@code node} against this
 * parser's source code, converted to a {@link String}.
 *
 * @param node node whose text is wanted
 * @return the node's text as a string
 */
@NotNull protected String getTextInNode(@NotNull ASTNode node) {
  var text = ASTUtilKt.getTextInNode(node, file.sourceCode());
  return text.toString();
}
/**
 * Looks up the node's type in {@link #HEADINGS}.
 *
 * @param node node to classify
 * @return the position of the node's type in {@code HEADINGS}; the caller treats {@code -1} as
 * "not a heading"
 */
private static int isHeading(@NotNull ASTNode node) {
  var nodeType = node.getType();
  return HEADINGS.indexOf(nodeType);
}
/**
 * Pieces of an inline link as extracted by {@code mapInlineLink}.
 *
 * @param title       text of the link title, or {@code null} when the link has no title node
 * @param destination text of the link destination node
 * @param children    converted children of the link text node
 */
protected record InlineLinkData(@Nullable String title, @NotNull String destination,
@NotNull ImmutableSeq children) { }
/**
 * Extracts title, destination and converted link text from an inline link node.
 *
 * @param node a node that has {@code LINK_TEXT} and {@code LINK_DESTINATION} children and,
 *             optionally, a {@code LINK_TITLE} child
 * @return the extracted data; {@code title} is {@code null} when there is no title child
 */
protected @NotNull InlineLinkData mapInlineLink(@NotNull ASTNode node) {
  // Child lookups happen in the same order as before so that any failure surfaces identically.
  var linkText = node.childOfType(MarkdownElementTypes.LINK_TEXT);
  var linkDestination = node.childOfType(MarkdownElementTypes.LINK_DESTINATION);
  var titleText = peekChild(node, MarkdownElementTypes.LINK_TITLE)
    .map(titleNode -> titleNode.childOfType(MarkdownTokenTypes.TEXT));
  var destination = getTextInNode(linkDestination);
  var title = titleText.map(this::getTextInNode).getOrNull();
  // One surrounding child is trimmed from each side of the link text — presumably the brackets.
  var content = mapChildren(linkText.childrenWithoutSurrounding(1));
  return new InlineLinkData(title, destination, content);
}
protected @NotNull Literate mapNode(@NotNull ASTNode node) {
var type = node.getType();
if (type == MarkdownTokenTypes.EOL || type == MarkdownTokenTypes.HARD_LINE_BREAK) {
return new Literate.Raw(Doc.line());
}
// do not confuse with MarkdownTokenTypes.EMPH
if (type == MarkdownElementTypes.EMPH) {
return new Literate.Many(Style.italic(), mapChildren(
node.childrenWithoutSurrounding(1))
);
}
// Strong emphasis must be rendered bold; using the italic style here made it
// indistinguishable from the EMPH branch above. Two surrounding children are trimmed
// from each side — presumably the doubled delimiter tokens.
if (type == MarkdownElementTypes.STRONG) {
  return new Literate.Many(Style.bold(), mapChildren(
    node.childrenWithoutSurrounding(2))
  );
}
if (type == MarkdownElementTypes.PARAGRAPH) {
return new Literate.Many(MdStyle.GFM.Paragraph, mapChildren(node));
}
if (type == MarkdownElementTypes.BLOCK_QUOTE) {
return new Literate.Many(MdStyle.GFM.BlockQuote, mapChildren(node));
}
var i = isHeading(node);
if (i != -1) {
var atxContent = node.childOfType(MarkdownTokenTypes.ATX_CONTENT);
// 1-based headings
return new Literate.Many(new MdStyle.GFM.Heading(i + 1),
mapChildren(atxContent.getChildren().view()
.dropWhile(it -> it.getType() == MarkdownTokenTypes.WHITE_SPACE)
)
);
}
if (type == MarkdownElementTypes.INLINE_LINK) {
var data = mapInlineLink(node);
return new Literate.HyperLink(data.destination, data.title, data.children);
}
if (type == MarkdownElementTypes.IMAGE) {
var inner = node.childOfType(MarkdownElementTypes.INLINE_LINK);
var data = mapInlineLink(inner);
return new Literate.Image(data.destination, data.children);
}
if (type == MarkdownElementTypes.HTML_BLOCK) {
var content = getTextInNode(node);
if (content.startsWith("