// org.aya.literate.parser.BaseMdParser (Maven / Gradle / Ivy)
// Copyright (c) 2020-2024 Tesla (Yinsen) Zhang.
// Use of this source code is governed by the MIT license that can be found in the LICENSE.md file.
package org.aya.literate.parser;
import kala.collection.Seq;
import kala.collection.SeqView;
import kala.collection.immutable.ImmutableSeq;
import kala.collection.immutable.primitive.ImmutableIntSeq;
import kala.collection.mutable.MutableList;
import kala.tuple.primitive.IntObjTuple2;
import kala.value.LazyValue;
import kala.value.MutableValue;
import org.aya.literate.Literate;
import org.aya.literate.UnsupportedMarkdown;
import org.aya.pretty.backend.md.MdStyle;
import org.aya.pretty.doc.Doc;
import org.aya.pretty.doc.Style;
import org.aya.util.error.Panic;
import org.aya.util.error.SourceFile;
import org.aya.util.error.SourcePos;
import org.aya.util.more.StringUtil;
import org.aya.util.reporter.Reporter;
import org.commonmark.node.*;
import org.commonmark.parser.IncludeSourceSpans;
import org.commonmark.parser.Parser;
import org.jetbrains.annotations.Contract;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public class BaseMdParser {
/**
 * Per-line index values, taken as the first component of each entry produced by
 * {@code StringUtil.indexedLines(file.sourceCode())} in the constructor.
 * For an empty line that ends with \n, the index points to the \n.
 */
protected final @NotNull ImmutableIntSeq linesIndex;
/** The source file being parsed; its {@code sourceCode()} is the text handed to the markdown parser. */
protected final @NotNull SourceFile file;
/** Reporter supplied at construction. NOTE(review): presumably receives parse problems; its uses are not visible here, confirm. */
protected final @NotNull Reporter reporter;
/** Languages supplied at construction. NOTE(review): presumably the code-block languages this parser treats specially; confirm against usages. */
protected final @NotNull ImmutableSeq> languages;
public BaseMdParser(@NotNull SourceFile file, @NotNull Reporter reporter, @NotNull ImmutableSeq> lang) {
this.linesIndex = StringUtil.indexedLines(file.sourceCode())
.mapToInt(ImmutableIntSeq.factory(), IntObjTuple2::component1);
this.file = file;
this.reporter = reporter;
this.languages = lang;
}
/// region Entry
/**
 * Produces the commonmark parser builder used by {@link #parseLiterate()}.
 * <p>
 * The builder records source spans for both blocks and inlines, and registers
 * {@code FillCodeBlock.INSTANCE} as a post-processor. The method is protected and non-final,
 * so a subclass can override it to adjust the configuration.
 *
 * @return the configured, not yet built, parser builder
 */
protected @NotNull Parser.Builder parserBuilder() {
  var withSourceSpans = Parser.builder().includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES);
  return withSourceSpans.postProcessor(FillCodeBlock.INSTANCE);
}
/**
 * Parses the whole source file and converts the resulting markdown tree into a {@link Literate}.
 *
 * @return the result of applying {@code mapNode} to the root node returned by the commonmark parser
 */
public @NotNull Literate parseLiterate() {
  var parser = parserBuilder().build();
  var root = parser.parse(file.sourceCode());
  return mapNode(root);
}
/// endregion Entry
/// region Parsing
/**
 * Maps every direct child of {@code parent} with {@code mapNode}, preserving order.
 * <p>
 * A {@code Literate.Raw(Doc.line())} separator is inserted in front of each {@link Paragraph}
 * child, except when nothing has been collected yet.
 *
 * @param parent the node whose direct children are converted
 * @return the converted children, including the inserted line separators
 */
protected @NotNull ImmutableSeq mapChildren(@NotNull Node parent) {
  var mapped = MutableList.create();
  var current = parent.getFirstChild();
  while (current != null) {
    // Read the successor before mapping the node, exactly as the original loop does.
    // NOTE(review): presumably guards against mapNode altering the node's sibling links; confirm.
    var successor = current.getNext();
    if (current instanceof Paragraph && mapped.isNotEmpty()) {
      mapped.append(new Literate.Raw(Doc.line()));
    }
    mapped.append(mapNode(current));
    current = successor;
  }
  return mapped.toImmutableSeq();
}
/**
 * Result of {@link #stripTrailingNewline}.
 *
 * @param pos     lazily evaluated value; {@code stripTrailingNewline} fills it with a deferred
 *                {@code fromSourceSpans} call over the owner's source spans minus the first and last one
 * @param literal the literal with its final character removed (left unchanged when it was empty)
 */
protected record StripTrailing(LazyValue pos, String literal) {}
/**
 * Drops the final character of a block's literal and pairs it with a lazily computed position.
 * <p>
 * The position is derived from the owner's source spans with the first and the last span
 * removed (the original comment describes those as the opening and closing '```').
 * The final character of {@code literal} is removed without inspecting it.
 * NOTE(review): this presumably relies on the literal always ending with a line break; confirm.
 *
 * @param literal the block's literal text
 * @param owner   the block that supplies the source spans
 * @return the shortened literal together with the deferred position
 * @throws Panic if {@code owner} has no source spans or fewer than two of them
 */
protected @NotNull StripTrailing stripTrailingNewline(@NotNull String literal, @NotNull Block owner) {
  var spans = owner.getSourceSpans();
  // At least two spans are required: the ones for '```' and '```'.
  if (spans == null || spans.size() < 2) {
    throw new Panic("SourceSpans");
  }
  var inner = ImmutableSeq.from(spans).view().drop(1).dropLast(1).toImmutableSeq();
  // Remove the last line break unless the literal is empty.
  var stripped = literal.isEmpty() ? literal : literal.substring(0, literal.length() - 1);
  return new StripTrailing(LazyValue.of(() -> fromSourceSpans(inner)), stripped);
}
protected @NotNull Literate mapNode(@NotNull Node node) {
return switch (node) {
case Text text -> new Literate.Raw(Doc.plain(text.getLiteral()));
case Emphasis emphasis -> new Literate.Many(Style.italic(), mapChildren(emphasis));
case HardLineBreak _, SoftLineBreak _ -> new Literate.Raw(Doc.line());
case StrongEmphasis emphasis -> new Literate.Many(Style.bold(), mapChildren(emphasis));
case Paragraph p -> new Literate.Many(MdStyle.GFM.Paragraph, mapChildren(p));
case BlockQuote b -> new Literate.Many(MdStyle.GFM.BlockQuote, mapChildren(b));
case Heading h -> new Literate.Many(new MdStyle.GFM.Heading(h.getLevel()), mapChildren(h));
case Link h -> new Literate.HyperLink(h.getDestination(), h.getTitle(), mapChildren(h));
case Image h -> new Literate.Image(h.getDestination(), mapChildren(h));
case ListItem item -> flatten(collectChildren(item.getFirstChild())
// .flatMap(p -> p instanceof Paragraph ? collectChildren(p.getFirstChild()) : SeqView.of(p))
.flatMap(this::mapChildren)
.toImmutableSeq());
case OrderedList ordered -> new Literate.List(mapChildren(ordered), true);
case BulletList bullet -> new Literate.List(mapChildren(bullet), false);
case Document d -> flatten(mapChildren(d));
case HtmlBlock html when html.getLiteral().startsWith("