All downloads are free. Search and download functionality uses the official Maven repository.

org.aya.literate.parser.BaseMdParser Maven / Gradle / Ivy

There is a newer version: 0.35.0
Show newest version
// Copyright (c) 2020-2024 Tesla (Yinsen) Zhang.
// Use of this source code is governed by the MIT license that can be found in the LICENSE.md file.
package org.aya.literate.parser;

import kala.collection.Seq;
import kala.collection.SeqView;
import kala.collection.immutable.ImmutableSeq;
import kala.collection.immutable.primitive.ImmutableIntSeq;
import kala.collection.mutable.MutableList;
import kala.tuple.primitive.IntObjTuple2;
import kala.value.LazyValue;
import kala.value.MutableValue;
import org.aya.literate.Literate;
import org.aya.literate.UnsupportedMarkdown;
import org.aya.pretty.backend.md.MdStyle;
import org.aya.pretty.doc.Doc;
import org.aya.pretty.doc.Style;
import org.aya.util.error.Panic;
import org.aya.util.error.SourceFile;
import org.aya.util.error.SourcePos;
import org.aya.util.more.StringUtil;
import org.aya.util.reporter.Reporter;
import org.commonmark.node.*;
import org.commonmark.parser.IncludeSourceSpans;
import org.commonmark.parser.Parser;
import org.jetbrains.annotations.Contract;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

public class BaseMdParser {
  /**
   * One int per line of the source code: the first component of each tuple produced by
   * {@code StringUtil.indexedLines(file.sourceCode())}.
   * For an empty line that ends with \n, the index points to that \n.
   */
  protected final @NotNull ImmutableIntSeq linesIndex;
  /** The source file whose {@code sourceCode()} is handed to the markdown parser. */
  protected final @NotNull SourceFile file;
  /** Reporter supplied at construction; presumably used for parse diagnostics — confirm against the rest of the class. */
  protected final @NotNull Reporter reporter;
  /**
   * Languages supplied at construction.
   * NOTE(review): the generic arguments of this type look truncated in this copy — restore from upstream.
   */
  protected final @NotNull ImmutableSeq> languages;

  public BaseMdParser(@NotNull SourceFile file, @NotNull Reporter reporter, @NotNull ImmutableSeq> lang) {
    this.linesIndex = StringUtil.indexedLines(file.sourceCode())
      .mapToInt(ImmutableIntSeq.factory(), IntObjTuple2::component1);
    this.file = file;
    this.reporter = reporter;
    this.languages = lang;
  }

  /// region Entry

  /**
   * Builds the commonmark parser configuration used by {@link #parseLiterate()}:
   * source spans are recorded for blocks and inlines, and {@code FillCodeBlock.INSTANCE}
   * is registered as a post-processor.
   *
   * @return the configured, not yet built, parser builder
   */
  protected @NotNull Parser.Builder parserBuilder() {
    var withSpans = Parser.builder().includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES);
    return withSpans.postProcessor(FillCodeBlock.INSTANCE);
  }

  /**
   * Parses the source code of {@link #file} as markdown and converts the resulting
   * node tree with {@link #mapNode}.
   *
   * @return the literate representation of the parsed document
   */
  public @NotNull Literate parseLiterate() {
    var parser = parserBuilder().build();
    var document = parser.parse(file.sourceCode());
    return mapNode(document);
  }

  /// endregion Entry

  /// region Parsing

  /**
   * Maps every direct child of {@code parent} with {@link #mapNode}, in document order.
   * A raw line break is inserted before each {@link Paragraph} child that is not the
   * first mapped element.
   *
   * @param parent the node whose children are mapped
   * @return the mapped children, possibly empty
   */
  protected @NotNull ImmutableSeq<Literate> mapChildren(@NotNull Node parent) {
    // Explicit element type: a bare `var ... = MutableList.create()` would infer Object.
    MutableList<Literate> children = MutableList.create();
    Node next;
    for (var node = parent.getFirstChild(); node != null; node = next) {
      if (children.isNotEmpty() && node instanceof Paragraph) {
        children.append(new Literate.Raw(Doc.line()));
      }
      // The successor is read before mapping the node, presumably so the traversal
      // survives if mapNode alters the tree — confirm against mapNode.
      next = node.getNext();
      children.append(mapNode(node));
    }
    return children.toImmutableSeq();
  }

  /**
   * Result of {@link #stripTrailingNewline}.
   * NOTE(review): {@code LazyValue} is used without a type argument in this copy — confirm upstream.
   *
   * @param pos     lazily evaluated result of {@code fromSourceSpans} over the block's inner source spans
   * @param literal the block literal with its last character removed (unchanged when empty)
   */
  protected record StripTrailing(LazyValue pos, String literal) {}
  /**
   * Drops the last character of {@code literal} (when it is not empty) and pairs the result
   * with a lazily computed position built from the owner's source spans, excluding the
   * first and the last span.
   *
   * @param literal the literal text of the block
   * @param owner   the block providing the source spans
   * @return the stripped literal together with its lazy position
   * @throws Panic if the owner has no source spans or fewer than two of them
   */
  protected @NotNull StripTrailing stripTrailingNewline(@NotNull String literal, @NotNull Block owner) {
    var sourceSpans = owner.getSourceSpans();
    // The first and last spans are expected to be the opening and closing '```' lines.
    if (sourceSpans == null || sourceSpans.size() < 2) throw new Panic("SourceSpans");
    var body = ImmutableSeq.from(sourceSpans).view().drop(1).dropLast(1).toImmutableSeq();
    // A non-empty literal loses its final character, which is expected to be a line break.
    var stripped = literal.isEmpty() ? literal : literal.substring(0, literal.length() - 1);
    return new StripTrailing(LazyValue.of(() -> fromSourceSpans(body)), stripped);
  }

  protected @NotNull Literate mapNode(@NotNull Node node) {
    return switch (node) {
      case Text text -> new Literate.Raw(Doc.plain(text.getLiteral()));
      case Emphasis emphasis -> new Literate.Many(Style.italic(), mapChildren(emphasis));
      case HardLineBreak _, SoftLineBreak _ -> new Literate.Raw(Doc.line());
      case StrongEmphasis emphasis -> new Literate.Many(Style.bold(), mapChildren(emphasis));
      case Paragraph p -> new Literate.Many(MdStyle.GFM.Paragraph, mapChildren(p));
      case BlockQuote b -> new Literate.Many(MdStyle.GFM.BlockQuote, mapChildren(b));
      case Heading h -> new Literate.Many(new MdStyle.GFM.Heading(h.getLevel()), mapChildren(h));
      case Link h -> new Literate.HyperLink(h.getDestination(), h.getTitle(), mapChildren(h));
      case Image h -> new Literate.Image(h.getDestination(), mapChildren(h));
      case ListItem item -> flatten(collectChildren(item.getFirstChild())
        // .flatMap(p -> p instanceof Paragraph ? collectChildren(p.getFirstChild()) : SeqView.of(p))
        .flatMap(this::mapChildren)
        .toImmutableSeq());
      case OrderedList ordered -> new Literate.List(mapChildren(ordered), true);
      case BulletList bullet -> new Literate.List(mapChildren(bullet), false);
      case Document d -> flatten(mapChildren(d));
      case HtmlBlock html when html.getLiteral().startsWith("