All downloads are free. Search and download functionality uses the official Maven repository.

org.aya.literate.parser.BaseMdParser Maven / Gradle / Ivy

The newest version!
// Copyright (c) 2020-2024 Tesla (Yinsen) Zhang.
// Use of this source code is governed by the MIT license that can be found in the LICENSE.md file.
package org.aya.literate.parser;

import com.intellij.openapi.util.TextRange;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.tree.TokenSet;
import kala.collection.Seq;
import kala.collection.SeqView;
import kala.collection.immutable.ImmutableSeq;
import kala.collection.mutable.MutableList;
import kala.control.Option;
import org.aya.literate.Literate;
import org.aya.literate.UnsupportedMarkdown;
import org.aya.pretty.backend.md.MdStyle;
import org.aya.pretty.doc.Doc;
import org.aya.pretty.doc.Style;
import org.aya.util.error.SourceFile;
import org.aya.util.error.SourcePos;
import org.aya.util.reporter.Reporter;
import org.intellij.markdown.MarkdownElementType;
import org.intellij.markdown.MarkdownElementTypes;
import org.intellij.markdown.MarkdownTokenTypes;
import org.intellij.markdown.ast.ASTNode;
import org.intellij.markdown.ast.ASTUtilKt;
import org.intellij.markdown.ext.blocks.frontmatter.FrontMatterHeaderProvider;
import org.intellij.markdown.flavours.gfm.*;
import org.intellij.markdown.parser.MarkdownParser;
import org.intellij.markdown.parser.MarkerProcessor;
import org.intellij.markdown.parser.MarkerProcessorFactory;
import org.intellij.markdown.parser.markerblocks.MarkerBlockProvider;
import org.intellij.markdown.parser.sequentialparsers.EmphasisLikeParser;
import org.intellij.markdown.parser.sequentialparsers.SequentialParser;
import org.intellij.markdown.parser.sequentialparsers.SequentialParserManager;
import org.intellij.markdown.parser.sequentialparsers.impl.*;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.util.ArrayList;
import java.util.Objects;

public class BaseMdParser {
  /** For empty line that end with \n, the index points to \n */
  protected final @NotNull SourceFile file;
  protected final @NotNull Reporter reporter;
  protected final @NotNull ImmutableSeq> languages;
  protected final @NotNull MutableList sequentialParsers = MutableList.of(
    new AutolinkParser(Seq.of(MarkdownTokenTypes.AUTOLINK, GFMTokenTypes.GFM_AUTOLINK)),
    new BacktickParser(),
    new MathParser(),
    new ImageParser(),
    new InlineLinkParser(),
    new ReferenceLinkParser(),
    new EmphasisLikeParser(new EmphStrongDelimiterParser(), new StrikeThroughDelimiterParser()));

  /**
   * Stores the inputs; nothing is parsed until {@link #parseLiterate()} is called.
   *
   * @param file     source whose text will be parsed as Markdown
   * @param reporter reporter kept for later use by the parser
   * @param lang     languages kept in {@code languages}.
   *                 NOTE(review): the element type was lost in extraction
   *                 ({@code ImmutableSeq>}); {@code InterestingLanguage<?>} is the
   *                 presumed original — confirm.
   */
  public BaseMdParser(@NotNull SourceFile file, @NotNull Reporter reporter,
                      @NotNull ImmutableSeq<InterestingLanguage<?>> lang) {
    this.file = file;
    this.reporter = reporter;
    this.languages = lang;
  }

  /// region Entry
  protected void addProviders(ArrayList> providers) {
    providers.addFirst(new FrontMatterHeaderProvider());
  }

  public @NotNull Literate parseLiterate() {
    var flavour = new GFMFlavourDescriptor() {
      @Override public @NotNull SequentialParserManager getSequentialParserManager() {
        return new SequentialParserManager() {
          @Override public @NotNull Seq getParserSequence() {
            return sequentialParsers;
          }
        };
      }
      @Override public @NotNull MarkerProcessorFactory getMarkerProcessorFactory() {
        return holder -> new GFMMarkerProcessor(holder, GFMConstraints.Companion.getBASE()) {
          @Override protected @NotNull ArrayList> initMarkerBlockProviders() {
            var providers = super.initMarkerBlockProviders();
            addProviders(providers);
            return providers;
          }
        };
      }
    };
    var parser = new MarkdownParser(flavour);
    return mapNode(parser.buildMarkdownTreeFromString(file.sourceCode()));
  }

  /// endregion Entry

  /// region Parsing

  /**
   * Maps every direct child of {@code parent}; shorthand for the {@code SeqView}
   * overload applied to a view of {@code parent.getChildren()}.
   *
   * @param parent node whose children are converted
   * @return the converted children, as produced by the {@code SeqView} overload
   */
  protected @NotNull ImmutableSeq<Literate> mapChildren(@NotNull ASTNode parent) {
    return mapChildren(parent.getChildren().view());
  }

  /**
   * Node types after which {@code mapChildren} drops the next {@code EOL} token
   * instead of mapping it.
   */
  public static final TokenSet NATURAL_EOL = TokenSet.create(
    MarkdownElementTypes.PARAGRAPH,
    MarkdownElementTypes.BLOCK_QUOTE,
    MarkdownElementTypes.CODE_FENCE,
    MarkdownElementTypes.CODE_BLOCK,
    MarkdownElementTypes.ORDERED_LIST,
    MarkdownElementTypes.UNORDERED_LIST,
    MarkdownElementTypes.LIST_ITEM,
    GFMElementTypes.TABLE,
    GFMElementTypes.BLOCK_MATH,
    FrontMatterHeaderProvider.FRONT_MATTER_HEADER
  );
  /**
   * Converts a sequence of sibling nodes with {@code mapNode}, dropping the first
   * {@code EOL} token that follows a node whose type is in {@code NATURAL_EOL}.
   * <p>
   * The skip flag is cleared only when an {@code EOL} is actually dropped, so it stays
   * set across any non-EOL nodes that come between the block and that {@code EOL}.
   *
   * @param nodes sibling nodes in document order
   * @return the converted nodes, in order, without the dropped {@code EOL}s
   */
  protected @NotNull ImmutableSeq<Literate> mapChildren(@NotNull SeqView<ASTNode> nodes) {
    var children = MutableList.<Literate>create();
    var wantToSkipEol = false;
    for (var child : nodes) {
      var childType = child.getType();
      if (NATURAL_EOL.contains(childType)) {
        // The block itself is still mapped below; only the EOL after it is dropped.
        wantToSkipEol = true;
      } else if (wantToSkipEol && childType == MarkdownTokenTypes.EOL) {
        wantToSkipEol = false;
        continue;
      }

      children.append(mapNode(child));
    }

    return children.toImmutableSeq();
  }

  /**
   * ATX heading element types ordered by level, so the index of a type in this
   * sequence plus one is its heading level.
   */
  private static final @NotNull ImmutableSeq<IElementType> HEADINGS = ImmutableSeq.of(
    MarkdownElementTypes.ATX_1,
    MarkdownElementTypes.ATX_2,
    MarkdownElementTypes.ATX_3,
    MarkdownElementTypes.ATX_4,
    MarkdownElementTypes.ATX_5,
    MarkdownElementTypes.ATX_6
  );

  /**
   * Looks up a child of the given type via {@code findChildOfType}, wrapping the
   * result so that a {@code null} lookup becomes an empty {@link Option}.
   *
   * @param node parent to search
   * @param type element type to look for
   * @return the found child, or an empty option when the lookup returned {@code null}
   */
  private static Option<ASTNode> peekChild(@NotNull ASTNode node, @NotNull IElementType type) {
    return Option.ofNullable(node.findChildOfType(type));
  }

  /**
   * Returns the text of {@code node}, obtained from this parser's source code via
   * {@code ASTUtilKt.getTextInNode}.
   *
   * @param node node whose text is wanted
   * @return the node's text as a {@link String}
   */
  @NotNull protected String getTextInNode(@NotNull ASTNode node) {
    var slice = ASTUtilKt.getTextInNode(node, file.sourceCode());
    return slice.toString();
  }

  /**
   * Returns the position of the node's type in {@code HEADINGS} (0 for
   * {@code ATX_1} up to 5 for {@code ATX_6}). The caller treats {@code -1} as
   * "not a heading".
   *
   * @param node node to classify
   * @return the result of {@code HEADINGS.indexOf} for the node's type
   */
  private static int isHeading(@NotNull ASTNode node) {
    var nodeType = node.getType();
    return HEADINGS.indexOf(nodeType);
  }

  /**
   * Pieces extracted from an inline link node by {@code mapInlineLink}.
   *
   * @param title       text of the link title, or {@code null} when the link has none
   * @param destination text of the link destination node
   * @param children    converted content of the link text
   */
  protected record InlineLinkData(@Nullable String title, @NotNull String destination,
                                  @NotNull ImmutableSeq<Literate> children) { }

  /**
   * Splits an inline link node into destination, optional title and converted link
   * text.
   * <p>
   * The destination is the text of the {@code LINK_DESTINATION} child. The title is
   * the text of the {@code TEXT} token inside {@code LINK_TITLE}, or {@code null} when
   * the node has no {@code LINK_TITLE} child. The link text is the {@code LINK_TEXT}
   * child's children with one node dropped from each end (presumably the brackets).
   *
   * @param node the inline link node
   * @return the extracted link data
   */
  protected @NotNull InlineLinkData mapInlineLink(@NotNull ASTNode node) {
    var linkText = node.childOfType(MarkdownElementTypes.LINK_TEXT);
    var linkDestination = node.childOfType(MarkdownElementTypes.LINK_DESTINATION);

    // The title is optional: resolve it through Option and collapse to null at the end.
    var maybeTitle = peekChild(node, MarkdownElementTypes.LINK_TITLE)
      .map(titleNode -> titleNode.childOfType(MarkdownTokenTypes.TEXT));

    var destination = getTextInNode(linkDestination);
    var title = maybeTitle.map(this::getTextInNode).getOrNull();
    var content = mapChildren(linkText.childrenWithoutSurrounding(1));

    return new InlineLinkData(title, destination, content);
  }

  protected @NotNull Literate mapNode(@NotNull ASTNode node) {
    var type = node.getType();

    if (type == MarkdownTokenTypes.EOL || type == MarkdownTokenTypes.HARD_LINE_BREAK) {
      return new Literate.Raw(Doc.line());
    }

    // do not confuse with MarkdownTokenTypes.EMPH
    if (type == MarkdownElementTypes.EMPH) {
      return new Literate.Many(Style.italic(), mapChildren(
        node.childrenWithoutSurrounding(1))
      );
    }

    // Strong emphasis renders bold; italic is what the EMPH branch uses.
    // The argument 2 (vs. 1 for EMPH) presumably strips the doubled delimiter
    // on each side.
    if (type == MarkdownElementTypes.STRONG) {
      return new Literate.Many(Style.bold(), mapChildren(
        node.childrenWithoutSurrounding(2))
      );
    }

    if (type == MarkdownElementTypes.PARAGRAPH) {
      return new Literate.Many(MdStyle.GFM.Paragraph, mapChildren(node));
    }

    if (type == MarkdownElementTypes.BLOCK_QUOTE) {
      return new Literate.Many(MdStyle.GFM.BlockQuote, mapChildren(node));
    }

    var i = isHeading(node);
    if (i != -1) {
      var atxContent = node.childOfType(MarkdownTokenTypes.ATX_CONTENT);
      // 1-based headings
      return new Literate.Many(new MdStyle.GFM.Heading(i + 1),
        mapChildren(atxContent.getChildren().view()
          .dropWhile(it -> it.getType() == MarkdownTokenTypes.WHITE_SPACE)
        )
      );
    }

    if (type == MarkdownElementTypes.INLINE_LINK) {
      var data = mapInlineLink(node);
      return new Literate.HyperLink(data.destination, data.title, data.children);
    }

    if (type == MarkdownElementTypes.IMAGE) {
      var inner = node.childOfType(MarkdownElementTypes.INLINE_LINK);
      var data = mapInlineLink(inner);
      return new Literate.Image(data.destination, data.children);
    }

    if (type == MarkdownElementTypes.HTML_BLOCK) {
      var content = getTextInNode(node);
      if (content.startsWith("