org.jetbrains.dokka.analysis.markdown.jb.MarkdownParser.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of analysis-markdown Show documentation
Show all versions of analysis-markdown Show documentation
Dokka is an API documentation engine for Kotlin and Java, performing the same function as Javadoc for Java
The newest version!
/*
* Copyright 2014-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/
package org.jetbrains.dokka.analysis.markdown.jb
import org.intellij.markdown.MarkdownElementTypes
import org.intellij.markdown.MarkdownTokenTypes
import org.intellij.markdown.ast.ASTNode
import org.intellij.markdown.ast.CompositeASTNode
import org.intellij.markdown.ast.LeafASTNode
import org.intellij.markdown.ast.impl.ListItemCompositeNode
import org.intellij.markdown.flavours.gfm.GFMElementTypes
import org.intellij.markdown.flavours.gfm.GFMFlavourDescriptor
import org.intellij.markdown.flavours.gfm.GFMTokenTypes
import org.intellij.markdown.html.HtmlGenerator
import org.jetbrains.dokka.InternalDokkaApi
import org.jetbrains.dokka.analysis.markdown.jb.factories.DocTagsFromIElementFactory
import org.jetbrains.dokka.links.DRI
import org.jetbrains.dokka.links.PointingToDeclaration
import org.jetbrains.dokka.model.doc.*
import java.net.MalformedURLException
import java.net.URL
import org.intellij.markdown.parser.MarkdownParser as IntellijMarkdownParser
@InternalDokkaApi
public open class MarkdownParser(
private val externalDri: (String) -> DRI?,
private val kdocLocation: String?,
) : Parser() {
private lateinit var destinationLinksMap: Map
private lateinit var text: String
override fun parseStringToDocNode(extractedString: String): DocTag {
val gfmFlavourDescriptor = GFMFlavourDescriptor()
val markdownAstRoot = IntellijMarkdownParser(gfmFlavourDescriptor).buildMarkdownTreeFromString(extractedString)
destinationLinksMap = getAllDestinationLinks(extractedString, markdownAstRoot).toMap()
text = extractedString
val parsed = visitNode(markdownAstRoot)
if (parsed.size == 1) {
return parsed.first()
}
return CustomDocTag(children = parsed, params = emptyMap(), name = "")
}
override fun preparse(text: String): String = text.replace("\r\n", "\n").replace("\r", "\n")
override fun parseTagWithBody(tagName: String, content: String): TagWrapper =
when (tagName) {
"see" -> {
val referencedName = content.substringBefore(' ')
val dri = externalDri(referencedName)
See(
parseStringToDocNode(content.substringAfter(' ')),
dri?.fqDeclarationName() ?: referencedName,
dri
)
}
"throws", "exception" -> {
val dri = externalDri(content.substringBefore(' '))
Throws(
parseStringToDocNode(content.substringAfter(' ')),
dri?.fqDeclarationName() ?: content.substringBefore(' '),
dri
)
}
else -> super.parseTagWithBody(tagName, content)
}
private fun headersHandler(node: ASTNode) =
DocTagsFromIElementFactory.getInstance(
node.type,
visitNode(node.children.find { it.type == MarkdownTokenTypes.ATX_CONTENT }
?: throw detailedException("Wrong AST Tree. Header does not contain expected content", node)
).flatMap { it.children }
)
/**
* Handler for [MarkdownTokenTypes.ATX_CONTENT], which is the content of the header
* elements like [MarkdownElementTypes.ATX_1], [MarkdownElementTypes.ATX_2] and so on.
*
* For example, a header line like `# Header text` is expected to be parsed into:
* - One [MarkdownTokenTypes.ATX_HEADER] with startOffset = 0, endOffset = 1 (only the `#` symbol)
* - Composite [MarkdownTokenTypes.ATX_CONTENT] with four children: WHITE_SPACE, TEXT, WHITE_SPACE, TEXT.
*/
private fun headerContentHandler(node: ASTNode): List {
// ATX_CONTENT contains everything after the `#` symbol, so if there's a space
// in-between the `#` symbol and the text (like `# header`), it will be present here too.
// However, we don't need the leading space between the `#` symbol and the text, nor do we need trailing spaces,
// so we just skip it (otherwise the header text will be parsed as `header` instead of `header`).
// If there's more space between `#` and text, like `# header`, it will still be a single WHITE_SPACE
// element, but it will be wider, so the solution below should still hold. The same applies to trailing spaces.
val trimmedChildren = node.children.trimWhitespaceToken()
val children = trimmedChildren.evaluateChildren()
return DocTagsFromIElementFactory.getInstance(
MarkdownElementTypes.PARAGRAPH, // PARAGRAPH instead of TEXT to preserve compatibility with prev. versions
children = children
)
}
/**
* @return a sublist of [this] list that does not contain
* leading and trailing [MarkdownTokenTypes.WHITE_SPACE] elements
*/
private fun List.trimWhitespaceToken(): List {
val firstNonWhitespaceIndex = this.indexOfFirst { it.type != MarkdownTokenTypes.WHITE_SPACE }
if (firstNonWhitespaceIndex == -1) {
return this
}
val lastNonWhitespaceIndex = this.indexOfLast { it.type != MarkdownTokenTypes.WHITE_SPACE }
return this.subList(firstNonWhitespaceIndex, lastNonWhitespaceIndex + 1)
}
private fun horizontalRulesHandler() =
DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)
private fun emphasisHandler(node: ASTNode) =
DocTagsFromIElementFactory.getInstance(
node.type,
children = node.children.evaluateChildrenWithDroppedEnclosingTokens(1)
)
private fun strongHandler(node: ASTNode) =
DocTagsFromIElementFactory.getInstance(
node.type,
children = node.children.evaluateChildrenWithDroppedEnclosingTokens(2)
)
private fun List.evaluateChildrenWithDroppedEnclosingTokens(count: Int) =
drop(count).dropLast(count).evaluateChildren()
private fun blockquotesHandler(node: ASTNode) =
DocTagsFromIElementFactory.getInstance(
node.type, children = node.children
.filterIsInstance()
.evaluateChildren()
)
private fun listsHandler(node: ASTNode): List {
val children = node.children.filterIsInstance().flatMap {
if (it.children.last().type in listOf(
MarkdownElementTypes.ORDERED_LIST,
MarkdownElementTypes.UNORDERED_LIST
)
) {
val nestedList = it.children.last()
(it.children as MutableList).removeAt(it.children.lastIndex)
listOf(it, nestedList)
} else
listOf(it)
}
return DocTagsFromIElementFactory.getInstance(
node.type,
children =
children
.flatMap {
if (it.type == MarkdownElementTypes.LIST_ITEM)
DocTagsFromIElementFactory.getInstance(
it.type,
children = it
.children
.filterIsInstance()
.evaluateChildren()
)
else
visitNode(it)
},
params =
if (node.type == MarkdownElementTypes.ORDERED_LIST) {
val listNumberNode = node.children.first().children.first()
mapOf(
"start" to text.substring(
listNumberNode.startOffset,
listNumberNode.endOffset
).trim().dropLast(1)
)
} else
emptyMap()
)
}
private fun resolveDRI(mdLink: String): DRI? =
mdLink
.removePrefix("[")
.removeSuffix("]")
.let { link ->
try {
URL(link)
null
} catch (e: MalformedURLException) {
externalDri(link)
}
}
private fun getAllDestinationLinks(text: String, node: ASTNode): List> =
node.children
.filter { it.type == MarkdownElementTypes.LINK_DEFINITION }
.map {
text.substring(it.children[0].startOffset, it.children[0].endOffset).toLowerCase() to
text.substring(it.children[2].startOffset, it.children[2].endOffset)
} +
node.children.filterIsInstance().flatMap { getAllDestinationLinks(text, it) }
private fun referenceLinksHandler(node: ASTNode): List {
val linkLabel = node.children.find { it.type == MarkdownElementTypes.LINK_LABEL }
?: throw detailedException("Wrong AST Tree. Reference link does not contain link label", node)
val linkText = node.children.findLast { it.type == MarkdownElementTypes.LINK_TEXT } ?: linkLabel
val linkKey = text.substring(linkLabel.startOffset, linkLabel.endOffset)
val link = destinationLinksMap[linkKey.toLowerCase()] ?: linkKey
return linksHandler(linkText, link)
}
private fun inlineLinksHandler(node: ASTNode): List {
val linkText = node.children.find { it.type == MarkdownElementTypes.LINK_TEXT }
?: throw detailedException("Wrong AST Tree. Inline link does not contain link text", node)
val linkDestination = node.children.find { it.type == MarkdownElementTypes.LINK_DESTINATION }
val linkTitle = node.children.find { it.type == MarkdownElementTypes.LINK_TITLE }
// Link destination may be ommited: https://github.github.com/gfm/#example-495
val link = linkDestination?.let { text.substring(it.startOffset, it.endOffset) }
return linksHandler(linkText, link, linkTitle)
}
private fun markdownFileHandler(node: ASTNode) =
DocTagsFromIElementFactory.getInstance(
node.type,
children = node.children
.filterSpacesAndEOL()
.evaluateChildren()
)
private fun autoLinksHandler(node: ASTNode): List {
val link = text.substring(node.startOffset + 1, node.endOffset - 1)
return linksHandler(node, link)
}
private fun linksHandler(linkText: ASTNode, link: String?, linkTitle: ASTNode? = null): List {
val dri: DRI? = link?.let { resolveDRI(it) }
val linkOrEmpty = link ?: ""
val linkTextString =
if (linkTitle == null) linkOrEmpty else text.substring(linkTitle.startOffset + 1, linkTitle.endOffset - 1)
val params = if (linkTitle == null)
mapOf("href" to linkOrEmpty)
else
mapOf("href" to linkOrEmpty, "title" to linkTextString)
return if (link != null && dri == null && !linkOrEmpty.isRemoteLink()) {
DocTagsFromIElementFactory.getInstance(
MarkdownTokenTypes.TEXT,
params = params,
children = linkText.children.drop(1).dropLast(1).evaluateChildren(),
body = linkTextString.removeSurrounding("[", "]")
)
} else {
DocTagsFromIElementFactory.getInstance(
MarkdownElementTypes.INLINE_LINK,
params = params,
children = linkText.children.drop(1).dropLast(1).evaluateChildren(),
dri = dri
)
}
}
private fun codeLineHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
MarkdownElementTypes.CODE_BLOCK,
body = text.substring(node.startOffset, node.endOffset)
)
private fun textHandler(node: ASTNode, keepAllFormatting: Boolean) = DocTagsFromIElementFactory.getInstance(
MarkdownTokenTypes.TEXT,
body = text.substring(node.startOffset, node.endOffset).transform(),
keepFormatting = keepAllFormatting
)
private fun strikeThroughHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
node.type,
children = node.children.evaluateChildrenWithDroppedEnclosingTokens(2)
)
private fun tableHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
GFMElementTypes.TABLE,
children = node.children
.filter { it.type == GFMElementTypes.ROW || it.type == GFMElementTypes.HEADER }
.evaluateChildren()
)
private fun headerHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
GFMElementTypes.HEADER,
children = node.children
.filter { it.type == GFMTokenTypes.CELL }
.evaluateChildren()
)
private fun rowHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
GFMElementTypes.ROW,
children = node.children
.filter { it.type == GFMTokenTypes.CELL }
.evaluateChildren()
)
private fun cellHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
GFMTokenTypes.CELL,
children = node.children.filterTabSeparators().evaluateChildren().trimSurroundingTokensIfText()
)
private fun String.isRemoteLink() = try {
URL(this)
true
} catch (e: MalformedURLException) {
false
}
private fun imagesHandler(node: ASTNode): List =
with(node.children.last().children) {
val destination = find { it.type == MarkdownElementTypes.LINK_DESTINATION }
val description = find { it.type == MarkdownElementTypes.LINK_TEXT }
val src = destination?.let {
mapOf("href" to text.substring(it.startOffset, it.endOffset))
} ?: emptyMap()
val alt = description?.let {
mapOf("alt" to text.substring(it.startOffset + 1, it.endOffset - 1))
} ?: emptyMap()
return DocTagsFromIElementFactory.getInstance(
node.type,
params = src + alt
)
}
private fun rawHtmlHandler(node: ASTNode): List =
DocTagsFromIElementFactory.getInstance(
node.type,
body = text.substring(node.startOffset, node.endOffset)
)
private fun codeSpansHandler(node: ASTNode) =
DocTagsFromIElementFactory.getInstance(
node.type,
children = DocTagsFromIElementFactory.getInstance(
MarkdownTokenTypes.TEXT,
body = text.substring(node.startOffset + 1, node.endOffset - 1).replace('\n', ' ').trimIndent(),
keepFormatting = true
)
)
private fun codeFencesHandler(node: ASTNode) =
DocTagsFromIElementFactory.getInstance(
node.type,
children = node
.children
.dropWhile { it.type != MarkdownTokenTypes.CODE_FENCE_CONTENT }
.dropLastWhile { it.type != MarkdownTokenTypes.CODE_FENCE_CONTENT }
.filter { it.type != MarkdownTokenTypes.WHITE_SPACE }
.map {
if (it.type == MarkdownTokenTypes.EOL)
LeafASTNode(MarkdownTokenTypes.HARD_LINE_BREAK, 0, 0)
else
it
}.evaluateChildren(keepAllFormatting = true),
params = node
.children
.find { it.type == MarkdownTokenTypes.FENCE_LANG }
?.let { mapOf("lang" to text.substring(it.startOffset, it.endOffset)) }
?: emptyMap()
)
private fun codeBlocksHandler(node: ASTNode) =
DocTagsFromIElementFactory.getInstance(node.type, children = node.children.mergeLeafASTNodes().flatMap {
DocTagsFromIElementFactory.getInstance(
MarkdownTokenTypes.TEXT,
body = HtmlGenerator.trimIndents(text.substring(it.startOffset, it.endOffset), 4).toString()
)
})
private fun defaultHandler(node: ASTNode) =
DocTagsFromIElementFactory.getInstance(
MarkdownElementTypes.PARAGRAPH,
children = node.children.evaluateChildren()
)
private fun visitNode(node: ASTNode, keepAllFormatting: Boolean = false): List =
when (node.type) {
MarkdownElementTypes.ATX_1,
MarkdownElementTypes.ATX_2,
MarkdownElementTypes.ATX_3,
MarkdownElementTypes.ATX_4,
MarkdownElementTypes.ATX_5,
MarkdownElementTypes.ATX_6,
-> headersHandler(node)
MarkdownTokenTypes.ATX_CONTENT -> headerContentHandler(node)
MarkdownTokenTypes.HORIZONTAL_RULE -> horizontalRulesHandler()
MarkdownElementTypes.STRONG -> strongHandler(node)
MarkdownElementTypes.EMPH -> emphasisHandler(node)
MarkdownElementTypes.FULL_REFERENCE_LINK,
MarkdownElementTypes.SHORT_REFERENCE_LINK,
-> referenceLinksHandler(node)
MarkdownElementTypes.INLINE_LINK -> inlineLinksHandler(node)
MarkdownElementTypes.AUTOLINK -> autoLinksHandler(node)
MarkdownElementTypes.BLOCK_QUOTE -> blockquotesHandler(node)
MarkdownElementTypes.UNORDERED_LIST,
MarkdownElementTypes.ORDERED_LIST,
-> listsHandler(node)
MarkdownElementTypes.CODE_BLOCK -> codeBlocksHandler(node)
MarkdownElementTypes.CODE_FENCE -> codeFencesHandler(node)
MarkdownElementTypes.CODE_SPAN -> codeSpansHandler(node)
MarkdownElementTypes.IMAGE -> imagesHandler(node)
MarkdownElementTypes.HTML_BLOCK,
MarkdownTokenTypes.HTML_TAG,
MarkdownTokenTypes.HTML_BLOCK_CONTENT,
-> rawHtmlHandler(node)
MarkdownTokenTypes.HARD_LINE_BREAK -> DocTagsFromIElementFactory.getInstance(node.type)
MarkdownTokenTypes.CODE_FENCE_CONTENT,
MarkdownTokenTypes.CODE_LINE,
-> codeLineHandler(node)
MarkdownTokenTypes.TEXT -> textHandler(node, keepAllFormatting)
MarkdownElementTypes.MARKDOWN_FILE -> markdownFileHandler(node)
GFMElementTypes.STRIKETHROUGH -> strikeThroughHandler(node)
GFMElementTypes.TABLE -> tableHandler(node)
GFMElementTypes.HEADER -> headerHandler(node)
GFMElementTypes.ROW -> rowHandler(node)
GFMTokenTypes.CELL -> cellHandler(node)
else -> defaultHandler(node)
}
private fun List.filterTabSeparators() =
this.filterNot { it.type == GFMTokenTypes.TABLE_SEPARATOR }
private fun List.filterSpacesAndEOL() =
this.filterNot { it.type == MarkdownTokenTypes.WHITE_SPACE || it.type == MarkdownTokenTypes.EOL }
private fun List.evaluateChildren(keepAllFormatting: Boolean = false): List =
this.removeUselessTokens().swapImagesThatShouldBeLinks(keepAllFormatting).mergeLeafASTNodes().flatMap { visitNode(it, keepAllFormatting) }
private fun List.swapImagesThatShouldBeLinks(keepAllFormatting: Boolean): List =
if (keepAllFormatting) {
this
} else {
flatMap { node ->
if (node.type == MarkdownElementTypes.IMAGE
&& node.children.firstOrNull()?.let { it is LeafASTNode && it.type.name == "!" } == true
&& node.children.lastOrNull()?.type == MarkdownElementTypes.SHORT_REFERENCE_LINK
) {
node.children
} else {
listOf(node)
}
}
}
private fun List.removeUselessTokens(): List =
this.filterIndexed { index, node ->
!(node.type == MarkdownElementTypes.LINK_DEFINITION || (
node.type == MarkdownTokenTypes.EOL &&
this.getOrNull(index - 1)?.type == MarkdownTokenTypes.HARD_LINE_BREAK
))
}
private fun List.trimSurroundingTokensIfText() = mapIndexed { index, elem ->
val elemTransformed = if (index == 0 && elem is Text) elem.copy(elem.body.trimStart()) else elem
if (index == lastIndex && elemTransformed is Text) elemTransformed.copy(elemTransformed.body.trimEnd()) else elemTransformed
}
private val notLeafNodes = listOf(
MarkdownTokenTypes.HORIZONTAL_RULE,
MarkdownTokenTypes.HARD_LINE_BREAK,
MarkdownTokenTypes.HTML_TAG,
MarkdownTokenTypes.HTML_BLOCK_CONTENT
)
private fun ASTNode.isNotLeaf() = this is CompositeASTNode || this.type in notLeafNodes
private fun List.isNotLeaf(index: Int): Boolean =
if (index in 0..this.lastIndex)
this[index].isNotLeaf()
else
false
private fun List.mergeLeafASTNodes(): List {
val children: MutableList = mutableListOf()
var index = 0
while (index <= this.lastIndex) {
if (this.isNotLeaf(index)) {
children += this[index]
} else {
val startOffset = this[index].startOffset
val sIndex = index
while (index < this.lastIndex) {
if (this.isNotLeaf(index + 1) || this[index + 1].startOffset != this[index].endOffset) {
children += mergedLeafNode(this, index, startOffset, sIndex)
break
}
index++
}
if (index == this.lastIndex) {
children += mergedLeafNode(this, index, startOffset, sIndex)
}
}
index++
}
return children
}
private fun mergedLeafNode(nodes: List, index: Int, startOffset: Int, sIndex: Int): LeafASTNode {
val endOffset = nodes[index].endOffset
val type = if (nodes.subList(sIndex, index)
.any { it.type == MarkdownTokenTypes.CODE_LINE }
) MarkdownTokenTypes.CODE_LINE else MarkdownTokenTypes.TEXT
return LeafASTNode(type, startOffset, endOffset)
}
private fun String.transform() = this
.replace(Regex("\n\n+"), "") // Squashing new lines between paragraphs
.replace(Regex("\n"), " ")
.replace(Regex(" >+ +"), " ") // Replacement used in blockquotes, get rid of garbage
private fun detailedException(baseMessage: String, node: ASTNode) =
IllegalStateException(
baseMessage + " in ${kdocLocation ?: "unspecified location"}, element starts from offset ${node.startOffset} and ends ${node.endOffset}: ${
text.substring(
node.startOffset,
node.endOffset
)
}"
)
public companion object {
public fun DRI.fqDeclarationName(): String? {
if (this.target !is PointingToDeclaration) {
return null
}
return listOfNotNull(this.packageName, this.classNames, this.callable?.name)
.joinToString(separator = ".")
.takeIf { it.isNotBlank() }
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy