// Source: org.conqat.lib.commons.markup.MarkupUtils (artifact: teamscale-lib-commons)
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.conqat.lib.commons.markup;
import java.util.List;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.conqat.lib.commons.collections.Pair;
import org.conqat.lib.commons.collections.PairList;
import org.conqat.lib.commons.string.StringUtils;
/**
 * Utility class for handling markup, i.e., HTML tags or Markdown-relevant symbols in String literals.
 */
public class MarkupUtils {
/** Map with rules we use to escape things to markdown. */
private static final PairList ESCAPING_RULES = new PairList<>();
static {
ESCAPING_RULES.add("#", "#");
ESCAPING_RULES.add("\\*", "*");
ESCAPING_RULES.add("_", "_");
ESCAPING_RULES.add("~", "~");
ESCAPING_RULES.add("\\[", "[");
ESCAPING_RULES.add("]", "]");
ESCAPING_RULES.add("!", "!");
ESCAPING_RULES.add("`", "`");
ESCAPING_RULES.add("\\n", " ");
// trick to prevent automatic linking of URLs in string literals
ESCAPING_RULES.add("//", "//");
ESCAPING_RULES.add("www", "www");
}
/** The marker for both the start and end of a code block in markdown. */
public static final String CODE_BLOCK_DELIMITER = "```";
/**
* Replaces characters that are meaningful in Markdown markup language with their corresponding
* entity numbers.
*
* Use this on source code that you don't want to format as code. Use
* {@link MarkupUtils#formatAsSourceCode(String)} on source code that you do want to format as code
* in finding messages.
*/
public static String escapeMarkdownRelevantSymbols(String s) {
for (Pair fromTo : ESCAPING_RULES) {
s = s.replaceAll(fromTo.getFirst(), fromTo.getSecond());
}
return s;
}
/**
* Unescapes code escaped according to {@link #escapeMarkdownRelevantSymbols(String)}
*/
public static String unescapeMarkdownRelevantSymbols(String s) {
for (Pair fromTo : ESCAPING_RULES.reversed()) {
s = s.replaceAll(fromTo.getSecond(), fromTo.getFirst());
}
return s;
}
/**
* Trims all lines that are not in a <pre></pre> block.
*
* Markdown renders indented blocks as code blocks. This might not be intended when we load a
* description from a resource file, where the indentations might be present because of the
* formatting of that specific file (e.g. indented blocks of XML tags).
*
* Note that the contents of <pre></pre> blocks are expected to be properly formatted,
* and nested or not closed <pre> tags are not handled. This function should not be called if
* the description uses the Markdown syntax for code blocks instead of HTML tags.
*/
public static String trimLines(String extractedText) {
StringBuilder reformattedText = new StringBuilder();
String[] lines = StringUtils.splitLines(extractedText);
boolean hasOpenedCodeBlock = false;
for (String line : lines) {
if (hasOpenedCodeBlock) {
reformattedText.append(line);
} else {
reformattedText.append(line.trim());
}
reformattedText.append("\n");
// A previously opened block will be closed if
is present in the line
hasOpenedCodeBlock &= !line.contains("");
// A block will be opened if is not followed byin the same line hasOpenedCodeBlock |= line.lastIndexOf("") < line.lastIndexOf("
"); } return reformattedText.toString(); } /** * Formats the given string as source code by enclosing it with backticks. Properly handles the case * where the given string already contains backticks itself. Replaces newlines by spaces. Use this * to format text from source code as code in a finding message. Use * {@link MarkupUtils#escapeMarkdownRelevantSymbols(String)} for text from source code that should * not be formatted as code. ** Example: *
String findingMessage = "Don't use literal " + MarkupUtils.formatAsSourceCode(literalToken.getText()) + " here."
*/ public static String formatAsSourceCode(String sourceCodeText) { sourceCodeText = sourceCodeText.replace("\n", " "); int numberOfEnclosingBackticks = findMaxNumberOfBackticks(sourceCodeText) + 1; String enclosingBackticks = StringUtils.repeat("`", numberOfEnclosingBackticks); StringBuilder enclosedText = new StringBuilder(enclosingBackticks); // If the string starts with a backtick, we need to separate the enclosing // backticks from the actual string. The additional whitespace does not affect // the markdown rendering. if (sourceCodeText.startsWith("`")) { enclosedText.append(" "); } enclosedText.append(sourceCodeText); if (sourceCodeText.endsWith("`")) { enclosedText.append(" "); } enclosedText.append(enclosingBackticks); return enclosedText.toString(); } private static int findMaxNumberOfBackticks(String sourceCodeText) { int maxNumberOfBackticks = 0; int currentNumberOfBackticks = 0; for (char c : sourceCodeText.toCharArray()) { if (c == '`') { currentNumberOfBackticks++; } else { currentNumberOfBackticks = 0; } maxNumberOfBackticks = Math.max(maxNumberOfBackticks, currentNumberOfBackticks); } return maxNumberOfBackticks; } /** * Whether a language is defined for all code blocks in a markdown file's content. * * @param fileContent * The markdown file's content to analyse. * @return Whether for all code blocks a language is defined. */ public static boolean languageDefinedForAllCodeBlocks(@NonNull String fileContent) { ListcodeBlockStartLines = MarkdownCodeBlockFinder .findCodeBlockStartLines(fileContent); return codeBlockStartLines.stream().allMatch(line -> definesLanguageForCodeBlock(line.getLineContent())); } private static boolean definesLanguageForCodeBlock(String line) { return !line.trim().equals(CODE_BLOCK_DELIMITER); } }