All downloads are free. The search and download functionality uses the official Maven repository.

org.conqat.lib.commons.markup.MarkupUtils Maven / Gradle / Ivy

There is a newer version: 2024.7.2
Show newest version
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.conqat.lib.commons.markup;

import java.util.List;

import org.checkerframework.checker.nullness.qual.NonNull;
import org.conqat.lib.commons.collections.Pair;
import org.conqat.lib.commons.collections.PairList;
import org.conqat.lib.commons.string.StringUtils;

/**
 * Util class for handling markup (i.e., HTML tags or Markdown relevant symbols in String literals
 */
public class MarkupUtils {

	/** Map with rules we use to escape things to markdown. */
	private static final PairList ESCAPING_RULES = new PairList<>();

	static {
		ESCAPING_RULES.add("#", "#");
		ESCAPING_RULES.add("\\*", "*");
		ESCAPING_RULES.add("_", "_");
		ESCAPING_RULES.add("~", "~");
		ESCAPING_RULES.add("\\[", "[");
		ESCAPING_RULES.add("]", "]");
		ESCAPING_RULES.add("!", "!");
		ESCAPING_RULES.add("`", "`");
		ESCAPING_RULES.add("\\n", " ");
		// trick to prevent automatic linking of URLs in string literals
		ESCAPING_RULES.add("//", "//");
		ESCAPING_RULES.add("www", "www");
	}

	/** The marker for both the start and end of a code block in markdown. */
	public static final String CODE_BLOCK_DELIMITER = "```";

	/**
	 * Replaces characters that are meaningful in Markdown markup language with their corresponding
	 * entity numbers.
	 * 

* Use this on source code that you don't want to format as code. Use * {@link MarkupUtils#formatAsSourceCode(String)} on source code that you do want to format as code * in finding messages. */ public static String escapeMarkdownRelevantSymbols(String s) { for (Pair fromTo : ESCAPING_RULES) { s = s.replaceAll(fromTo.getFirst(), fromTo.getSecond()); } return s; } /** * Unescapes code escaped according to {@link #escapeMarkdownRelevantSymbols(String)} */ public static String unescapeMarkdownRelevantSymbols(String s) { for (Pair fromTo : ESCAPING_RULES.reversed()) { s = s.replaceAll(fromTo.getSecond(), fromTo.getFirst()); } return s; } /** * Trims all lines that are not in a <pre></pre> block. *

* Markdown renders indented blocks as code blocks. This might not be intended when we load a * description from a resource file, where the indentations might be present because of the * formatting of that specific file (e.g. indented blocks of XML tags). *

* Note that the contents of <pre></pre> blocks are expected to be properly formatted, * and nested or not closed <pre> tags are not handled. This function should not be called if * the description uses the Markdown syntax for code blocks instead of HTML tags. */ public static String trimLines(String extractedText) { StringBuilder reformattedText = new StringBuilder(); String[] lines = StringUtils.splitLines(extractedText); boolean hasOpenedCodeBlock = false; for (String line : lines) { if (hasOpenedCodeBlock) { reformattedText.append(line); } else { reformattedText.append(line.trim()); } reformattedText.append("\n"); // A previously opened block will be closed if

is present in the line hasOpenedCodeBlock &= !line.contains(""); // A block will be opened if
 is not followed by 
in the same line hasOpenedCodeBlock |= line.lastIndexOf("") < line.lastIndexOf("
");
		}

		return reformattedText.toString();
	}

	/**
	 * Formats the given string as source code by enclosing it with backticks. Properly handles the case
	 * where the given string already contains backticks itself. Replaces newlines by spaces. Use this
	 * to format text from source code as code in a finding message. Use
	 * {@link MarkupUtils#escapeMarkdownRelevantSymbols(String)} for text from source code that should
	 * not be formatted as code.
	 * 

* Example: * String findingMessage = "Don't use literal " + MarkupUtils.formatAsSourceCode(literalToken.getText()) + " here." */ public static String formatAsSourceCode(String sourceCodeText) { sourceCodeText = sourceCodeText.replace("\n", " "); int numberOfEnclosingBackticks = findMaxNumberOfBackticks(sourceCodeText) + 1; String enclosingBackticks = StringUtils.repeat("`", numberOfEnclosingBackticks); StringBuilder enclosedText = new StringBuilder(enclosingBackticks); // If the string starts with a backtick, we need to separate the enclosing // backticks from the actual string. The additional whitespace does not affect // the markdown rendering. if (sourceCodeText.startsWith("`")) { enclosedText.append(" "); } enclosedText.append(sourceCodeText); if (sourceCodeText.endsWith("`")) { enclosedText.append(" "); } enclosedText.append(enclosingBackticks); return enclosedText.toString(); } private static int findMaxNumberOfBackticks(String sourceCodeText) { int maxNumberOfBackticks = 0; int currentNumberOfBackticks = 0; for (char c : sourceCodeText.toCharArray()) { if (c == '`') { currentNumberOfBackticks++; } else { currentNumberOfBackticks = 0; } maxNumberOfBackticks = Math.max(maxNumberOfBackticks, currentNumberOfBackticks); } return maxNumberOfBackticks; } /** * Whether a language is defined for all code blocks in a markdown file's content. * * @param fileContent * The markdown file's content to analyse. * @return Whether for all code blocks a language is defined. */ public static boolean languageDefinedForAllCodeBlocks(@NonNull String fileContent) { List codeBlockStartLines = MarkdownCodeBlockFinder .findCodeBlockStartLines(fileContent); return codeBlockStartLines.stream().allMatch(line -> definesLanguageForCodeBlock(line.getLineContent())); } private static boolean definesLanguageForCodeBlock(String line) { return !line.trim().equals(CODE_BLOCK_DELIMITER); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy