All downloads are FREE. Search and download functionality uses the official Maven repository.

net.sf.okapi.common.encoder.MarkdownEncoder Maven / Gradle / Ivy

There is a newer version: 1.47.0
Show newest version
package net.sf.okapi.common.encoder;

import net.sf.okapi.common.HTMLCharacterEntities;
import net.sf.okapi.common.IParameters;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Encoder used when writing text back out as Markdown.
 *
 * <p>Encoding is done in three steps, in this order:
 * <ol>
 *   <li>every {@code "\n"} is replaced by the configured line break;</li>
 *   <li>characters listed in the {@code htmlEntitiesToEscape} option are replaced by
 *       their named HTML entity ({@code &name;}) when a name is known for them;</li>
 *   <li>if enabled, characters listed in the {@code charactersToEscape} option are
 *       prefixed with a backslash.</li>
 * </ol>
 *
 * <p>Instances hold mutable configuration set through
 * {@link #setOptions(IParameters, String, String)}.
 */
public class MarkdownEncoder extends DefaultEncoder {
	private static final Logger LOGGER = LoggerFactory.getLogger(MarkdownEncoder.class);

	/** Regex replacement: a literal backslash followed by the matched character (group 1). */
	private static final String BACKSLASH_ESCAPE_REPLACEMENT = "\\\\$1";

	/** Characters that are backslash-escaped when no other set is configured. */
	public static final String DEFAULT_CHARACTERS_TO_ESCAPE = "*_`{}[]<>()#+\\-.!|";

	private boolean escapeSpecialMarkdownCharacters;
	private String markdownCharactersToEscape;
	private String htmlEntitiesToEscape;
	private IParameters params;
	private Pattern escapingRegex;
	private Pattern htmlEntityEscapingRegex;
	private final HTMLCharacterEntities htmlCharacterEntities;

	/**
	 * Creates an encoder with Markdown escaping disabled, the default character set
	 * and no HTML entities to escape.
	 */
	public MarkdownEncoder() {
		super();
		escapeSpecialMarkdownCharacters = false;
		markdownCharactersToEscape = DEFAULT_CHARACTERS_TO_ESCAPE;
		htmlEntitiesToEscape = "";
		htmlCharacterEntities = new HTMLCharacterEntities();
		htmlCharacterEntities.ensureInitialization(true);
	}

	/**
	 * Configures this encoder.
	 *
	 * <p>When {@code params} is not null, the following options are read from it:
	 * {@code unescapeBackslashCharacters} (boolean, turns backslash-escaping on),
	 * {@code charactersToEscape} and {@code htmlEntitiesToEscape}. When {@code params}
	 * is null the previous configuration is kept. The line break is always updated.
	 *
	 * @param params the parameters to read the options from, may be null
	 * @param encoding not used by this method
	 * @param lineBreak the line break that replaces {@code "\n"} in the encoded output
	 */
	@Override
	public void setOptions(IParameters params, String encoding, String lineBreak) {
		this.params = params;
		if (params != null) {
			escapeSpecialMarkdownCharacters = params.getBoolean("unescapeBackslashCharacters");
			markdownCharactersToEscape = params.getString("charactersToEscape");

			// A null value is treated as "nothing to escape" instead of failing on isEmpty().
			String entities = params.getString("htmlEntitiesToEscape");
			htmlEntitiesToEscape = (entities == null) ? "" : entities;

			if (escapeSpecialMarkdownCharacters) {
				escapingRegex = buildEscapingRegex();
			}

			if (!htmlEntitiesToEscape.isEmpty()) {
				htmlEntityEscapingRegex = buildHtmlEntityEscapingRegex();
			}
		}
		setLineBreak(lineBreak);
	}

	/**
	 * Encodes a string.
	 *
	 * @param text the text to encode, may be null
	 * @param context the encoder context (not used by this method)
	 * @return the encoded text, or an empty string when {@code text} is null
	 */
	@Override
	public String encode(String text, EncoderContext context) {
		if (text == null) {
			return "";
		}

		String result = text.replace("\n", getLineBreak());

		// escaping HTML entities takes priority over special markdown characters.
		result = escapeHtmlEntities(result);

		return escapeMarkdownCharacters(result);
	}

	/**
	 * Encodes a single character.
	 *
	 * @param value the character to encode
	 * @param context the encoder context (not used by this method)
	 * @return the encoded form of the character
	 */
	@Override
	public String encode(char value,
						 EncoderContext context) {

		String result = String.valueOf(value).replace("\n", getLineBreak());

		// escaping HTML entities takes priority over special markdown characters.
		// The escaping is applied to the line-break-substituted text (not to the raw
		// character) so that a '\n' keeps the configured line break, exactly as in
		// the String and int overloads.
		result = escapeHtmlEntities(result);

		return escapeMarkdownCharacters(result);
	}

	/**
	 * Encodes a single code point.
	 *
	 * @param value the code point to encode; supplementary code points are expanded
	 *              to their surrogate pair, any other value is cast to {@code char}
	 * @param context the encoder context (not used by this method)
	 * @return the encoded form of the code point
	 */
	@Override
	public String encode(int value,
						 EncoderContext context) {
		String raw;
		if (Character.isSupplementaryCodePoint(value)) {
			raw = new String(Character.toChars(value));
		} else {
			raw = String.valueOf((char) value);
		}
		String result = raw.replace("\n", getLineBreak());

		// escaping HTML entities takes priority over special markdown characters.
		result = escapeHtmlEntities(result);

		return escapeMarkdownCharacters(result);
	}

	/**
	 * Prefixes every configured Markdown character in {@code text} with a backslash.
	 * Returns {@code text} unchanged when Markdown escaping is disabled.
	 */
	private String escapeMarkdownCharacters(String text) {
		if (!escapeSpecialMarkdownCharacters) {
			return text;
		}
		// Reuse the compiled pattern; String.replaceAll(pattern(), ...) would recompile it
		// on every call.
		return escapingRegex.matcher(text).replaceAll(BACKSLASH_ESCAPE_REPLACEMENT);
	}

	/**
	 * Builds the pattern matching any single character of {@code markdownCharactersToEscape},
	 * captured as group 1. Falls back to {@link #DEFAULT_CHARACTERS_TO_ESCAPE}, with a
	 * warning, when the configured value is null or does not yield a valid pattern.
	 */
	private Pattern buildEscapingRegex() {
		if (markdownCharactersToEscape != null) {
			try {
				return Pattern.compile("([" + Pattern.quote(markdownCharactersToEscape) + "])");
			} catch (PatternSyntaxException e) {
				// fall through to the default set below
			}
		}
		LOGGER.warn("Invalid charactersToEscape value: {}", markdownCharactersToEscape);
		return Pattern.compile("([" + Pattern.quote(DEFAULT_CHARACTERS_TO_ESCAPE) + "])");
	}

	/**
	 * Builds the pattern matching any single character of {@code htmlEntitiesToEscape},
	 * captured as group 1.
	 */
	private Pattern buildHtmlEntityEscapingRegex() {
		try {
			return Pattern.compile("([" + Pattern.quote(htmlEntitiesToEscape) + "])");
		} catch (PatternSyntaxException e) {
			LOGGER.warn("Invalid htmlEntitiesToEscape value: {}", htmlEntitiesToEscape);
			// NOTE(review): an empty pattern finds a match in any input, so with this
			// fallback every character that has a named entity would be escaped.
			// Presumably unreachable because the value is quoted and non-empty here;
			// confirm whether "escape nothing" was the intent.
			return Pattern.compile("");
		}
	}

	/**
	 * Applies HTML entity escaping to each {@code char} of {@code result}.
	 * Returns {@code result} unchanged when no entities are configured.
	 */
	private String escapeHtmlEntities(String result) {
		if (htmlEntitiesToEscape.isEmpty()) {
			return result;
		}
		StringBuilder sb = new StringBuilder(result.length());
		for (int i = 0; i < result.length(); i++) {
			escapeHtmlEntity(result.charAt(i), sb);
		}
		return sb.toString();
	}

	/**
	 * Appends {@code value} to {@code sb}, as {@code &name;} when the character is one
	 * of the configured entities and a name is known for it, otherwise as is.
	 */
	private void escapeHtmlEntity(char value, StringBuilder sb) {
		if (htmlEntityEscapingRegex.matcher(String.valueOf(value)).find()) {
			String htmlName = htmlCharacterEntities.getName(value);
			if (htmlName != null) {
				sb.append('&').append(htmlName).append(';');
				return;
			}
		}
		sb.append(value);
	}

	/**
	 * @return the parameters passed to the last call of
	 *         {@link #setOptions(IParameters, String, String)}, or null if there were none
	 */
	@Override
	public IParameters getParameters() {
		return params;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy