
net.sf.okapi.common.encoder.MarkdownEncoder Maven / Gradle / Ivy
package net.sf.okapi.common.encoder;
import net.sf.okapi.common.HTMLCharacterEntities;
import net.sf.okapi.common.IParameters;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Encoder that prepares text for output in a Markdown document.
 *
 * <p>Every {@code encode} overload applies the same three steps, in this order:
 * <ol>
 *   <li>each {@code "\n"} is replaced by the configured line break;</li>
 *   <li>characters listed in the {@code htmlEntitiesToEscape} parameter are replaced by
 *       {@code &name;} when {@link HTMLCharacterEntities} knows a name for them;</li>
 *   <li>when enabled, characters listed in the {@code charactersToEscape} parameter are
 *       prefixed with a backslash.</li>
 * </ol>
 *
 * <p>The {@link EncoderContext} argument of the {@code encode} methods is not consulted.
 */
public class MarkdownEncoder extends DefaultEncoder {

    /** Characters that are backslash-escaped when no usable list is configured. */
    public static final String DEFAULT_CHARACTERS_TO_ESCAPE = "*_`{}[]<>()#+\\-.!|";

    private static final Logger LOGGER = LoggerFactory.getLogger(MarkdownEncoder.class);

    /**
     * Replacement string for {@link java.util.regex.Matcher#replaceAll(String)}:
     * a literal backslash followed by the matched character (group 1).
     */
    private static final String BACKSLASH_ESCAPE_REPLACEMENT = "\\\\$1";

    private boolean escapeSpecialMarkdownCharacters;
    private String markdownCharactersToEscape;
    private String htmlEntitiesToEscape;
    private IParameters params;
    private Pattern escapingRegex;
    private Pattern htmlEntityEscapingRegex;
    private final HTMLCharacterEntities htmlCharacterEntities;

    /**
     * Creates an encoder with Markdown escaping disabled, the default list of
     * characters to escape and no HTML entities to escape.
     */
    public MarkdownEncoder() {
        super();
        escapeSpecialMarkdownCharacters = false;
        markdownCharactersToEscape = DEFAULT_CHARACTERS_TO_ESCAPE;
        htmlEntitiesToEscape = "";
        htmlCharacterEntities = new HTMLCharacterEntities();
        // NOTE(review): presumably loads the entity name table up front - confirm
        // against HTMLCharacterEntities.
        htmlCharacterEntities.ensureInitialization(true);
    }

    /**
     * Reads the escaping configuration from {@code params} and stores the line break.
     *
     * <p>When {@code params} is {@code null} the current escaping configuration is
     * kept; the line break is always updated.
     *
     * @param params    the parameters to read; the keys used are
     *                  {@code unescapeBackslashCharacters}, {@code charactersToEscape}
     *                  and {@code htmlEntitiesToEscape}. May be {@code null}.
     * @param encoding  not used by this encoder.
     * @param lineBreak the line break that replaces {@code "\n"} in encoded text.
     */
    @Override
    public void setOptions(IParameters params, String encoding, String lineBreak) {
        this.params = params;
        if (params != null) {
            // NOTE(review): the escaping flag is driven by the "unescapeBackslashCharacters"
            // parameter - presumably text unescaped on extraction must be re-escaped
            // on output; confirm against the Markdown filter parameters.
            escapeSpecialMarkdownCharacters = params.getBoolean("unescapeBackslashCharacters");

            // Guard against a missing value so the later isEmpty()/Pattern.quote()
            // calls cannot fail with a NullPointerException.
            String configuredCharacters = params.getString("charactersToEscape");
            markdownCharactersToEscape =
                    configuredCharacters != null ? configuredCharacters : DEFAULT_CHARACTERS_TO_ESCAPE;
            String configuredEntities = params.getString("htmlEntitiesToEscape");
            htmlEntitiesToEscape = configuredEntities != null ? configuredEntities : "";

            if (escapeSpecialMarkdownCharacters) {
                escapingRegex = buildEscapingRegex();
            }
            if (!htmlEntitiesToEscape.isEmpty()) {
                htmlEntityEscapingRegex = buildHtmlEntityEscapingRegex();
            }
        }
        setLineBreak(lineBreak);
    }

    /**
     * Encodes a string.
     *
     * @param text    the text to encode; {@code null} yields an empty string.
     * @param context not used.
     * @return the encoded text, never {@code null}.
     */
    @Override
    public String encode(String text, EncoderContext context) {
        if (text == null) {
            return "";
        }
        return applyEscaping(text.replace("\n", getLineBreak()));
    }

    /**
     * Encodes a single character.
     *
     * @param value   the character to encode.
     * @param context not used.
     * @return the encoded character as a string.
     */
    @Override
    public String encode(char value, EncoderContext context) {
        // Escaping runs on the line-break-converted text (as in the other overloads),
        // so the configured line break is kept even when HTML entity escaping is active.
        return applyEscaping(String.valueOf(value).replace("\n", getLineBreak()));
    }

    /**
     * Encodes a code point.
     *
     * @param value   the code point to encode; supplementary code points are expanded
     *                to their surrogate pair, any other value is narrowed to a
     *                {@code char}.
     * @param context not used.
     * @return the encoded code point as a string.
     */
    @Override
    public String encode(int value, EncoderContext context) {
        String raw;
        if (Character.isSupplementaryCodePoint(value)) {
            raw = new String(Character.toChars(value));
        } else {
            raw = String.valueOf((char) value);
        }
        return applyEscaping(raw.replace("\n", getLineBreak()));
    }

    /**
     * Applies HTML entity escaping, then (when enabled) Markdown backslash escaping.
     * Escaping HTML entities takes priority over special Markdown characters.
     */
    private String applyEscaping(String text) {
        String result = escapeHtmlEntities(text);
        if (escapeSpecialMarkdownCharacters) {
            // Reuse the compiled pattern instead of recompiling it from its source
            // on every call.
            result = escapingRegex.matcher(result).replaceAll(BACKSLASH_ESCAPE_REPLACEMENT);
        }
        return result;
    }

    /**
     * Builds the pattern matching any single character of
     * {@code markdownCharactersToEscape}, captured as group 1. Falls back to
     * {@link #DEFAULT_CHARACTERS_TO_ESCAPE} when the expression cannot be compiled.
     */
    private Pattern buildEscapingRegex() {
        try {
            return Pattern.compile("([" + Pattern.quote(markdownCharactersToEscape) + "])");
        } catch (PatternSyntaxException e) {
            LOGGER.warn("Invalid charactersToEscape value: {}", markdownCharactersToEscape, e);
            return Pattern.compile("([" + Pattern.quote(DEFAULT_CHARACTERS_TO_ESCAPE) + "])");
        }
    }

    /**
     * Builds the pattern matching any single character of {@code htmlEntitiesToEscape}.
     * Falls back to a pattern that matches nothing when the expression cannot be
     * compiled.
     */
    private Pattern buildHtmlEntityEscapingRegex() {
        try {
            return Pattern.compile("([" + Pattern.quote(htmlEntitiesToEscape) + "])");
        } catch (PatternSyntaxException e) {
            LOGGER.warn("Invalid htmlEntitiesToEscape value: {}", htmlEntitiesToEscape, e);
            // An empty pattern would succeed with find() at every position and so
            // select every character; an empty negative lookahead never matches.
            return Pattern.compile("(?!)");
        }
    }

    /**
     * Replaces the configured characters of {@code result} by their named HTML
     * entity. Returns the input unchanged when no entities are configured.
     */
    private String escapeHtmlEntities(String result) {
        if (htmlEntitiesToEscape.isEmpty()) {
            return result;
        }
        StringBuilder sb = new StringBuilder(result.length());
        for (int i = 0; i < result.length(); i++) {
            escapeHtmlEntity(result.charAt(i), sb);
        }
        return sb.toString();
    }

    /**
     * Appends {@code value} to {@code sb}, as {@code &name;} when the character is
     * selected for escaping and has a known entity name, and unchanged otherwise.
     */
    private void escapeHtmlEntity(char value, StringBuilder sb) {
        if (htmlEntityEscapingRegex.matcher(String.valueOf(value)).find()) {
            String htmlName = htmlCharacterEntities.getName(value);
            if (htmlName != null) {
                sb.append('&').append(htmlName).append(';');
                return;
            }
        }
        sb.append(value);
    }

    /**
     * Returns the parameters passed to the last call of
     * {@link #setOptions(IParameters, String, String)}, or {@code null} if none were set.
     */
    @Override
    public IParameters getParameters() {
        return params;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy