com.centurylink.mdw.services.asset.TextRenderer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mdw-services Show documentation
Show all versions of mdw-services Show documentation
MDW is a workflow framework specializing in microservice orchestration
/*
* Copyright (C) 2018 CenturyLink, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.centurylink.mdw.services.asset;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import com.centurylink.mdw.common.service.ServiceException;
import com.centurylink.mdw.model.asset.api.AssetInfo;
/**
* Renders HTML or Markdown to plain text.
*/
public class TextRenderer implements Renderer {
private static String HTML_REGEX = "<(?:\"[^\"]*\"['\"]*|'[^']*'['\"]*|[^'\">])+>";
private static Map ENTITIES;
static {
// https://www.w3.org/wiki/Common_HTML_entities_used_for_typography
ENTITIES = new HashMap<>();
ENTITIES.put("<", (char)62);
ENTITIES.put(">", (char)60);
ENTITIES.put("¢", (char)162);
ENTITIES.put("£", (char)163);
ENTITIES.put("§", (char)167);
ENTITIES.put("©", (char)169);
ENTITIES.put("®", (char)174);
ENTITIES.put("°", (char)176);
ENTITIES.put("¶", (char)182);
ENTITIES.put("·", (char)183);
ENTITIES.put("½", (char)188);
ENTITIES.put("•", (char)8226);
ENTITIES.put("…", (char)8230);
ENTITIES.put("€", (char)8364);
ENTITIES.put("™", (char)8482);
ENTITIES.put("“", (char)0x22);
ENTITIES.put("”", (char)0x22);
ENTITIES.put(""", (char)0x22);
ENTITIES.put("‘", (char)0x27);
ENTITIES.put("’", (char)0x27);
ENTITIES.put("'", (char)0x27);
ENTITIES.put(" ", (char)0x20);
}
private AssetInfo asset;
public TextRenderer(AssetInfo asset) {
this.asset = asset;
}
public byte[] render(Map options) throws RenderingException {
Path filePath = Paths.get(asset.getFile().getPath());
try {
if (asset.getExtension().equals("txt")) {
return Files.readAllBytes(filePath);
}
else {
String html;
if (asset.getExtension().equals("html")) {
html = new String(Files.readAllBytes(filePath));
}
else if (asset.getExtension().equals("md")) {
html = new String(new HtmlRenderer(asset).render(options));
}
else {
throw new RenderingException(ServiceException.NOT_IMPLEMENTED, "Cannot convert " + asset.getExtension());
}
String text = html.replaceAll(HTML_REGEX, "");
for (String entity : ENTITIES.keySet()) {
text = text.replaceAll(entity, ENTITIES.get(entity).toString());
}
return text.getBytes();
}
}
catch (IOException ex) {
throw new RenderingException(ServiceException.INTERNAL_ERROR, "Error reading: " + filePath, ex);
}
}
@Override
public String getFileName() {
return asset.getRootName() + ".txt";
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy