org.symphonyoss.client.util.MlMessageParser Maven / Gradle / Ivy
The newest version!
/*
*
* Copyright 2016 The Symphony Software Foundation
*
* Licensed to The Symphony Software Foundation (SSF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.symphonyoss.client.util;
import org.apache.commons.lang3.StringEscapeUtils;
import org.jsoup.nodes.Attribute;
import org.jsoup.select.Elements;
import org.symphonyoss.client.model.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.symphonyoss.client.SymphonyClient;
import org.symphonyoss.client.exceptions.SymException;
import org.symphonyoss.client.exceptions.UsersClientException;
import org.symphonyoss.symphony.clients.model.SymUser;
import org.xml.sax.helpers.DefaultHandler;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Provides simple utility to parse MlMessage formatted text
*
* @author Frank Tarsillo
*/
public class MlMessageParser extends DefaultHandler {
private String messageMl;
private Element elementMessageML;
private Document doc;
private Document originalDoc;
private StringBuilder textDoc = new StringBuilder();
private String[] textChunks;
private SymphonyClient symClient;
private final Logger logger = LoggerFactory.getLogger(MlMessageParser.class);
public MlMessageParser(SymphonyClient symClient) {
this.symClient = symClient;
}
public MlMessageParser() {
}
public void parseMessage(String message) throws SymException {
Document doc = Jsoup.parse(message);
originalDoc = doc.clone();
Element elementErrors = doc.body().getElementsByTag("errors").first();
if (elementErrors != null) {
if (elementErrors.outerHtml() != null)
logger.debug("Errors found in message: {}", elementErrors.outerHtml());
}
//Lets remove the errors elements
doc.select("errors").remove();
elementMessageML = doc.select("messageML").first();
if(elementMessageML==null)
elementMessageML = doc.select("div").first();
if (elementMessageML != null) {
if (elementMessageML.outerHtml() != null)
logger.debug("Doc parsed: {}", elementMessageML.outerHtml());
} else {
logger.error("Could not parse document for message {}", message);
throw new SymException("Malformed message");
}
textDoc = new StringBuilder();
stripTags(textDoc, elementMessageML.childNodes());
textChunks = textDoc.toString().split("\\s+");
}
public String[] getTextChunks() {
return textChunks;
}
@SuppressWarnings("unused")
public void setTextChunks(String[] textChunks) {
this.textChunks = textChunks;
}
private void stripTags(StringBuilder builder, List nodesList) {
for (Node node : nodesList) {
String nodeName = node.nodeName();
if (nodeName.equalsIgnoreCase("#text")) {
builder.append(node.toString().trim()).append(" ");
} else {
if (nodeName.equalsIgnoreCase(NodeTypes.ANCHOR.toString())) {
if (node.attributes().hasKey(AttribTypes.HREF.toString()))
builder.append(node.attr(AttribTypes.HREF.toString()));
} else if (nodeName.equalsIgnoreCase(NodeTypes.HASHTAG.toString())) {
if (node.attributes().hasKey(AttribTypes.TAG.toString()))
builder.append("#").append(node.attr(AttribTypes.TAG.toString())).append(" ");
} else if (nodeName.equalsIgnoreCase(NodeTypes.MENTION.toString())) {
SymUser user = new SymUser();
user.setEmailAddress("UID:" + node.attr(AttribTypes.UID.toString()));
user.setId(Long.valueOf(node.attr(AttribTypes.UID.toString())));
if (node.attributes().hasKey(AttribTypes.UID.toString())) {
if (symClient != null)
try {
user = symClient.getUsersClient().getUserFromId(Long.valueOf(node.attr(AttribTypes.UID.toString())));
} catch (UsersClientException e) {
logger.error("Could not identify user from userID", e);
}
} else if (node.attributes().hasKey(AttribTypes.EMAIL.toString())) {
user.setEmailAddress(node.attr(AttribTypes.EMAIL.toString()));
}
builder.append(user.getEmailAddress());
} else if (nodeName.equalsIgnoreCase(NodeTypes.CASHTAG.toString())) {
if (node.attributes().hasKey(AttribTypes.TAG.toString()))
builder.append("$").append(node.attr(AttribTypes.TAG.toString())).append(" ");
} else {
// recurse
stripTags(builder, node.childNodes());
}
}
}
}
public String getText() {
return textDoc.toString();
}
@SuppressWarnings("unused")
public String getOuterHtml() {
return originalDoc.outerHtml();
}
public String getHtmlStartingFromText(String text) {
StringBuilder stringBuilder = new StringBuilder();
getHtmlStartingFromText(text, stringBuilder, elementMessageML.childNodes(), false);
return stringBuilder.toString();
}
public String getHtmlStartingFromNode(String nodeType, String attrib, String attribValue) {
StringBuilder stringBuilder = new StringBuilder();
getHtmlStartingFromNode(nodeType, attrib, attribValue, stringBuilder, elementMessageML.childNodes(), false);
return stringBuilder.toString();
}
public void getHtmlStartingFromText(String text, StringBuilder builder, List nodesList, boolean append) {
for (Node node : nodesList) {
String nodeName = node.nodeName();
if (append) {
builder.append(node.outerHtml());
continue;
}
if (nodeName.equalsIgnoreCase("#text")) {
if (node.toString().trim().equalsIgnoreCase(text))
append = true;
}
getHtmlStartingFromText(text, builder, node.childNodes(), append);
}
}
private void getHtmlStartingFromNode(String nodeType, String attrib, String attribValue, StringBuilder builder, List nodesList, boolean append) {
for (Node node : nodesList) {
String nodeName = node.nodeName();
if (append) {
if (node.nodeName().equalsIgnoreCase("#text") && node.outerHtml().charAt(0) != ' ')
builder.append(" ");
builder.append(node.outerHtml());
if (!node.nodeName().equalsIgnoreCase("#text"))
builder.append(" ");
continue;
}
if (nodeName.equalsIgnoreCase(nodeType)) {
if (node.attributes().hasKey(attrib) && node.attr(attrib).equalsIgnoreCase(attribValue))
append = true;
}
getHtmlStartingFromNode(nodeType, attrib, attribValue, builder, node.childNodes(), append);
}
}
public void updateMentionUidToEmail(SymphonyClient symClient) {
updateMentionUidToEmail(symClient, elementMessageML.childNodes());
}
//Terrible that Symphony publishes UID on mention but only allows EMAIL on message submission.
private void updateMentionUidToEmail(SymphonyClient symClient, List nodesList) {
for (Node node : nodesList) {
String nodeName = node.nodeName();
if (nodeName.equalsIgnoreCase(NodeTypes.MENTION.toString())) {
if (node.attributes().hasKey(AttribTypes.UID.toString())) {
String uid = node.attr(AttribTypes.UID.toString());
SymUser user = null;
try {
user = symClient.getUsersClient().getUserFromId(Long.parseLong(uid));
logger.info("Translated mention uid {} to email {}", uid, user.getEmailAddress());
} catch (UsersClientException e) {
logger.error("Could not identify user email from id", e);
}
if (user != null && user.getEmailAddress() != null) {
uid = user.getEmailAddress();
}
Attribute emailAttribute = new Attribute(AttribTypes.EMAIL.toString(), uid);
node.attributes().put(emailAttribute);
node.removeAttr(AttribTypes.UID.toString());
}
}
updateMentionUidToEmail(symClient, node.childNodes());
}
}
@SuppressWarnings("unused")
public Elements getAllElements() {
return elementMessageML.getAllElements();
}
@SuppressWarnings("unused")
public List getChildNodes() {
return elementMessageML.childNodes();
}
/**
* Experimental - will attempt to escape all text within xml elements
* @param xml full xml
* @return escaped xml string
*/
public static String escapeAllXml(String xml)
{
// Match the pattern text
Pattern xmlCleanerPattern = Pattern.compile("(<[^/<>]*>)([^<>]*)([^<>]*>)");
StringBuilder xmlStringBuilder = new StringBuilder();
Matcher matcher = xmlCleanerPattern.matcher(xml);
int lastEnd = 0;
while (matcher.find())
{
// Include any non-matching text between this result and the previous result
if (matcher.start() > lastEnd) {
xmlStringBuilder.append(xml.substring(lastEnd, matcher.start()));
}
lastEnd = matcher.end();
// Sanitise the characters inside the tags and append the sanitised version
String cleanText = StringEscapeUtils.escapeXml(matcher.group(2));
xmlStringBuilder.append(matcher.group(1)).append(cleanText).append(matcher.group(3));
}
// Include any leftover text after the last result
xmlStringBuilder.append(xml.substring(lastEnd));
return xmlStringBuilder.toString();
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy