net.yapbam.util.HtmlUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of yapbam-commons Show documentation
Commons Yapbam classes used by desktop and Android versions.
There is a newer version: 1.9.1
package net.yapbam.util;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringEscapeUtils;

/** Some utility methods on html Strings.
 * 
 * @author Jean-Marc Astesana
 * License GPL v3
 */
public abstract class HtmlUtils {
	/** Matches an encoded link of the form [text[url]].
	 * Group 1 is the text (possibly empty, it contains no square bracket), group 2 is the url (not empty, it contains no closing square bracket).
	 */
	private static final Pattern P = Pattern.compile("\\[([^\\[\\]]*)\\[([^\\]]+)\\]\\]");
	/** Its unmangle method is applied to link texts before they are rendered.
	 * NOTE(review): presumably it substitutes the '[' and ']' characters, which a link text can't contain - confirm against the Mangler class.
	 */
	private static final Mangler MANGLER = new Mangler("[]",'_');
	public static final String START_TAG = "";
	public static final String END_TAG = "";
	public static final String START_BODY_TAG = "";
	public static final String END_BODY_TAG = "";
	public static final String NEW_LINE_TAG = "
";
	public static final String NON_BREAKING_SPACE = " ";

	private HtmlUtils() {
		// To prevent instantiation
		super();
	}
	
	/** Strips the enclosing html start and end tags from a string.
	 * <br>The string is trimmed first. The tags are searched in the upper-cased string, and they are removed only if
	 * the string both starts with {@link #START_TAG} and ends with {@link #END_TAG}.
	 * @param text The string to process
	 * @return the trimmed string, without its enclosing tags if it had both of them.
	 */
	public static String removeHtmlTags (String text) {
		final String trimmed = text.trim();
		final String upperCased = trimmed.toUpperCase();
		final boolean enclosed = upperCased.startsWith(START_TAG) && upperCased.endsWith(END_TAG);
		if (!enclosed) {
			// Nothing to strip
			return trimmed;
		}
		// Keep what stands between the start and the end tags
		return trimmed.substring(START_TAG.length(), trimmed.length() - END_TAG.length());
	}
	
	/** Converts an encoded string to html.
	 * @param content The encoded content.
	 * <br>Links are encoded with the following syntax: [text[url]].
	 * <br>If text is omitted, url is used as text.
	 * <br>Examples:<ul>
	 * <li>{@code This is a [link to Google[http://www.google.com]] -> This is a <a href="http://www.google.com">link to Google</a>}</li>
	 * <li>{@code Try [[http://www.google.com]] -> Try <a href="http://www.google.com">http://www.google.com</a>}</li>
	 * </ul>
	 * @return the html text corresponding to the encoded content. The returned string does not contain {@code <html></html>} tags around the generated html content.
	 */
	public static String toHtml(String content) {
		Matcher m = P.matcher(content);
		StringBuilder sb = new StringBuilder();
		int previousEnd = 0;
		while (m.find()) {
			if (previousEnd!=m.start()) {
				sb.append(StringEscapeUtils.escapeHtml3(content.substring(previousEnd, m.start())));
			}
			previousEnd = m.end();
			if (isValidURL(m.group(2))) {
				sb.append(getHTMLLink(m.group(1), m.group(2)));
			} else {
				sb.append(content.substring(m.start(), previousEnd));
			}
		}
		if (previousEnd" + StringEscapeUtils.escapeHtml3(name.isEmpty() ? url : MANGLER.unmangle(name)) + "";
	}

	public static Matcher getLink(String encodedContent, int start, int end) {
		Matcher m = P.matcher(encodedContent);
		while (m.find()) {
			if (m.start()