All downloads are free. The search and download functionality uses the official Maven repository.

com.squeakysand.commons.html.HtmlUtils Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2010-2012 Craig S. Dickson (http://craigsdickson.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.squeakysand.commons.html;

import org.apache.commons.lang.StringEscapeUtils;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers for escaping HTML, stripping markup and converting HTML to plain text.
 *
 * <p>Entity escaping/unescaping is delegated to Commons Lang {@link StringEscapeUtils}; markup parsing is
 * delegated to jsoup.</p>
 */
public class HtmlUtils {

    private static final Logger LOG = LoggerFactory.getLogger(HtmlUtils.class);

    /**
     * Escapes the characters of a plain text string using HTML entity references.
     *
     * @param plainText the text to escape, may be {@code null}.
     * @return the escaped text as produced by {@link StringEscapeUtils#escapeHtml(String)}, which returns
     *         {@code null} for {@code null} input.
     */
    public static String escape(String plainText) {
        return StringEscapeUtils.escapeHtml(plainText);
    }

    /**
     * Removes all HTML tags and just returns the text elements around the tags.
     *
     * <p>The markup is parsed with jsoup and the document text is returned, so entity references in the input
     * (for example {@code &amp;amp;}) are already decoded in the result and whitespace is normalized by jsoup.
     * There is no need to pass the result through {@link #unescapeHtml(java.lang.String)}; doing so would decode
     * text that merely looks like an entity reference a second time.</p>
     *
     * @param html string containing HTML markup tags, may be {@code null}.
     * @return the text content with all HTML tags removed, or {@code null} if {@code html} is {@code null}.
     */
    public static String removeHtmlTags(String html) {
        if (html == null) {
            // Jsoup.parse rejects null input; mirror the null-in/null-out behaviour of the escape helpers instead.
            return null;
        }
        Document document = Jsoup.parse(html);
        String result = document.text();
        LOG.debug("'{}' became '{}'", html, result);
        return result;
    }

    /**
     * Replaces any HTML entity references with their plain text representation.
     *
     * @param escapedHtml string containing HTML entity references, may be {@code null}.
     * @return string with HTML entities replaced with plain text equivalents, as produced by
     *         {@link StringEscapeUtils#unescapeHtml(String)}, which returns {@code null} for {@code null} input.
     */
    public static String unescapeHtml(String escapedHtml) {
        String result = StringEscapeUtils.unescapeHtml(escapedHtml);
        LOG.debug("'{}' unescaped to '{}'", escapedHtml, result);
        return result;
    }

    /**
     * Converts HTML markup to the plain text a reader would see: tags removed and entity references decoded.
     *
     * @param html string containing HTML markup, may be {@code null}.
     * @return the plain text content, or {@code null} if {@code html} is {@code null}.
     */
    public static String toPlainText(String html) {
        // removeHtmlTags already yields decoded text. Unescaping again would corrupt literal text such as
        // "&amp;lt;b&amp;gt;" (displayed as "&lt;b&gt;") by turning it into "<b>".
        String result = removeHtmlTags(html);
        LOG.debug("'{}' converted to '{}'", html, result);
        return result;
    }

    /**
     * Crops the content of the HTML to the specified length (ignoring HTML tag characters and entity reference characters)
     * and resolves any open tag issues created by the crop.
     *
     * <p><strong>Not implemented:</strong> this method currently always throws.</p>
     *
     * @param html the HTML text to crop
     * @param maxLength the maximum length of the cropped content
     *
     * @return a HTML string cropped to the specified length with all tags closed.
     * @throws UnsupportedOperationException always, until an implementation is provided.
     */
    public static String smartCrop(String html, int maxLength) {
        throw new UnsupportedOperationException("smartCrop(String, int) is not implemented yet");
    }

    /**
     * Variant of {@link #smartCrop(String, int)} that additionally takes a suffix.
     *
     * <p><strong>Not implemented:</strong> this method currently always throws.</p>
     *
     * @param html the HTML text to crop
     * @param maxLength the maximum length of the cropped content
     * @param suffix presumably text to append when the content is cropped (e.g. an ellipsis) - TODO confirm
     *        once implemented
     *
     * @return a HTML string cropped to the specified length with all tags closed.
     * @throws UnsupportedOperationException always, until an implementation is provided.
     */
    public static String smartCrop(String html, int maxLength, String suffix) {
        throw new UnsupportedOperationException("smartCrop(String, int, String) is not implemented yet");
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy