All downloads are free. The search and download functionality uses the official Maven repository.

org.opencms.util.CmsHtmlConverter Maven / Gradle / Ivy

Go to download

OpenCms is an enterprise-ready, easy-to-use website content management system based on Java and XML technology. Offering a complete set of features, OpenCms helps content managers worldwide to create and maintain beautiful websites fast and efficiently.

There is a newer version: 18.0
Show newest version
/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.util;

import org.opencms.file.CmsObject;
import org.opencms.file.CmsProperty;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.file.CmsResource;
import org.opencms.i18n.CmsEncoder;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;

import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;

/**
 * HTML cleaner and pretty printer.

* * Used to clean up HTML code (e.g. remove word tags) and optionally create XHTML from HTML.

* * @since 6.0.0 */ public class CmsHtmlConverter { /** Parameter value for disabled mode. **/ public static final String PARAM_DISABLED = CmsStringUtil.FALSE; /** Parameter value for enabled mode. **/ public static final String PARAM_ENABLED = CmsStringUtil.TRUE; /** Parameter value for replace paragraph mode. */ public static final String PARAM_REPLACE_PARAGRAPHS = "replace-paragraphs"; /** Parameter value for WORD mode. **/ public static final String PARAM_WORD = "cleanup"; /** Parameter value for XHTML mode. **/ public static final String PARAM_XHTML = "xhtml"; /** The separator used for the configured modes String. */ public static final char SEPARATOR_MODES = ';'; /** The log object for this class. */ private static final Log LOG = CmsLog.getLog(CmsHtmlConverter.class); /** The encoding used for the HTML code conversion. */ private String m_encoding; /** The conversion mode for the converter. */ private String m_mode; /** * Constructor, creates a new CmsHtmlConverter.

* * The encoding used by default is {@link CmsEncoder#ENCODING_UTF_8}.

*/ public CmsHtmlConverter() { init(CmsEncoder.ENCODING_UTF_8, PARAM_ENABLED); } /** * Constructor, creates a new CmsHtmlConverter.

* * Possible values for the default conversion mode are:

    *
  • {@link #PARAM_DISABLED}: The conversion is disabled.
  • *
  • {@link #PARAM_ENABLED}: Conversion is enabled without transformation, so HTML is pretty printed only.
  • *
  • {@link #PARAM_XHTML}: Conversion from HTML to XHTML is enabled.
  • *
  • {@link #PARAM_WORD}: Cleanup of word like HTML tags is enabled.
  • *
  • Other values can be used by the implementing converter class.
  • *
* Values can be combined with the ; separator, so it is e.g. possible to convert * to XHTML and clean from word at the same time.

* * @param encoding the encoding used for the HTML code conversion * @param mode the conversion mode to use */ public CmsHtmlConverter(String encoding, String mode) { init(encoding, mode); } /** * Reads the content conversion property of a given resource and returns its value.

* * A default value (disabled) is returned if the property could not be read.

* * @param cms the CmsObject * @param resource the resource in the VFS * @return the content conversion property value */ public static String getConversionSettings(CmsObject cms, CmsResource resource) { // read the content-conversion property String contentConversion; try { String resourceName = cms.getSitePath(resource); CmsProperty contentConversionProperty = cms.readPropertyObject( resourceName, CmsPropertyDefinition.PROPERTY_CONTENT_CONVERSION, true); contentConversion = contentConversionProperty.getValue(CmsHtmlConverter.PARAM_DISABLED); } catch (CmsException e) { // if there was an error reading the property, choose a default value contentConversion = CmsHtmlConverter.PARAM_DISABLED; } return contentConversion; } /** * Tests if the content conversion is enabled.

* * @param conversionMode the content conversion mode string * @return true or false */ public static boolean isConversionEnabled(String conversionMode) { boolean value = true; if ((conversionMode == null) || (conversionMode.indexOf(PARAM_DISABLED) != -1)) { value = false; } return value; } /** * Converts the given HTML code according to the settings of this converter.

* * @param htmlInput HTML input stored in an array of bytes * @return array of bytes containing the converted HTML * * @throws UnsupportedEncodingException if the encoding set for the conversion is not supported */ public byte[] convertToByte(byte[] htmlInput) throws UnsupportedEncodingException { return convertToByte(new String(htmlInput, getEncoding())); } /** * Converts the given HTML code according to the settings of this converter.

* * @param htmlInput HTML input stored in a string * @return array of bytes containing the converted HTML * * @throws UnsupportedEncodingException if the encoding set for the conversion is not supported */ public byte[] convertToByte(String htmlInput) throws UnsupportedEncodingException { return convertToString(htmlInput).getBytes(getEncoding()); } /** * Converts the given HTML code according to the settings of this converter.

* * If an any error occurs during the conversion process, the original input is returned unmodified.

* * @param htmlInput HTML input stored in an array of bytes * @return array of bytes containing the converted HTML */ public byte[] convertToByteSilent(byte[] htmlInput) { try { return convertToByte(htmlInput); } catch (Exception e) { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_CONVERSION_BYTE_FAILED_0), e); } return htmlInput; } } /** * Converts the given HTML code according to the settings of this converter.

* * If an any error occurs during the conversion process, the original input is returned unmodified.

* * @param htmlInput HTML input stored in a string * @return array of bytes containing the converted HTML */ public byte[] convertToByteSilent(String htmlInput) { try { return convertToByte(htmlInput.getBytes(getEncoding())); } catch (Exception e) { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_CONVERSION_BYTE_FAILED_0), e); } try { return htmlInput.getBytes(getEncoding()); } catch (UnsupportedEncodingException e1) { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_CONVERSION_BYTE_FAILED_0), e1); } return htmlInput.getBytes(); } } } /** * Converts the given HTML code according to the settings of this converter.

* * @param htmlInput HTML input stored in an array of bytes * @return string containing the converted HTML * * @throws UnsupportedEncodingException if the encoding set for the conversion is not supported */ public String convertToString(byte[] htmlInput) throws UnsupportedEncodingException { return convertToString(new String(htmlInput, getEncoding())); } /** * Converts the given HTML code according to the settings of the converter.

* * @param htmlInput HTML input stored in a string * @return string containing the converted HTML * * @throws UnsupportedEncodingException if the encoding set for the conversion is not supported */ public String convertToString(String htmlInput) throws UnsupportedEncodingException { // first: collect all converter classes to use on the input Map> converters = new HashMap>(); for (Iterator i = getModes().iterator(); i.hasNext();) { String mode = i.next(); String converterClass = OpenCms.getResourceManager().getHtmlConverter(mode); List modes = new ArrayList(); if (converters.containsKey(converterClass)) { // converter class already defined for a previous mode, get mode list modes = converters.get(converterClass); } // add mode name to list for the converter modes.add(mode); // store converter with modes in map converters.put(converterClass, modes); } // second: convert the content with all found converter classes for (Iterator>> i = converters.entrySet().iterator(); i.hasNext();) { Entry> entry = i.next(); String className = entry.getKey(); List modes = entry.getValue(); try { I_CmsHtmlConverter converter = (I_CmsHtmlConverter)Class.forName(className).newInstance(); // initialize converter converter.init(getEncoding(), modes); // convert input String htmlInput = converter.convertToString(htmlInput); } catch (ClassNotFoundException e) { LOG.error( org.opencms.loader.Messages.get().getBundle().key( org.opencms.loader.Messages.LOG_HTML_CONVERTER_CLASS_NOT_FOUND_1, className), e); } catch (IllegalAccessException e) { LOG.error( org.opencms.loader.Messages.get().getBundle().key( org.opencms.loader.Messages.LOG_HTML_CONVERTER_CLASS_NOT_FOUND_1, className), e); } catch (InstantiationException e) { LOG.error( org.opencms.loader.Messages.get().getBundle().key( org.opencms.loader.Messages.LOG_HTML_CONVERTER_CLASS_NOT_FOUND_1, className), e); } } return htmlInput; } /** * Converts the given HTML code according to the settings of this converter.

* * If an any error occurs during the conversion process, the original input is returned unmodified.

* * @param htmlInput HTML input stored in an array of bytes * * @return string containing the converted HTML */ public String convertToStringSilent(byte[] htmlInput) { try { return convertToString(htmlInput); } catch (Exception e) { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_CONVERSION_BYTE_FAILED_0), e); } try { return new String(htmlInput, getEncoding()); } catch (UnsupportedEncodingException e1) { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_CONVERSION_BYTE_FAILED_0), e1); } return new String(htmlInput); } } } /** * Converts the given HTML code according to the settings of this converter.

* * If an any error occurs during the conversion process, the original input is returned unmodified.

* * @param htmlInput HTML input stored in string * * @return string containing the converted HTML */ public String convertToStringSilent(String htmlInput) { try { return convertToString(htmlInput); } catch (Exception e) { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_CONVERSION_BYTE_FAILED_0), e); } return htmlInput; } } /** * Returns the encoding used for the HTML code conversion.

* * @return the encoding used for the HTML code conversion */ public String getEncoding() { return m_encoding; } /** * Returns the conversion mode to use.

* * @return the conversion mode to use */ public String getMode() { return m_mode; } /** * Returns the conversion modes to use as List of String parameters.

* * @return the conversion modes to use as List of String parameters */ private List getModes() { List modes = new ArrayList(); try { modes = CmsStringUtil.splitAsList(getMode(), SEPARATOR_MODES, true); } catch (Exception e) { // error generating list, an empty list will be returned } return modes; } /** * Initializes the HTML converter instance.

* * Possible values for the conversion mode are dependent from the converter implementation.

* * Values can be combined with the ; separator, so that it is e.g. possible to convert * to XHTML and clean from word at the same time.

* * @param encoding the encoding used for the HTML code conversion * @param mode the conversion mode to use */ private void init(String encoding, String mode) { if (encoding == null) { m_encoding = CmsEncoder.ENCODING_UTF_8; } else { m_encoding = encoding; } if (CmsStringUtil.isEmptyOrWhitespaceOnly(mode)) { m_mode = ""; } else { m_mode = mode; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy