All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.googlecode.htmlcompressor.compressor.XmlCompressor Maven / Gradle / Ivy

Go to download

HtmlCompressor is a small, fast and very easy to use Java library that minifies given HTML or XML source by removing extra whitespaces, comments and other unneeded characters without breaking the content structure. As a result pages become smaller in size and load faster. A command-line version of the compressor is also available.

There is a newer version: 2.0.2
Show newest version
/**
 *    Copyright 2009-2020 the original author or authors.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */
package com.googlecode.htmlcompressor.compressor;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Class that compresses given XML source by removing comments, extra spaces and line breaks while preserving content
 * within CDATA blocks.
 *
 * @author Sergiy Kovalchuk
 */
public class XmlCompressor implements Compressor {

    /**
     * {@link MessageFormat} template of the placeholder that temporarily replaces a preserved CDATA block. Argument
     * {@code 0} is the block's index in the list of preserved blocks.
     */
    protected static final String TEMP_CD_DATA_BLOCK = "%%%COMPRESS~CDATA~{0,number,#}%%%";

    /** Matches a complete CDATA section, {@code <![CDATA[ ... ]]>}, delimiters included (non-greedy). */
    protected static final Pattern cdataPattern = Pattern.compile("<!\\[CDATA\\[.*?\\]\\]>",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches a complete XML comment, from {@code <!--} up to the first following {@code -->}. */
    protected static final Pattern commentPattern = Pattern.compile("<!--.*?-->",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches whitespace that sits between a {@code >} and the next {@code <}. */
    protected static final Pattern intertagPattern = Pattern.compile(">\\s+<",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /**
     * Matches whitespace directly before the closing {@code >} or {@code />} of a tag. Group 1 is the tag text up to
     * that whitespace, group 2 is the closing delimiter.
     */
    protected static final Pattern tagEndSpacePattern = Pattern.compile("(<(?:[^>]+?))(?:\\s+?)(/?>)",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches a run of whitespace that is followed by a {@code >} with no {@code <} in between. */
    protected static final Pattern multispacePattern = Pattern.compile("\\s+(?=[^<]*?>)",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /**
     * Matches a whitespace character plus a word (group 1) followed by an equal sign with optional whitespace on
     * either side, provided a {@code >} follows with no {@code <} in between.
     */
    protected static final Pattern tagPropertyPattern = Pattern.compile("(\\s\\w+)\\s*=\\s*(?=[^<]*?>)",
            Pattern.CASE_INSENSITIVE);

    /** Matches a placeholder produced from {@link #TEMP_CD_DATA_BLOCK}; group 1 is the block index. */
    protected static final Pattern tempCdataPattern = Pattern.compile("%%%COMPRESS~CDATA~(\\d+?)%%%",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** When {@code false}, {@link #compress(String)} returns its input unchanged. */
    private boolean enabled = true;

    /** When {@code true}, XML comments are removed. */
    private boolean removeComments = true;

    /** When {@code true}, whitespace between tags is removed. */
    private boolean removeIntertagSpaces = true;

    /**
     * The main method that compresses given XML source and returns compressed result.
     *
     * <p>
     * CDATA blocks are swapped for placeholders first, the remaining markup is processed, and the blocks are then put
     * back verbatim. The result is trimmed.
     *
     * @param xml
     *            XML content to compress
     * @return compressed content; the input itself when compression is disabled or the input is {@code null} or
     *         empty
     */
    @Override
    public String compress(String xml) {
        if (!enabled || xml == null || xml.isEmpty()) {
            return xml;
        }

        // container for the CDATA blocks that must survive untouched
        List<String> cdataBlocks = new ArrayList<>();

        String result = preserveBlocks(xml, cdataBlocks);
        result = processXml(result);
        result = returnBlocks(result, cdataBlocks);

        return result.trim();
    }

    /**
     * Replaces every CDATA block in the source with a numbered placeholder and stores the original text of the block.
     *
     * @param xml
     *            the XML source
     * @param cdataBlocks
     *            list that receives the text of each CDATA block; the number inside a placeholder is the position of
     *            its block in this list
     * @return the source with CDATA blocks replaced by placeholders
     */
    protected String preserveBlocks(String xml, List<String> cdataBlocks) {
        // Locale.ROOT keeps the index in ASCII digits so that tempCdataPattern (\d) can find it again
        // whatever the default locale is.
        MessageFormat placeholderFormat = new MessageFormat(TEMP_CD_DATA_BLOCK, Locale.ROOT);

        Matcher matcher = cdataPattern.matcher(xml);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            // take the index from the list so placeholder and stored block always agree
            String placeholder = placeholderFormat.format(new Object[] { cdataBlocks.size() });
            cdataBlocks.add(matcher.group(0));
            matcher.appendReplacement(sb, Matcher.quoteReplacement(placeholder));
        }
        matcher.appendTail(sb);

        return sb.toString();
    }

    /**
     * Puts the preserved CDATA blocks back in place of their placeholders.
     *
     * <p>
     * Placeholder-looking text whose index does not refer to a stored block is left as it is.
     *
     * @param xml
     *            the processed XML containing placeholders
     * @param cdataBlocks
     *            the blocks collected by {@link #preserveBlocks(String, List)}
     * @return the XML with the original CDATA blocks restored
     */
    protected String returnBlocks(String xml, List<String> cdataBlocks) {
        Matcher matcher = tempCdataPattern.matcher(xml);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            // quoteReplacement: block content may contain '$' or '\', which appendReplacement would interpret
            matcher.appendReplacement(sb, Matcher.quoteReplacement(resolveBlock(matcher, cdataBlocks)));
        }
        matcher.appendTail(sb);

        return sb.toString();
    }

    /**
     * Returns the stored block for the placeholder currently matched, or the matched text itself when the index is
     * not a valid position in the list.
     */
    private static String resolveBlock(Matcher matcher, List<String> cdataBlocks) {
        try {
            int index = Integer.parseInt(matcher.group(1));
            if (index < cdataBlocks.size()) {
                return cdataBlocks.get(index);
            }
        } catch (NumberFormatException ignored) {
            // digit run too large for an int, so it cannot be an index produced by preserveBlocks
        }
        return matcher.group(0);
    }

    /**
     * Runs the compression steps on XML whose CDATA blocks have already been replaced by placeholders: comment
     * removal, inter-tag whitespace removal, then whitespace clean-up inside tags.
     *
     * @param xml
     *            the XML to process
     * @return the processed XML
     */
    protected String processXml(String xml) {
        String result = removeComments(xml);
        result = removeIntertagSpaces(result);
        result = removeSpacesInsideTags(result);
        return result;
    }

    /**
     * Removes unneeded whitespace inside tags.
     *
     * @param xml
     *            the XML to process
     * @return the XML with whitespace inside tags reduced
     */
    protected String removeSpacesInsideTags(String xml) {
        // replace multiple spaces inside tags with single spaces
        String result = multispacePattern.matcher(xml).replaceAll(" ");

        // remove spaces around equal sign inside tags
        result = tagPropertyPattern.matcher(result).replaceAll("$1=");

        // remove ending spaces inside tags
        return tagEndSpacePattern.matcher(result).replaceAll("$1$2");
    }

    /**
     * Removes whitespace between tags when that option is switched on.
     *
     * @param xml
     *            the XML to process
     * @return the XML without inter-tag whitespace, or the input unchanged when the option is off
     */
    protected String removeIntertagSpaces(String xml) {
        if (!removeIntertagSpaces) {
            return xml;
        }
        return intertagPattern.matcher(xml).replaceAll("><");
    }

    /**
     * Removes XML comments when that option is switched on.
     *
     * @param xml
     *            the XML to process
     * @return the XML without comments, or the input unchanged when the option is off
     */
    protected String removeComments(String xml) {
        if (!removeComments) {
            return xml;
        }
        return commentPattern.matcher(xml).replaceAll("");
    }

    /**
     * Returns {@code true} if compression is enabled.
     *
     * @return {@code true} if compression is enabled.
     */
    public boolean isEnabled() {
        return enabled;
    }

    /**
     * If set to {@code false} all compression will be bypassed. Might be useful for testing purposes. Default is
     * {@code true}.
     *
     * @param enabled
     *            set {@code false} to bypass all compression
     */
    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    /**
     * Returns {@code true} if all XML comments will be removed.
     *
     * @return {@code true} if all XML comments will be removed
     */
    public boolean isRemoveComments() {
        return removeComments;
    }

    /**
     * If set to {@code true} all XML comments will be removed. Default is {@code true}.
     *
     * @param removeComments
     *            set {@code true} to remove all XML comments
     */
    public void setRemoveComments(boolean removeComments) {
        this.removeComments = removeComments;
    }

    /**
     * Returns {@code true} if all inter-tag whitespace characters will be removed.
     *
     * @return {@code true} if all inter-tag whitespace characters will be removed.
     */
    public boolean isRemoveIntertagSpaces() {
        return removeIntertagSpaces;
    }

    /**
     * If set to {@code true} all inter-tag whitespace characters will be removed. Default is {@code true}.
     *
     * @param removeIntertagSpaces
     *            set {@code true} to remove all inter-tag whitespace characters
     */
    public void setRemoveIntertagSpaces(boolean removeIntertagSpaces) {
        this.removeIntertagSpaces = removeIntertagSpaces;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy