
com.googlecode.htmlcompressor.compressor.XmlCompressor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of htmlcompressor Show documentation
Show all versions of htmlcompressor Show documentation
HtmlCompressor is a small, fast and very easy to use Java library that minifies a given HTML or XML source by
removing extra whitespace, comments and other unneeded characters without breaking the content structure.
As a result, pages become smaller in size and load faster. A command-line version of the compressor is also
available.
/**
* Copyright 2009-2020 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.googlecode.htmlcompressor.compressor;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Compresses XML source by removing comments, extra spaces and line breaks while preserving content within CDATA
 * blocks.
 *
 * <p>Compression runs in three steps: CDATA sections are swapped for numbered placeholders, the remaining markup is
 * minified, and the original CDATA sections are put back in place of the placeholders.
 *
 * @author Sergiy Kovalchuk
 */
public class XmlCompressor implements Compressor {

    // default settings

    /** Master switch; when {@code false}, {@link #compress(String)} returns its input untouched. */
    private boolean enabled = true;

    /** Whether XML comments are stripped. */
    private boolean removeComments = true;

    /** Whether whitespace between a closing {@code >} and the next {@code <} is stripped. */
    private boolean removeIntertagSpaces = true;

    // temp replacements for preserved blocks

    /** {@link MessageFormat} template of the placeholder that stands in for the n-th preserved CDATA block. */
    protected static final String TEMP_CD_DATA_BLOCK = "%%%COMPRESS~CDATA~{0,number,#}%%%";

    // compiled regex patterns

    /** Matches one complete CDATA section, delimiters included (reluctant, may span lines). */
    protected static final Pattern cdataPattern = Pattern.compile("<!\\[CDATA\\[.*?\\]\\]>",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches one complete XML comment, delimiters included (reluctant, may span lines). */
    protected static final Pattern commentPattern = Pattern.compile("<!--.*?-->",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches whitespace located between the end of one tag and the start of the next. */
    protected static final Pattern intertagPattern = Pattern.compile(">\\s+<",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches whitespace directly before the closing {@code >} or {@code />} of a tag. */
    protected static final Pattern tagEndSpacePattern = Pattern.compile("(<(?:[^>]+?))(?:\\s+?)(/?>)",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches a whitespace run that is followed by {@code >} with no {@code <} in between. */
    protected static final Pattern multispacePattern = Pattern.compile("\\s+(?=[^<]*?>)",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches an attribute name and its equals sign, including any whitespace around the sign. */
    protected static final Pattern tagPropertyPattern = Pattern.compile("(\\s\\w+)\\s*=\\s*(?=[^<]*?>)",
            Pattern.CASE_INSENSITIVE);

    /** Matches a placeholder produced from {@link #TEMP_CD_DATA_BLOCK}; group 1 is the block index. */
    protected static final Pattern tempCdataPattern = Pattern.compile("%%%COMPRESS~CDATA~(\\d+?)%%%",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /**
     * The main method that compresses given XML source and returns compressed result.
     *
     * @param xml
     *            XML content to compress
     * @return compressed and trimmed content, or {@code xml} itself when compression is disabled or {@code xml} is
     *         {@code null} or empty
     */
    @Override
    public String compress(String xml) {
        if (!enabled || xml == null || xml.isEmpty()) {
            return xml;
        }

        // preserved block container, filled by preserveBlocks and consumed by returnBlocks
        List<String> cdataBlocks = new ArrayList<>();

        String result = preserveBlocks(xml, cdataBlocks);
        result = processXml(result);
        result = returnBlocks(result, cdataBlocks);

        return result.trim();
    }

    /**
     * Replaces every CDATA section with a numbered placeholder so that later processing cannot alter its content.
     *
     * @param xml
     *            the xml
     * @param cdataBlocks
     *            list that receives the removed CDATA sections; the number in each placeholder is the position of
     *            its section in this list
     * @return the xml with CDATA sections replaced by placeholders
     */
    protected String preserveBlocks(String xml, List<String> cdataBlocks) {
        Matcher matcher = cdataPattern.matcher(xml);
        // StringBuffer on purpose: Matcher.appendReplacement accepts a StringBuilder only from Java 9 on.
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            // Take the index from the list so a placeholder always points at its own block.
            int index = cdataBlocks.size();
            cdataBlocks.add(matcher.group(0));
            matcher.appendReplacement(sb, MessageFormat.format(TEMP_CD_DATA_BLOCK, index));
        }
        matcher.appendTail(sb);
        return sb.toString();
    }

    /**
     * Puts the preserved CDATA sections back in place of their placeholders.
     *
     * <p>A placeholder whose index does not refer to an entry of {@code cdataBlocks} is left in the text as is.
     *
     * @param xml
     *            the xml containing placeholders
     * @param cdataBlocks
     *            the CDATA sections collected by {@link #preserveBlocks(String, List)}
     * @return the xml with placeholders replaced by the original CDATA sections
     */
    protected String returnBlocks(String xml, List<String> cdataBlocks) {
        Matcher matcher = tempCdataPattern.matcher(xml);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            String replacement = resolveBlock(matcher.group(1), cdataBlocks, matcher.group(0));
            // quoteReplacement: restored content may contain '$' or '\', which appendReplacement treats specially.
            matcher.appendReplacement(sb, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(sb);
        return sb.toString();
    }

    /** Returns the block a placeholder index refers to, or {@code placeholder} itself if there is no such block. */
    private static String resolveBlock(String indexText, List<String> cdataBlocks, String placeholder) {
        // More than nine digits could overflow int parsing; preserveBlocks never produces such an index.
        if (indexText.length() <= 9) {
            int index = Integer.parseInt(indexText);
            if (index < cdataBlocks.size()) {
                return cdataBlocks.get(index);
            }
        }
        return placeholder;
    }

    /**
     * Minifies xml from which CDATA sections have already been removed: comments first, then inter-tag spaces,
     * then spaces inside tags.
     *
     * @param xml
     *            the xml
     * @return the minified xml
     */
    protected String processXml(String xml) {
        String result = removeComments(xml);
        result = removeIntertagSpaces(result);
        result = removeSpacesInsideTags(result);
        return result;
    }

    /**
     * Removes unneeded spaces inside tags.
     *
     * @param xml
     *            the xml
     * @return the xml with whitespace inside tags reduced
     */
    protected String removeSpacesInsideTags(String xml) {
        // replace multiple spaces inside tags with single spaces
        String result = multispacePattern.matcher(xml).replaceAll(" ");

        // remove spaces around equal sign inside tags
        result = tagPropertyPattern.matcher(result).replaceAll("$1=");

        // remove ending spaces inside tags
        result = tagEndSpacePattern.matcher(result).replaceAll("$1$2");

        return result;
    }

    /**
     * Removes whitespace between tags if {@link #isRemoveIntertagSpaces()} is {@code true}.
     *
     * @param xml
     *            the xml
     * @return the xml, with inter-tag whitespace removed when the option is on
     */
    protected String removeIntertagSpaces(String xml) {
        if (!removeIntertagSpaces) {
            return xml;
        }
        return intertagPattern.matcher(xml).replaceAll("><");
    }

    /**
     * Removes XML comments if {@link #isRemoveComments()} is {@code true}.
     *
     * @param xml
     *            the xml
     * @return the xml, with comments removed when the option is on
     */
    protected String removeComments(String xml) {
        if (!removeComments) {
            return xml;
        }
        return commentPattern.matcher(xml).replaceAll("");
    }

    /**
     * Returns {@code true} if compression is enabled.
     *
     * @return {@code true} if compression is enabled.
     */
    public boolean isEnabled() {
        return enabled;
    }

    /**
     * If set to {@code false} all compression will be bypassed. Might be useful for testing purposes. Default is
     * {@code true}.
     *
     * @param enabled
     *            set {@code false} to bypass all compression
     */
    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    /**
     * Returns {@code true} if all XML comments will be removed.
     *
     * @return {@code true} if all XML comments will be removed
     */
    public boolean isRemoveComments() {
        return removeComments;
    }

    /**
     * If set to {@code true} all XML comments will be removed. Default is {@code true}.
     *
     * @param removeComments
     *            set {@code true} to remove all XML comments
     */
    public void setRemoveComments(boolean removeComments) {
        this.removeComments = removeComments;
    }

    /**
     * Returns {@code true} if all inter-tag whitespace characters will be removed.
     *
     * @return {@code true} if all inter-tag whitespace characters will be removed.
     */
    public boolean isRemoveIntertagSpaces() {
        return removeIntertagSpaces;
    }

    /**
     * If set to {@code true} all inter-tag whitespace characters will be removed. Default is {@code true}.
     *
     * @param removeIntertagSpaces
     *            set {@code true} to remove all inter-tag whitespace characters
     */
    public void setRemoveIntertagSpaces(boolean removeIntertagSpaces) {
        this.removeIntertagSpaces = removeIntertagSpaces;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy