com.googlecode.htmlcompressor.compressor.HtmlCompressor Maven / Gradle / Ivy
Show all versions of htmlcompressor Show documentation
/*
* Copyright 2009-2022 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.googlecode.htmlcompressor.compressor;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.mozilla.javascript.ErrorReporter;
/**
 * Class that compresses given HTML source by removing comments, extra spaces and line breaks while preserving
 * content within &lt;pre&gt;, &lt;textarea&gt;, &lt;script&gt; and &lt;style&gt; tags.
 * <p>
 * Blocks that should be additionally preserved could be marked with:
 * <pre>
 * &lt;!-- {{{ --&gt;
 *     ...
 * &lt;!-- }}} --&gt;
 * </pre>
 * or any number of user defined patterns.
 * <p>
 * Content inside &lt;script&gt; or &lt;style&gt; tags could be optionally compressed using
 * Yahoo YUI Compressor or
 * Google Closure Compiler libraries.
 *
 * @author Sergiy Kovalchuk
 */
public class HtmlCompressor implements Compressor {
/**
 * Identifier string {@code "yui"}. Presumably selects the Yahoo YUI Compressor as the JavaScript
 * compressor implementation; verify against the code that consumes this constant.
 */
public static final String JS_COMPRESSOR_YUI = "yui";
/**
 * Identifier string {@code "closure"}. Presumably selects the Google Closure Compiler as the JavaScript
 * compressor implementation; verify against the code that consumes this constant.
 */
public static final String JS_COMPRESSOR_CLOSURE = "closure";
/**
 * Predefined pattern that matches {@code <?php ... ?>} tags. Matching is case-insensitive, non-greedy
 * (stops at the first {@code ?>}) and spans line breaks. Could be passed inside a list to
 * {@link #setPreservePatterns(List) setPreservePatterns} method.
 */
public static final Pattern PHP_TAG_PATTERN = Pattern.compile("<\\?php.*?\\?>",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/**
 * Predefined pattern that matches {@code <% ... %>} tags. Matching is non-greedy (stops at the first
 * {@code %>}) and spans line breaks. Could be passed inside a list to
 * {@link #setPreservePatterns(List) setPreservePatterns} method.
 */
public static final Pattern SERVER_SCRIPT_TAG_PATTERN = Pattern.compile("<%.*?%>", Pattern.DOTALL);
/**
 * Predefined pattern that matches server side include directives of the form
 * &lt;!--# ... --&gt; (optional whitespace is allowed between the comment opener and the hash sign).
 * Matching is non-greedy and spans line breaks. Could be passed inside a list to
 * {@link #setPreservePatterns(List) setPreservePatterns} method.
 * <p>
 * The regular expression must not be empty: an empty pattern matches at every position of the input
 * instead of matching only the directives.
 */
public static final Pattern SERVER_SIDE_INCLUDE_PATTERN = Pattern.compile("<!--\\s*#.*?-->", Pattern.DOTALL);
/**
 * Predefined list of tags that are very likely to be block-level, given as a comma-separated string.
 * Could be passed to {@link #setRemoveSurroundingSpaces(String) setRemoveSurroundingSpaces} method.
 */
public static final String BLOCK_TAGS_MIN = "html,head,body,br,p";
/**
 * Predefined list of tags that are block-level by default, excluding {@code <div>} and {@code <li>} tags.
 * Table tags are also included. The value is {@link #BLOCK_TAGS_MIN} extended with further
 * comma-separated tag names. Could be passed to
 * {@link #setRemoveSurroundingSpaces(String) setRemoveSurroundingSpaces} method.
 */
public static final String BLOCK_TAGS_MAX = BLOCK_TAGS_MIN
+ ",h1,h2,h3,h4,h5,h6,blockquote,center,dl,fieldset,form,frame,frameset,hr,noframes,ol,table,tbody,tr,td,th,tfoot,thead,ul";
/**
 * Could be passed to {@link #setRemoveSurroundingSpaces(String) setRemoveSurroundingSpaces} method to remove all
 * surrounding spaces (not recommended).
 */
public static final String ALL_TAGS = "all";
// NOTE(review): the per-field descriptions in this group are derived from the field names and the class
// description; confirm each one against the code that reads the field.
/** Master on/off switch for the compressor; {@code true} by default. */
private boolean enabled = true;
// javascript and css compressor implementations
/** Compressor implementation used for JavaScript content; {@code null} until one is assigned. */
private Compressor javaScriptCompressor;
/** Compressor implementation used for CSS content; {@code null} until one is assigned. */
private Compressor cssCompressor;
// default settings
/** Flag for removing HTML comments; on by default. */
private boolean removeComments = true;
/** Flag for collapsing runs of whitespace; on by default. */
private boolean removeMultiSpaces = true;
// optional settings (all off unless explicitly enabled, since boolean fields default to false)
/** Flag for removing whitespace between tags. */
private boolean removeIntertagSpaces;
/** Flag for removing unnecessary quotes around attribute values. */
private boolean removeQuotes;
/** Flag for compressing inline JavaScript. */
private boolean compressJavaScript;
/** Flag for compressing inline CSS. */
private boolean compressCss;
/** Flag for simplifying the document's doctype declaration. */
private boolean simpleDoctype;
/** Flag for removing default attributes from script tags. */
private boolean removeScriptAttributes;
/** Flag for removing default attributes from style tags. */
private boolean removeStyleAttributes;
/** Flag for removing default attributes from link tags. */
private boolean removeLinkAttributes;
/** Flag for removing default attributes from form tags. */
private boolean removeFormAttributes;
/** Flag for removing default attributes from input tags. */
private boolean removeInputAttributes;
/** Flag for simplifying boolean attributes. */
private boolean simpleBooleanAttributes;
/** Flag for removing the {@code javascript:} protocol prefix. */
private boolean removeJavaScriptProtocol;
/** Flag for removing the {@code http:} protocol prefix. */
private boolean removeHttpProtocol;
/** Flag for removing the {@code https:} protocol prefix. */
private boolean removeHttpsProtocol;
/** Flag for keeping line breaks in the output. */
private boolean preserveLineBreaks;
/**
 * Tag list whose surrounding spaces should be removed; {@code null} until set. The predefined values
 * {@code BLOCK_TAGS_MIN}, {@code BLOCK_TAGS_MAX} and {@code ALL_TAGS} are documented as valid inputs.
 */
private String removeSurroundingSpaces;
/**
 * User-supplied patterns whose matches should be preserved; {@code null} until set.
 * The element type is {@link Pattern}: the predefined {@code *_PATTERN} constants of this class are
 * documented as values to be passed inside this list. Declared with its type argument rather than as a
 * raw {@code List} so that elements can be used as patterns without casts.
 */
private List<Pattern> preservePatterns;
// statistics
/** Flag for collecting compression statistics; off by default. */
private boolean generateStatistics;
/**
 * Holder for collected statistics; {@code null} until assigned.
 * NOTE(review): presumably only populated when {@code generateStatistics} is set; confirm in the compress logic.
 */
private HtmlCompressorStatistics statistics;
// YUICompressor settings
// NOTE(review): the meanings below are read off the field names; confirm against the code that hands
// these values to the YUI compressor.
/** YUI JavaScript option: disable munging (off by default). */
private boolean yuiJsNoMunge;
/** YUI JavaScript option: preserve all semicolons (off by default). */
private boolean yuiJsPreserveAllSemiColons;
/** YUI JavaScript option: disable optimizations (off by default). */
private boolean yuiJsDisableOptimizations;
/** YUI JavaScript line-break setting; initialised to {@code -1}. */
private int yuiJsLineBreak = -1;
/** YUI CSS line-break setting; initialised to {@code -1}. */
private int yuiCssLineBreak = -1;
/** Error reporter implementation for YUI compressor; {@code null} until assigned. */
// error reporter implementation for YUI compressor
private ErrorReporter yuiErrorReporter;
// temp replacements for preserved blocks
//
// Each template below starts with "%%%~" and ends with "~%%%" (the same delimiters the intertag
// patterns of this class look for) and contains java.text.MessageFormat-style arguments.
// "{0,number,#}" renders argument 0 as a number using the "#" pattern, i.e. without grouping separators.
// NOTE(review): presumably argument 0 is the index of the preserved block; confirm at the call sites.
/** Placeholder template for conditional comment blocks; takes one numeric argument. */
protected static final String TEMP_COND_COMMENT_BLOCK = "%%%~COMPRESS~COND~{0,number,#}~%%%";
/** Placeholder template for {@code <pre>} blocks; takes one numeric argument. */
protected static final String TEMP_PRE_BLOCK = "%%%~COMPRESS~PRE~{0,number,#}~%%%";
/** Placeholder template for {@code <textarea>} blocks; takes one numeric argument. */
protected static final String TEMP_TEXT_AREA_BLOCK = "%%%~COMPRESS~TEXTAREA~{0,number,#}~%%%";
/** Placeholder template for {@code <script>} blocks; takes one numeric argument. */
protected static final String TEMP_SCRIPT_BLOCK = "%%%~COMPRESS~SCRIPT~{0,number,#}~%%%";
/** Placeholder template for {@code <style>} blocks; takes one numeric argument. */
protected static final String TEMP_STYLE_BLOCK = "%%%~COMPRESS~STYLE~{0,number,#}~%%%";
/** Placeholder template for event blocks (presumably inline event handler attributes); takes one numeric argument. */
protected static final String TEMP_EVENT_BLOCK = "%%%~COMPRESS~EVENT~{0,number,#}~%%%";
/** Placeholder template for line break blocks; takes one numeric argument. */
protected static final String TEMP_LINE_BREAK_BLOCK = "%%%~COMPRESS~LT~{0,number,#}~%%%";
/** Placeholder template for skip blocks; takes one numeric argument. */
protected static final String TEMP_SKIP_BLOCK = "%%%~COMPRESS~SKIP~{0,number,#}~%%%";
/**
 * Placeholder template for user-defined blocks; takes two numeric arguments.
 * NOTE(review): presumably argument 0 identifies the user pattern and argument 1 the match; confirm at the call sites.
 */
protected static final String TEMP_USER_BLOCK = "%%%~COMPRESS~USER{0,number,#}~{1,number,#}~%%%";
// compiled regex patterns
/** Matches a single whitespace character. */
protected static final Pattern emptyPattern = Pattern.compile("\\s");
/**
 * Matches a block explicitly marked to be skipped, i.e. everything between the markers
 * &lt;!-- {{{ --&gt; and &lt;!-- }}} --&gt; (whitespace inside the markers is optional).
 * Group 1 is the content between the two markers; the match is non-greedy and spans line breaks.
 */
protected static final Pattern skipPattern = Pattern.compile(
"<!--\\s*\\{\\{\\{\\s*-->(.*?)<!--\\s*\\}\\}\\}\\s*-->", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/**
 * Matches a conditional comment such as &lt;!--[if IE]&gt; ... &lt;![endif]--&gt;.
 * Group 1 is the opening marker, group 2 the enclosed content and group 3 the closing marker.
 */
protected static final Pattern condCommentPattern = Pattern
.compile("(<!(?:--)?\\[[^\\]]+?]>)(.*?)(<!\\[[^\\]]+]-->)", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/**
 * Matches an ordinary HTML comment: either the empty comment &lt;!----&gt; or a comment whose first
 * character after the opener is not an opening square bracket, so that conditional comments are not matched.
 */
protected static final Pattern commentPattern = Pattern.compile("<!---->|<!--[^\\[].*?-->",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** Matches whitespace between the end of one tag ({@code >}) and the start of the next ({@code <}). */
protected static final Pattern intertagPattern_TagTag = Pattern.compile(">\\s+<",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** Matches whitespace between the end of a tag ({@code >}) and the start of a {@code %%%~} placeholder. */
protected static final Pattern intertagPattern_TagCustom = Pattern.compile(">\\s+%%%~",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** Matches whitespace between the end of a {@code ~%%%} placeholder and the start of a tag ({@code <}). */
protected static final Pattern intertagPattern_CustomTag = Pattern.compile("~%%%\\s+<",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** Matches whitespace between the end of one {@code ~%%%} placeholder and the start of the next {@code %%%~}. */
protected static final Pattern intertagPattern_CustomCustom = Pattern.compile("~%%%\\s+%%%~",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** Matches a run of one or more whitespace characters. */
protected static final Pattern multispacePattern = Pattern.compile("\\s+",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/**
 * Matches a tag that has whitespace right before its closing {@code >} or {@code />}.
 * Group 1 is the tag text up to that whitespace, group 2 is the closing {@code >} or {@code />}.
 */
protected static final Pattern tagEndSpacePattern = Pattern.compile("(<(?:[^>]+?))(?:\\s+?)(/?>)",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/**
 * Matches an equals sign followed by an unquoted value (letters, digits, hyphen, underscore)
 * that runs to the end of the input.
 */
protected static final Pattern tagLastUnquotedValuePattern = Pattern.compile("=\\s*[a-z0-9-_]+$",
Pattern.CASE_INSENSITIVE);
/**
 * Matches a quoted attribute value made only of letters, digits, hyphens and underscores, provided a
 * {@code >} follows before any {@code <} (lookahead). Group 1 is the quote character, group 2 the value,
 * group 3 an optional {@code /} directly after the closing quote.
 */
protected static final Pattern tagQuotePattern = Pattern.compile("\\s*=\\s*([\"'])([a-z0-9-_]+?)\\1(/?)(?=[^<]*?>)",
Pattern.CASE_INSENSITIVE);
/** The Constant prePattern. */
protected static final Pattern prePattern = Pattern.compile("(]*?>)(.*?)(
)",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** The Constant taPattern. */
protected static final Pattern taPattern = Pattern.compile("()",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** The Constant scriptPattern. */
protected static final Pattern scriptPattern = Pattern.compile("()",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** The Constant stylePattern. */
protected static final Pattern stylePattern = Pattern.compile("()",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/**
 * Matches an attribute name (group 1, including its leading whitespace character) together with the
 * equals sign and any whitespace around it, provided a {@code >} follows before any {@code <} (lookahead).
 */
protected static final Pattern tagPropertyPattern = Pattern.compile("(\\s\\w+)\\s*=\\s*(?=[^<]*?>)",
Pattern.CASE_INSENSITIVE);
/**
 * Matches a CDATA section together with any whitespace around it.
 * Group 1 is the content between the CDATA opener and the closing {@code ]]>}.
 */
protected static final Pattern cdataPattern = Pattern.compile("\\s*<!\\[CDATA\\[(.*?)\\]\\]>\\s*",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** Matches a complete doctype declaration, from {@code <!DOCTYPE} up to the next {@code >}. */
protected static final Pattern doctypePattern = Pattern.compile("<!DOCTYPE[^>]*>",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/**
 * Matches a {@code type} attribute. Group 1 is the (possibly empty) run of quote characters that opens
 * the value, group 2 the value itself; the same quote run must close the value.
 */
protected static final Pattern typeAttrPattern = Pattern.compile("type\\s*=\\s*([\\\"']*)(.+?)\\1",
Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
/** The Constant jsTypeAttrPattern. */
protected static final Pattern jsTypeAttrPattern = Pattern.compile(
"(