org.apache.shindig.gadgets.parse.CompactHtmlSerializer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of shindig-gadgets Show documentation
Show all versions of shindig-gadgets Show documentation
Renders gadgets, provides the gadget metadata service, and serves
all javascript required by the OpenSocial specification.
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.shindig.gadgets.parse;
import com.google.common.collect.ImmutableSortedSet;
import org.w3c.dom.Node;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
/**
 * Performs simple content compaction while writing HTML documents. The compaction includes:
 * <ul>
 * <li>Collapsing consecutive whitespaces while preserving those within style, pre, script
 * and textarea tags</li>
 * <li>Removing HTML comments while preserving IE conditional comments</li>
 * </ul>
 *
 * TODO - Consider adding attribute quoting elimination, empty attribute elimination where safe,
 * end-tag elimination where safe.
 */
public class CompactHtmlSerializer extends DefaultHtmlSerializer {

  /**
   * Names of the elements whose child text and comments are written out
   * untouched. The set is ordered by {@link String#CASE_INSENSITIVE_ORDER},
   * so membership checks ignore case.
   */
  private static final ImmutableSortedSet<String> SPECIAL_TAGS = ImmutableSortedSet
      .orderedBy(String.CASE_INSENSITIVE_ORDER)
      .add("style", "pre", "script", "textarea")
      .build();

  /**
   * Characters collapsed by {@link #collapseWhitespace}: space, tab, carriage
   * return and line feed. Form feed (U+000C), which HTML 4.01 section 9.1 also
   * lists as white space, is not included.
   */
  private static final String HTML_WHITESPACE = " \t\r\n";

  /**
   * Writes a text node. Text whose parent is a special tag is delegated to the
   * superclass unchanged; any other text has its whitespace collapsed.
   *
   * @param n the text node to write
   * @param output the sink receiving the serialized text
   * @throws IOException if the sink fails
   */
  @Override
  protected void writeText(Node n, Appendable output) throws IOException {
    if (hasSpecialParent(n)) {
      super.writeText(n, output);
    } else {
      collapseWhitespace(n.getTextContent(), output);
    }
  }

  /**
   * Writes a comment node only when it has to survive compaction: either it
   * sits inside a special tag or it is an IE conditional comment. Every other
   * comment is dropped from the output.
   *
   * @param n the comment node to write
   * @param output the sink receiving the serialized comment
   * @throws IOException if the sink fails
   */
  @Override
  protected void writeComment(Node n, Appendable output) throws IOException {
    if (hasSpecialParent(n) || isIeConditionalComment(n)) {
      super.writeComment(n, output);
    }
  }

  /**
   * Returns true if the parent of the given node is a special tag. A node
   * without a parent (for example a detached node) is treated as not being
   * inside a special tag instead of failing with a NullPointerException.
   */
  private static boolean hasSpecialParent(Node n) {
    Node parent = n.getParentNode();
    return parent != null && isSpecialTag(parent.getNodeName());
  }

  /**
   * Returns true if the comment looks like (part of) an IE conditional comment.
   *
   * Two syntaxes exist (see the MSDN article "About Conditional Comments" and
   * PPK's notes on conditional comments):
   *
   * - downlevel-hidden: {@code <!--[if IE]> ... <![endif]-->} is one comment
   *   whose text contains both markers;
   * - downlevel-revealed: {@code <!--[if !IE]><!--> ... <!--<![endif]-->} is
   *   two separate comments, the first containing only "[if " and the second
   *   only "[endif]".
   *
   * Either marker is therefore sufficient; requiring both would strip the
   * downlevel-revealed pair.
   */
  private boolean isIeConditionalComment(Node n) {
    String comment = n.getTextContent();
    if (comment == null) {
      return false;
    }
    return comment.contains("[if ") || comment.contains("[endif]");
  }

  /**
   * Returns true if a tag with a given tagName should preserve any whitespaces
   * in its children nodes. The comparison is case-insensitive; a null name is
   * never special.
   */
  static boolean isSpecialTag(String tagName) {
    return tagName != null && SPECIAL_TAGS.contains(tagName);
  }

  /**
   * Collapse any consecutive HTML whitespace characters inside a string into
   * one space character (0x20). Leading whitespace is stripped and trailing
   * whitespace is never written, so this method will not output any characters
   * when the given string is null, empty or entirely composed of whitespaces.
   *
   * References:
   *
   * - http://www.w3.org/TR/html401/struct/text.html#h-9.1
   * - http://java.sun.com/javase/6/docs/api/java/lang/Character.html#isWhitespace(char)
   *
   * @param str the text to compact; may be null
   * @param output the sink receiving the compacted text
   * @throws IOException if the sink fails
   */
  static void collapseWhitespace(String str, Appendable output) throws IOException {
    if (str == null) {
      // Nothing to write; avoids the NullPointerException on str.length() below.
      return;
    }
    str = StringUtils.stripStart(str, HTML_WHITESPACE);

    // A pending space is only flushed in front of the next non-whitespace
    // character, which is what drops whitespace at the end of the string.
    boolean seenWhitespace = false;
    for (int i = 0; i < str.length(); i++) {
      char c = str.charAt(i);
      if (HTML_WHITESPACE.indexOf(c) != -1) {
        seenWhitespace = true;
      } else {
        if (seenWhitespace) {
          output.append(' ');
        }
        output.append(c);
        seenWhitespace = false;
      }
    }
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy