
org.apache.shindig.gadgets.parse.GadgetHtmlParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of shindig-gadgets Show documentation
Show all versions of shindig-gadgets Show documentation
Renders gadgets, provides the gadget metadata service, and serves
all javascript required by the OpenSocial specification.
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package org.apache.shindig.gadgets.parse;
import com.google.common.collect.Lists;
import com.google.inject.ImplementedBy;
import com.google.inject.Inject;
import com.google.inject.Provider;
import org.apache.shindig.common.cache.Cache;
import org.apache.shindig.common.cache.CacheProvider;
import org.apache.shindig.common.util.HashUtil;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.nekohtml.NekoSimplifiedHtmlParser;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.util.LinkedList;
/**
* Parser for arbitrary HTML content
*/
@ImplementedBy(NekoSimplifiedHtmlParser.class)
public abstract class GadgetHtmlParser {
/** Cache name passed to {@code CacheProvider.createCache} to obtain {@code documentCache}. */
public static final String PARSED_DOCUMENTS = "parsedDocuments";
/** Cache name passed to {@code CacheProvider.createCache} to obtain {@code fragmentCache}. */
public static final String PARSED_FRAGMENTS = "parsedFragments";
// Both caches are assigned in setCacheProvider and have no initializer here.
// NOTE(review): declared as raw types; the type arguments may have been lost -- confirm
// the intended key/value types against the code that reads and writes these caches.
private Cache documentCache;
private Cache fragmentCache;
// Starts out as a DefaultSerializerProvider; setSerializerProvider replaces it.
private Provider serializerProvider = new DefaultSerializerProvider();
// Set once by the constructor; protected so subclasses can read it.
protected final DOMImplementation documentFactory;
/**
* Creates a parser that keeps the given DOM implementation in {@code documentFactory}.
*
* @param documentFactory DOM implementation stored for later use; presumably used to
* create new documents -- TODO confirm against the usages of {@code documentFactory}
*/
protected GadgetHtmlParser(DOMImplementation documentFactory) {
this.documentFactory = documentFactory;
}
/**
* Obtains the document cache and the fragment cache from the supplied provider.
* The document cache is requested first, then the fragment cache.
*
* @param cacheProvider provider asked for one cache named {@code PARSED_DOCUMENTS}
* and one named {@code PARSED_FRAGMENTS}
*/
@Inject
public void setCacheProvider(CacheProvider cacheProvider) {
this.documentCache = cacheProvider.createCache(PARSED_DOCUMENTS);
this.fragmentCache = cacheProvider.createCache(PARSED_FRAGMENTS);
}
/**
* Replaces the serializer provider held by this parser. Until this is called the
* field holds the {@code DefaultSerializerProvider} it was initialized with.
*
* @param serProvider provider to store in {@code serializerProvider}
*/
@Inject
public void setSerializerProvider(Provider serProvider) {
this.serializerProvider = serProvider;
}
/**
* @param content
* @return true if we detect a preamble of doctype or html
*/
protected static boolean attemptFullDocParseFirst(String content) {
String normalized = content.substring(0, Math.min(100, content.length())).toUpperCase();
return normalized.contains(" beforeHead = Lists.newLinkedList();
LinkedList beforeBody = Lists.newLinkedList();
while (html.hasChildNodes()) {
Node child = html.removeChild(html.getFirstChild());
if (child.getNodeType() == Node.ELEMENT_NODE &&
"head".equalsIgnoreCase(child.getNodeName())) {
if (head == null) {
head = child;
} else {
// Concatenate <head> elements together.
transferChildren(head, child);
}
} else if (child.getNodeType() == Node.ELEMENT_NODE &&
"body".equalsIgnoreCase(child.getNodeName())) {
if (body == null) {
body = child;
} else {
// Concatenate <body> elements together.
transferChildren(body, child);
}
} else if (head == null) {
beforeHead.add(child);
} else if (body == null) {
beforeBody.add(child);
} else {
// Both <head> and <body> are present. Append to tail of <body>.
body.appendChild(child);
}
}
// Ensure head tag exists
if (head == null) {
// beforeHead contains all elements that should be prepended to <body>. Switch them.
LinkedList temp = beforeBody;
beforeBody = beforeHead;
beforeHead = temp;
// Add as first element
head = document.createElement("head");
html.insertBefore(head, html.getFirstChild());
} else {
// Re-append head node.
html.appendChild(head);
}
// Ensure body tag exists.
if (body == null) {
// Add immediately after head.
body = document.createElement("body");
html.insertBefore(body, head.getNextSibling());
} else {
// Re-append body node.
html.appendChild(body);
}
// Leftovers: nodes before the first <head> node found and the first <body> node found.
// Prepend beforeHead to the front of <head>, and beforeBody to beginning of <body>,
// in the order they were found in the document.
prependToNode(head, beforeHead);
prependToNode(body, beforeBody);
// One exception.