All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.shindig.gadgets.parse.GadgetHtmlParser Maven / Gradle / Ivy

Go to download

Renders gadgets, provides the gadget metadata service, and serves all javascript required by the OpenSocial specification.

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package org.apache.shindig.gadgets.parse;

import com.google.common.collect.Lists;
import com.google.inject.ImplementedBy;
import com.google.inject.Inject;
import com.google.inject.Provider;

import org.apache.shindig.common.cache.Cache;
import org.apache.shindig.common.cache.CacheProvider;
import org.apache.shindig.common.util.HashUtil;
import org.apache.shindig.gadgets.GadgetException;
import org.apache.shindig.gadgets.parse.nekohtml.NekoSimplifiedHtmlParser;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.util.LinkedList;

/**
 * Parser for arbitrary HTML content
 */
@ImplementedBy(NekoSimplifiedHtmlParser.class)
public abstract class GadgetHtmlParser {

  /** Name passed to the cache provider when creating the parsed-document cache. */
  public static final String PARSED_DOCUMENTS = "parsedDocuments";
  /** Name passed to the cache provider when creating the parsed-fragment cache. */
  public static final String PARSED_FRAGMENTS = "parsedFragments";

  // Both caches have no initializer; they stay null until setCacheProvider is called.
  private Cache documentCache;
  private Cache fragmentCache;
  // Starts as a DefaultSerializerProvider and is replaced by setSerializerProvider.
  // NOTE(review): Provider (and Cache above) appear as raw types here; the type
  // arguments may have been lost in extraction -- confirm against the upstream file.
  private Provider serializerProvider = new DefaultSerializerProvider();
  // DOM implementation handed to the constructor; exposed to subclasses.
  protected final DOMImplementation documentFactory;

  /**
   * Stores the DOM implementation for later use by this parser and its subclasses.
   *
   * @param documentFactory the DOM implementation to keep in {@link #documentFactory};
   *     it is stored as given, without a null check
   */
  protected GadgetHtmlParser(DOMImplementation documentFactory) {
    this.documentFactory = documentFactory;
  }

  /**
   * Creates the document and fragment caches from the given provider.
   *
   * <p>The caches are requested under the names {@link #PARSED_DOCUMENTS} and
   * {@link #PARSED_FRAGMENTS}. Any previously assigned caches are replaced.
   *
   * @param cacheProvider source of the two caches
   */
  @Inject
  public void setCacheProvider(CacheProvider cacheProvider) {
    // Document cache first, then fragment cache, matching the declaration order.
    this.documentCache = cacheProvider.createCache(PARSED_DOCUMENTS);
    this.fragmentCache = cacheProvider.createCache(PARSED_FRAGMENTS);
  }

  /**
   * Replaces the serializer provider this parser holds.
   *
   * <p>The field starts out as a {@code DefaultSerializerProvider}; calling this
   * method overrides that default with the supplied provider.
   *
   * @param serProvider the provider to use from now on
   */
  @Inject
  public void setSerializerProvider(Provider serProvider) {
    serializerProvider = serProvider;
  }

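  /**
   * Checks the start of the content (at most its first 100 characters,
   * upper-cased) for a document preamble.
   *
   * @param content the markup to inspect
   * @return true if we detect a preamble of doctype or html
   */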
  protected static boolean attemptFullDocParseFirst(String content) {
    String normalized = content.substring(0, Math.min(100, content.length())).toUpperCase();
    return normalized.contains(" beforeHead = Lists.newLinkedList();
      LinkedList beforeBody = Lists.newLinkedList();

      while (html.hasChildNodes()) {
        Node child = html.removeChild(html.getFirstChild());
        if (child.getNodeType() == Node.ELEMENT_NODE &&
            "head".equalsIgnoreCase(child.getNodeName())) {
          if (head == null) {
            head = child;
          } else {
            // Concatenate <head> elements together.
            transferChildren(head, child);
          }
        } else if (child.getNodeType() == Node.ELEMENT_NODE &&
                   "body".equalsIgnoreCase(child.getNodeName())) {
          if (body == null) {
            body = child;
          } else {
            // Concatenate <body> elements together.
            transferChildren(body, child);
          }
        } else if (head == null) {
          beforeHead.add(child);
        } else if (body == null) {
          beforeBody.add(child);
        } else {
          // Both <head> and <body> are present. Append to tail of <body>.
          body.appendChild(child);
        }
      }

      // Ensure head tag exists
      if (head == null) {
        // beforeHead contains all elements that should be prepended to <body>. Switch them.
        LinkedList temp = beforeBody;
        beforeBody = beforeHead;
        beforeHead = temp;

        // Add as first element
        head = document.createElement("head");
        html.insertBefore(head, html.getFirstChild());
      } else {
        // Re-append head node.
        html.appendChild(head);
      }

      // Ensure body tag exists.
      if (body == null) {
        // Add immediately after head.
        body = document.createElement("body");
        html.insertBefore(body, head.getNextSibling());
      } else {
        // Re-append body node.
        html.appendChild(body);
      }

      // Leftovers: nodes before the first <head> node found and the first <body> node found.
      // Prepend beforeHead to the front of <head>, and beforeBody to beginning of <body>,
      // in the order they were found in the document.
      prependToNode(head, beforeHead);
      prependToNode(body, beforeBody);

      // One exception.