/*
* Sonar Web Plugin
* Copyright (C) 2010 Matthijs Galesloot
* [email protected]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sonar.plugins.web.lex;
import org.sonar.channel.Channel;
import org.sonar.channel.ChannelDispatcher;
import org.sonar.channel.CodeReader;
import org.sonar.plugins.web.node.Node;
import org.sonar.plugins.web.node.NodeType;
import org.sonar.plugins.web.node.TagNode;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Lexical analysis of a web page.
*
* @author Matthijs Galesloot
*/
@SuppressWarnings("unchecked")
public final class PageLexer {
/**
* The order of the tokenizers is significant, as they are processed in this order.
*
* TextTokenizer must be last, as it will always consume the characters until the next token arrives (see the note after the tokenizer list below).
*/
@SuppressWarnings("rawtypes")
private static List tokenizers = Arrays.asList(
/* HTML Comments */
new CommentTokenizer("<!--", "-->", true),
/* JSP Comments */
new CommentTokenizer("<%--", "--%>", false),
/* HTML Directive */
new DoctypeTokenizer("<!DOCTYPE", ">"),
/* XML Directives */
new DirectiveTokenizer("<?", "?>"),
/* JSP Directives */
new DirectiveTokenizer("<%@", "%>"),
/* JSP Expressions */
new ExpressionTokenizer("<%", "%>"),
/* XML and HTML Tags */
new ElementTokenizer("<", ">"),
/* Text (for everything else) */
new TextTokenizer());
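/*
 * Illustrative note (not from the original source): given a fragment such as
 * "<!-- note --><p>text</p>", the HTML comment tokenizer consumes "<!-- note -->",
 * the element tokenizer consumes "<p>" and "</p>", and the text tokenizer consumes
 * "text". If TextTokenizer ran any earlier, it would swallow the markup as plain
 * text, which is why it is registered as the last channel.
 */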
/**
* Parse a nested node.
*/
@SuppressWarnings("rawtypes")
public List nestedParse(CodeReader reader) {
List nodeList = new ArrayList();
for (AbstractTokenizer tokenizer : (List<AbstractTokenizer>) tokenizers) {
if (tokenizer.consume(reader, nodeList)) {
break;
}
}
return nodeList;
}
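/*
 * Sketch of a direct call (hypothetical fragment; assumes org.sonar.channel.CodeReader
 * offers a String constructor, as in the sonar-channel API this class builds on):
 *
 *   List nested = new PageLexer().nestedParse(new CodeReader("<b>bold</b>"));
 *
 * Unlike parse(), this runs the tokenizer chain only once and stops at the first
 * tokenizer that consumes input, so the result holds the nodes from that single pass.
 */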
/**
* Parse the input into a list of tokens, with parent/child relations between the tokens.
*/
public List<Node> parse(Reader reader) {
// CodeReader reads the file stream
CodeReader codeReader = new CodeReader(reader);
// ArrayList collects the nodes
List<Node> nodeList = new ArrayList<Node>();
// ChannelDispatcher manages the tokenizers
ChannelDispatcher<List<Node>> channelDispatcher = ChannelDispatcher.builder().addChannels((Channel[]) tokenizers.toArray(new Channel[tokenizers.size()])).build();
channelDispatcher.consume(codeReader, nodeList);
createNodeHierarchy(nodeList);
return nodeList;
}
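/*
 * Usage sketch (hypothetical input; assumes a java.io.StringReader over a small fragment):
 *
 *   PageLexer lexer = new PageLexer();
 *   List<Node> nodes = lexer.parse(new StringReader("<ul><li>item</li></ul>"));
 *
 * The returned list holds the lexed nodes in document order, and createNodeHierarchy
 * has already linked each start tag to its enclosing element, so the <li> TagNode
 * reports the <ul> TagNode as its parent.
 */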
/**
* Scan the nodes and build the hierarchy of parent and child nodes.
*/
private void createNodeHierarchy(List<Node> nodeList) {
TagNode current = null;
for (Node node : nodeList) {
if (node.getNodeType() == NodeType.Tag) {
TagNode element = (TagNode) node;
// start element
if (!element.isEndElement()) {
element.setParent(current);
current = element;
}
// end element
if ((element.isEndElement() || element.hasEnd()) && current != null) {
current = current.getParent();
}
}
}
}
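/*
 * Illustration (not from the original source): for "<table><tr><td>x</td></tr></table>",
 * the <td> start tag gets <tr> as its parent and <tr> gets <table>; each end tag resets
 * current to the parent element. If hasEnd() flags self-closing tags such as <br/>,
 * they enter the start-element branch, take the current parent, and are popped again
 * immediately, so they never become the parent of the tags that follow them.
 */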
}