org.sonar.duplications.detector.suffixtree.Search Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of sonar-duplications Show documentation
Detect duplicated code
There is a newer version: 7.0
/*
 * SonarQube
 * Copyright (C) 2009-2016 SonarSource SA
 * mailto:contact AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.duplications.detector.suffixtree;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;

/**
 * Traverses the suffix tree built for a {@link TextSet} and reports to a {@link Collector},
 * for every inner node (neither leaf nor root) that passes {@link #containsOrigin(Node)},
 * one group made of the parts derived from the leaves below that node.
 *
 * Use {@link #perform(TextSet, Collector)} as the entry point.
 */
public final class Search {

  private final SuffixTree tree;
  private final TextSet text;
  private final Collector reporter;

  /**
   * Depths of the leaves, stored in the order in which {@link #dfs()} reaches them.
   * For a node, the slice {@code [startSize, endSize)} of this list holds the leaves of its subtree.
   */
  private final List<Integer> list = new ArrayList<>();

  /**
   * Nodes which are neither leaf nor root; filled by {@link #dfs()} and then sorted by descending depth.
   */
  private final List<Node> innerNodes = new ArrayList<>();

  /**
   * Orders nodes by descending depth. {@link Integer#compare(int, int)} is used instead of subtraction,
   * so the result can't be distorted by integer overflow.
   */
  private static final Comparator<Node> DEPTH_COMPARATOR = (o1, o2) -> Integer.compare(o2.depth, o1.depth);

  private Search(SuffixTree tree, TextSet text, Collector reporter) {
    this.tree = tree;
    this.text = text;
    this.reporter = reporter;
  }

  /**
   * Builds a suffix tree for the given text and reports found groups to the given collector.
   *
   * @param text text to analyse
   * @param reporter receiver of groups and their parts
   */
  public static void perform(TextSet text, Collector reporter) {
    new Search(SuffixTree.create(text), text, reporter).compute();
  }

  private void compute() {
    // O(N)
    dfs();

    // O(N * log(N))
    Collections.sort(innerNodes, DEPTH_COMPARATOR);

    // O(N)
    visitInnerNodes();
  }

  /**
   * Depth-first search (DFS).
   *
   * Assigns depth to every node reachable from root, collects depths of leaves into {@link #list},
   * collects inner nodes into {@link #innerNodes} and sets for each visited node
   * the range {@code [startSize, endSize)} of positions in {@link #list} occupied by leaves of its subtree.
   */
  private void dfs() {
    Deque<Node> stack = new LinkedList<>();
    stack.add(tree.getRootNode());
    while (!stack.isEmpty()) {
      Node node = stack.removeLast();
      // all leaves of the subtree of this node will be appended starting from this position
      node.startSize = list.size();
      if (node.getEdges().isEmpty()) {
        // leaf
        list.add(node.depth);
        node.endSize = list.size();
      } else {
        if (!node.equals(tree.getRootNode())) {
          // inner node = not leaf and not root
          innerNodes.add(node);
        }
        for (Edge edge : node.getEdges()) {
          Node endNode = edge.getEndNode();
          endNode.depth = node.depth + edge.getSpan() + 1;
          stack.addLast(endNode);
        }
      }
    }
    // At this point all inner nodes are ordered by the time of entering, so we visit them from last to first:
    // this way endSize of every child is already known when its parent is processed
    ListIterator<Node> iterator = innerNodes.listIterator(innerNodes.size());
    while (iterator.hasPrevious()) {
      Node node = iterator.previous();
      int max = -1;
      for (Edge edge : node.getEdges()) {
        max = Math.max(edge.getEndNode().endSize, max);
      }
      node.endSize = max;
    }
  }

  /**
   * Each inner-node represents prefix of some suffixes, thus substring of text.
   * Reports every inner node for which {@link #containsOrigin(Node)} is true.
   */
  private void visitInnerNodes() {
    for (Node node : innerNodes) {
      if (containsOrigin(node)) {
        report(node);
      }
    }
  }

  /**
   * Checks whether at least one part of the group of the given node ends inside origin,
   * as decided by {@link TextSet#isInsideOrigin(int)}.
   *
   * TODO Godin: in fact computations here are the same as in {@link #report(Node)},
   * so maybe would be better to remove this duplication,
   * however it should be noted that this check can't be done in {@link Collector#endOfGroup()},
   * because it might lead to creation of unnecessary new objects
   */
  private boolean containsOrigin(Node node) {
    for (int i = node.startSize; i < node.endSize; i++) {
      int start = tree.text.length() - list.get(i);
      int end = start + node.depth;
      if (text.isInsideOrigin(end)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Reports to the collector one group for the given node: start of group, one part per leaf
   * of the subtree of this node, end of group.
   */
  private void report(Node node) {
    reporter.startOfGroup(node.endSize - node.startSize, node.depth);
    for (int i = node.startSize; i < node.endSize; i++) {
      // position of the part is derived from the depth of the leaf and the length of the text of the tree
      int start = tree.text.length() - list.get(i);
      int end = start + node.depth;
      reporter.part(start, end);
    }
    reporter.endOfGroup();
  }

  /**
   * Receiver of groups found by {@link Search}. For each group methods are invoked in the following order:
   * {@link #startOfGroup(int, int)}, then {@link #part(int, int)} once per part, then {@link #endOfGroup()}.
   */
  public abstract static class Collector {

    /**
     * Invoked at the beginning of processing for current node.
     *
     * Length - is a depth of node. And nodes are visited in descending order of depth,
     * thus we guarantee that length will not increase between two sequential calls of this method
     * (can be equal or less than previous value).
     *
     * @param size number of parts in group
     * @param length length of each part in group
     */
    abstract void startOfGroup(int size, int length);

    /**
     * Invoked as many times as leaves in the subtree, where current node is root.
     *
     * @param start start position in generalised text
     * @param end end position in generalised text
     */
    abstract void part(int start, int end);

    /**
     * Invoked at the end of processing for current node.
     */
    abstract void endOfGroup();

  }

}