All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.ucr.cs.bdlab.beast.indexing.RTreeGuttman Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2018 University of California, Riverside
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.ucr.cs.bdlab.beast.indexing;

import edu.ucr.cs.bdlab.beast.cg.SpatialJoinAlgorithms;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeNDLite;
import edu.ucr.cs.bdlab.beast.geolite.GeometryHelper;
import edu.ucr.cs.bdlab.beast.geolite.IFeature;
import edu.ucr.cs.bdlab.beast.util.BitArray;
import edu.ucr.cs.bdlab.beast.util.IntArray;
import edu.ucr.cs.bdlab.beast.util.LongArray;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.locationtech.jts.geom.Geometry;

import java.io.Closeable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Stack;
import java.util.function.BiConsumer;

import static edu.ucr.cs.bdlab.beast.cg.SpatialJoinAlgorithms.getMBRs;
import static edu.ucr.cs.bdlab.beast.cg.SpatialJoinAlgorithms.getMBRs2;

/**
 * A partial implementation for the original Antonin Guttman R-tree as described
 * in the following paper.
 * Antonin Guttman: R-Trees: A Dynamic Index Structure for Spatial Searching.
 * SIGMOD Conference 1984: 47-57
 *
 * It only contains the implementation of the parts needed for the indexing
 * methods. For example, the delete operation was not implemented as it is
 * not needed. Also, this index is designed mainly to be used to index a sample
 * in memory and use it for the partitioning. So, the disk-based mapping and
 * search were not implemented for simplicity.
 */
public class RTreeGuttman implements Closeable {
  private static final Log LOG = LogFactory.getLog(RTreeGuttman.class);

  /** Maximum capacity of a node (M) */
  protected final int maxCapacity;

  /** Minimum capacity of a node (m) */
  protected final int minCapacity;

  /**
   * The minimum coordinates of all the objects (nodes and entries). First index is for the dimensions and second
   * dimension is for the object number.
   */
  protected double[][] minCoord;

  /**
   * The maximum coordinates of all the objects (nodes and entries). First index is for the dimensions and second
   * dimension is for the object number.
   */
  protected double[][] maxCoord;

  /** Global minimum across all dimensions. Useful when the minimum is -Infinity */
  protected double[] globalMin;

  /** Global maximum across all dimensions. Useful when the maximum is Infinity */
  protected double[] globalMax;

  /**A bit vector that stores which nodes are leaves*/
  protected BitArray isLeaf;

  /**A list of int[] that stores the children of each node*/
  protected List children;

  /**Total number of data entries*/
  protected int numEntries;

  /**Total number of nodes*/
  protected int numNodes;

  /**The index of the root in the list of nodes*/
  protected int root;

  /**Only when processing an on-disk tree. Stores the offset of each data entry in the file*/
  protected int[] entryOffsets;

  /**A deserializer that reads objects stored on disk*/
  private Deserializer deser;

  /**The input stream that points to the underlying file*/
  private FSDataInputStream seekableIn;

  /**When reading the tree from disk. The offset of the beginning of the tree*/
  private long treeStartOffset;

  /**The total size of the data chunk*/
  private int totalDataSize;

  /**A random number generator for choosing random nodes when cannot decide*/
  protected Random random = new Random();

  /**
   * Retrieves the number of dimensions of the tree
   * @return the number of dimensions for data entries.
   */
  public int getNumDimensions() {
    return minCoord.length;
  }

  /**
   * Retrieves the maximum number of objects that can be currently stored in the tree without expansion
   * @return the maximum number of objects that the tree can hold (nodes + data entries).
   */
  protected int getCurrentCapacity() {
    return minCoord[0].length;
  }

  /**
   * Make a room in the data structures to accommodate a new object whether it is a node or a data entry.
   */
  protected void makeRoomForOneMoreObject() {
    int currentSize = numEntries + numNodes;
    if (getCurrentCapacity() <= currentSize) {
      // Expand the coordinate arrays in big chunks to avoid memory copy
      int numDimensions = getNumDimensions();
      int newCapacity = getCurrentCapacity() * 2;
      for (int d = 0; d < numDimensions; d++) {
        double[] newCoords = new double[newCapacity];
        System.arraycopy(minCoord[d], 0, newCoords, 0, currentSize);
        minCoord[d] = newCoords;

        newCoords = new double[newCapacity];
        System.arraycopy(maxCoord[d], 0, newCoords, 0, currentSize);
        maxCoord[d] = newCoords;
      }
      this.isLeaf.resize(newCapacity);
    }
  }

  /**
   * Creates a new node that contains the given object and returns the ID of that node.
   * @param leaf set to true to create a leaf node
   * @param iChildren the indexes of all children in this node
   * @return the id of the new node created.
   */
  protected int Node_createNodeWithChildren(boolean leaf, int ... iChildren) {
    makeRoomForOneMoreObject();
    int iNewNode = numEntries + numNodes;
    this.isLeaf.set(iNewNode, leaf);
    this.children.add(iNewNode, new IntArray());
    this.numNodes++;
    Node_reset(iNewNode, iChildren);
    return iNewNode;
  }

  /**
   * Reset a node to contain a new set of children wiping away the current children.
   * @param iNode the index of the node to reset
   * @param newChildren the new list of child node ids.
   */
  protected void Node_reset(int iNode, int ... newChildren) {
    children.get(iNode).clear();
    children.get(iNode).append(newChildren, 0, newChildren.length);
    Node_recalculateMBR(iNode);
  }

  protected void Node_recalculateMBR(int iNode) {
    for (int d = 0; d < getNumDimensions(); d++) {
      minCoord[d][iNode] = Double.POSITIVE_INFINITY;
      maxCoord[d][iNode] = Double.NEGATIVE_INFINITY;
    }
    for (int iChild : children.get(iNode)) {
      for (int d = 0; d < getNumDimensions(); d++) {
        minCoord[d][iNode] = Math.min(minCoord[d][iNode], minCoord[d][iChild]);
        maxCoord[d][iNode] = Math.max(maxCoord[d][iNode], maxCoord[d][iChild]);
      }
    }
  }

  /**
   * Returns the number of children for the given node.
   * @param iNode the index of the node
   * @return the size of the node in terms of number of children.
   */
  protected int Node_size(int iNode) {
    return children.get(iNode).size();
  }

  /**
   * Calculates the volume of the node
   * @param iNode the ID of the node
   * @return the volume of the given node (e.g., area for two dimensions).
   */
  protected double Node_volume(int iNode) {
    double vol = 1.0;
    for (int d = 0; d < getNumDimensions(); d++)
      vol *= Math.min(globalMax[d], maxCoord[d][iNode]) - Math.max(globalMin[d], minCoord[d][iNode]);
    return vol;
  }

  /**
   * Calculates the volume (area) expansion that will happen if the given object is added to a given node.
   * @param iNode the ID of the node that would be expanded
   * @param iNewChild the ID of the object that would be added to the node
   * @return the expansion of the volume of the given child is added to the given node.
   */
  protected double Node_volumeExpansion(int iNode, int iNewChild) {
    double volBefore = 1.0, volAfter = 1.0;
    for (int d = 0; d < getNumDimensions(); d++) {
      volBefore *= Math.min(maxCoord[d][iNode], globalMax[d]) - Math.max(minCoord[d][iNode], globalMin[d]);
      volAfter *= Math.min(globalMax[d], Math.max(maxCoord[d][iNode], maxCoord[d][iNewChild])) -
          Math.max(globalMin[d], Math.min(minCoord[d][iNode], minCoord[d][iNewChild]));
    }
    return volAfter - volBefore;
  }

  /**
   * Adds a new child to an existing node.
   * @param iNode the index of the parent node
   * @param iNewChild the index of the child node
   */
  protected void Node_addChild(int iNode, int iNewChild) {
    this.children.get(iNode).add(iNewChild);
  }

  /**
   * Expand the MBR of the given node to enclose the given new object
   * @param node the node number
   * @param newObject the index of the object to expand to
   */
  protected void Node_expand(int node, int newObject) {
    // Expand the MBR to enclose the new child
    for (int d = 0; d < getNumDimensions(); d++) {
      minCoord[d][node] = Math.min(minCoord[d][node], minCoord[d][newObject]);
      maxCoord[d][node] = Math.max(maxCoord[d][node], maxCoord[d][newObject]);
    }
  }

  /**
   * Split an existing node around the given separator. Current children from
   * indexes 1 to separator-1 (inclusive) remain in the given node. All remaining
   * children go to a new node. The ID of the new node created that contain the
   * children from separator to end.
   * @param iNode the index of the node to split
   * @param separator the index of the first child to be in the new node
   * @return the ID of the new node created after split
   */
  protected int Node_split(int iNode, int separator) {
    // Create the new node that will hold the entries from separator -> size
    makeRoomForOneMoreObject();
    int iNewNode = numNodes + numEntries;
    this.numNodes++;
    // Make room for the children of the new node
    this.children.add(iNewNode, new IntArray());
    // The new node in the same level so it follow the leaf/non-leaf status of the current node
    isLeaf.set(iNewNode, isLeaf.get(iNode));

    // Split the children around the separator
    children.get(iNewNode).append(children.get(iNode), separator,
        children.get(iNode).size() - separator);
    children.get(iNode).resize(separator);

    // Recalculate the MBRs of the two nodes
    Node_recalculateMBR(iNode);
    Node_recalculateMBR(iNewNode);

    return iNewNode;
  }

  /**
   * Initialize the current R-tree from given data entries
   * @param x1 array of lower coordinates on the x-dimension
   * @param y1 array of lower coordinates on the y-dimension
   * @param x2 array of upper coordinates on the x-dimension
   * @param y2 array of upper coordinates on the y-dimension
   */
  public void initializeFromRects(double[] x1, double[] y1, double[] x2, double[] y2) {
    this.initializeDataEntries(x1, y1, x2, y2);
    this.insertAllDataEntries();
  }

  public void initializeFromBoxes(double[][] min, double[][] max) {
    this.initializeDataEntries(min, max);
    this.insertAllDataEntries();
  }

  /**
   * Initialize the tree from a set of points
   * @param points array of points. First index represents the dimension and second index represents the point number.
   */
  public void initializeFromPoints(double[][] points) {
    this.initializeDataEntries(points, points);
    this.insertAllDataEntries();
  }

  /**
   * Construct a new empty R-tree with the given parameters.
   * @param minCapacity - Minimum capacity of a node
   * @param maxCapcity - Maximum capacity of a node
   */
  public RTreeGuttman(int minCapacity, int maxCapcity) {
    if (minCapacity > maxCapcity / 2)
      throw new RuntimeException(String.format("Invalid minCapacity=%d and maxCapacity=%d. minCapacity should be at most maxCapacity/2", minCapacity, maxCapcity));
    if (minCapacity == 0)
      throw new RuntimeException("minCapacity cannot be equal to zero");
    this.minCapacity = minCapacity;
    this.maxCapacity = maxCapcity;
  }

  protected void insertAllDataEntries() {
    root = Node_createNodeWithChildren(true, 0);
    // Insert one by one
    for (int i = 1; i < numEntries; i++)
      insertAnExistingDataEntry(i);
  }

  /**
   * Initialize the data entries to a set of point coordinates without actually inserting them into the tree structure.
   * @param coords array of point coordinates for the entries. First index represents the dimension and second index
   *               represents the point.
   */
  protected void initializeDataEntries(double[][] coords) {
    initializeDataEntries(coords, coords);
  }

  protected void initializeDataEntries(double[] xs, double[] ys) {
    initializeDataEntries(new double[][] {xs, ys});
  }

  /**
   * Initialize data entries from a list of minimum bounding boxes MBBs.
   * @param minCoords the minimum coordinates of the MBBs where the first index is the dimension and the second index
   *                  is the object number
   * @param maxCoords the maximum coordinates of the MBBs where the first index is the dimension and the second index
   *                  is the object number
   */
  protected void initializeDataEntries(double[][] minCoords, double[][] maxCoords) {
    this.numEntries = minCoords[0].length;
    this.numNodes = 0; // Initially, no nodes are there
    this.isLeaf = new BitArray(numEntries);
    children = new ArrayList<>(numEntries);
    this.minCoord = new double[minCoords.length][numEntries];
    this.maxCoord = new double[maxCoords.length][numEntries];
    for (int d = 0; d < getNumDimensions(); d++) {
      System.arraycopy(minCoords[d], 0, this.minCoord[d], 0, numEntries);
      System.arraycopy(maxCoords[d], 0, this.maxCoord[d], 0, numEntries);
      // Handle empty points (with NaN coordinates) to an empty box with inverted infinite coordinates
      for (int $i = 0; $i < numEntries; $i++) {
        if (Double.isNaN(minCoords[d][$i]) || Double.isNaN(maxCoords[d][$i])) {
          this.minCoord[d][$i] = Double.POSITIVE_INFINITY;
          this.maxCoord[d][$i] = Double.NEGATIVE_INFINITY;
        }
      }
    }
    // Compute finite global minimum and maximum. Useful when some infinite coordinates are there
    globalMin = new double[getNumDimensions()];
    globalMax = new double[getNumDimensions()];
    for (int d = 0; d < getNumDimensions(); d++) {
      globalMin[d] = Double.POSITIVE_INFINITY;
      for (double x : minCoord[d]) {
        if (Double.isFinite(x))
          globalMin[d] = Math.min(globalMin[d], x);
      }
      globalMax[d] = Double.NEGATIVE_INFINITY;
      for (double x : maxCoord[d]) {
        if (Double.isFinite(x))
          globalMax[d] = Math.max(globalMax[d], x);
      }
      // Expand it even more to ensure it includes all potential values
      double diff = globalMax[d] - globalMin[d];
      globalMin[d] -= diff;
      globalMax[d] += diff;
    }
    for (int i = 0; i < numEntries; i++)
      children.add(null);
  }

  /**
   * Initialize the data entries to a set of rectangular coordinates without
   * actually inserting them into the tree structure.
   * @param x1 array of lower x-coordinates of rectangles
   * @param y1 array of lower y-coordinates of rectangles
   * @param x2 array of upper x-coordinates of rectangles
   * @param y2 array of upper y-coordinates of rectangles
   */
  protected void initializeDataEntries(double[] x1, double[] y1, double[] x2, double[] y2) {
    initializeDataEntries(new double[][]{x1, y1}, new double[][]{x2, y2});
  }


  /**
   * Inserts the given data entry into the tree. We assume that the coordinates
   * of this data entry are already stored in the coordinates arrays.
   * @param iEntry - The index of the point in the array of points
   */
  protected void insertAnExistingDataEntry(final int iEntry) {
    // The path from the root to the newly inserted record. Used for splitting.
    IntArray path = new IntArray();
    int iCurrentVisitedNode = root;
    path.add(iCurrentVisitedNode);
    // Descend in the tree until we find a leaf node to add the object to
    while (!isLeaf.get(iCurrentVisitedNode)) {
      // Node is not leaf. Choose a child node
      // Descend to the best child found
      int iBestChild = chooseSubtree(iEntry, iCurrentVisitedNode);
      iCurrentVisitedNode = iBestChild;
      path.add(iCurrentVisitedNode);
    }

    // Now we have a child node. Insert the current element to it and split
    // if necessary
    Node_addChild(iCurrentVisitedNode, iEntry);
    adjustTree(iCurrentVisitedNode, path);
  }

  /**
   * Choose the best subtree to add a data entry to.
   * According to the original R-tree paper, this function chooses the node with
   * the minimum volume expansion, then the one with the smallest volume,
   * then the one with the least number of records, then randomly to any one.
   * @param iEntry the index of the entry to choose a subtree for
   * @param iNode the index of the node to choose from its children
   * @return the index of the child of the given node that the entry would be added to.
   */
  protected int chooseSubtree(int iEntry, int iNode) {
    if (minCoord[0][iEntry] > maxCoord[0][iEntry]) {
      return children.get(iNode).get(random.nextInt(Node_size(iNode)));
    }
    // 1. Choose the child with the minimum expansion
    double minExpansion = Double.POSITIVE_INFINITY;
    int iBestChild = -1;
    for (int iCandidateChild : children.get(iNode)) {
      double expansion = Node_volumeExpansion(iCandidateChild, iEntry);
      if (expansion < minExpansion) {
        minExpansion = expansion;
        iBestChild = iCandidateChild;
      } else if (expansion == minExpansion) {
        // Resolve ties by choosing the entry with the rectangle of smallest area
        if (Node_volume(iCandidateChild) < Node_volume(iBestChild))
          iBestChild = iCandidateChild;
      }
    }
    assert iBestChild != -1;
    return iBestChild;
  }

  /**
   * Adjust the tree after an insertion by making the necessary splits up to the root.
   * @param leafNode the index of the leaf node where the insertion happened
   * @param path the path that lead to the leafNode from the root. The last element is {@code leafNode}
   */
  protected void adjustTree(int leafNode, IntArray path) {
    int newNode = -1;
    if (Node_size(leafNode) > maxCapacity) {
      // Node full. Split into two
      newNode = split(leafNode, minCapacity);
    }
    // AdjustTree. Ascend from the leaf node L
    for (int $i = path.size() - 1; $i >= 0; $i--) {
      int iNode = path.get($i);
      // Adjust covering rectangle in the node
      Node_expand(iNode, ($i == path.size() - 1) ? children.get(iNode).peek() : path.get($i+1));
      if ($i == 0) {
        // The node is the root (no parent)
        if (newNode != -1) {
          // If the root is split, create a new root
          root = Node_createNodeWithChildren(false, iNode, newNode);
        }
        // If N is the root with no partner NN, stop.
      } else {
        int parent = path.get($i - 1);
        if (newNode != -1) {
          // If N has a partner NN resulting from an earlier split,
          // create a new entry ENN and add to the parent if there is room.
          // Add Enn to P if there is room
          Node_addChild(parent, newNode);
          Node_expand(parent, newNode);
          newNode = -1;
          if (Node_size(parent) >= maxCapacity)
            newNode = split(parent, minCapacity);
        }
      }
    }
  }


  /**
   * Linear splitting algorithm as described on Page 52 of the paper.
   * This function should update the MBR of the given node and the newly created node. This function does not
   * necessarily have to update the MBR of the parent node or add the new node to the parent.
   * @param iNode the index of the node to split
   * @param minSplitSize the minimum split size
   * @return the ID of the newly created node after split
   */
  protected int split(int iNode, int minSplitSize) {
    IntArray nodeChildren = children.get(iNode);
    int[] highestLowSide = new int[getNumDimensions()];
    int[] lowestHighSide = new int[getNumDimensions()];
    for (int d = 0; d < getNumDimensions(); d++)
      highestLowSide[d] = lowestHighSide[d] = nodeChildren.get(0);
    for (int iChild = 1; iChild < nodeChildren.size(); iChild++) {
      int child = nodeChildren.get(iChild);
      for (int d = 0; d < getNumDimensions(); d++) {
        if (minCoord[d][child] > minCoord[d][highestLowSide[d]])
          highestLowSide[d] = child;
        if (maxCoord[d][child] < maxCoord[d][lowestHighSide[d]])
          lowestHighSide[d] = child;
      }
    }
    double[] separations = new double[getNumDimensions()];
    int maxSeparationD = 0;
    for (int d = 0; d < getNumDimensions(); d++) {
      separations[d] = (Math.max(globalMin[d], minCoord[d][highestLowSide[d]]) - Math.min(globalMax[d], maxCoord[d][lowestHighSide[d]])) /
          (Math.min(globalMax[d], maxCoord[d][root]) - Math.max(globalMin[d], minCoord[d][root]));
      if (separations[d] > separations[maxSeparationD])
        maxSeparationD = d;
    }
    // The seed points for the two splits resulting from the split
    int seed1 = highestLowSide[maxSeparationD];
    int seed2 = lowestHighSide[maxSeparationD];

    // After picking the seeds, we will start picking next elements one-by-one
    IntArray nonAssignedNodes = nodeChildren.clone();
    Node_reset(iNode, seed1);
    int iNewNode = Node_createNodeWithChildren(isLeaf.get(iNode), seed2);
    nonAssignedNodes.remove(seed1);
    nonAssignedNodes.remove(seed2);
    int group1 = iNode;
    int group2 = iNewNode;
    for (int child : nonAssignedNodes) {
      // If one group has so few entries that all the rest must be assigned to it
      // in order to have the minimum number minSplitSize, assign them and stop
      if (nonAssignedNodes.size() + Node_size(group1) <= minSplitSize) {
        // Assign all the rest to group1
        for (int iObject : nonAssignedNodes) {
          Node_addChild(group1, iObject);
          Node_expand(group1, iObject);
        }
        break;
      } else if (nonAssignedNodes.size() + Node_size(group2) <= minSplitSize) {
        // Assign all the rest to group2
        for (int iObject : nonAssignedNodes) {
          Node_addChild(group2, iObject);
          Node_expand(group2, iObject);
        }
        break;
      } else {
        double d1 = Node_volumeExpansion(group1, child);
        double d2 = Node_volumeExpansion(group2, child);
        if (d1 == d2) {
          // Resolve ties by adding the entry to theh gorup with samller area
          d1 = Node_volume(group1);
          d2 = Node_volume(group2);
          if (d1 == d2) {
            // then to the one wih fewer entries
            d1 = Node_size(group1);
            d2 = Node_size(group2);
            if (d1 == d2) {
              // ... then to either
              d1 = 0.5;
              d2 = Math.random();
            }
          }
        }
        if (d1 < d2) {
          Node_addChild(group1, child);
          Node_expand(group1, child);
        } else if (d1 > d2) {
          Node_addChild(group2, child);
          Node_expand(group2, child);
        }
      }
    }
    return  iNewNode;
  }

  /**
   * Search for all the entries that overlap a given query rectangle
   * @param min the coordinate of the minimum corner
   * @param max the coordinate of the maximum corner
   * @param results the results as a list of entry IDs as given in the construction function
   */
  public void search(double[] min, double[] max, IntArray results) {
    results.clear();
    IntArray nodesToSearch = new IntArray();
    nodesToSearch.add(root);
    while (!nodesToSearch.isEmpty()) {
      int nodeToSearch = nodesToSearch.pop();
      if (isLeaf.get(nodeToSearch)) {
        // Search and return all the entries in the leaf node
        for (int iEntry : children.get(nodeToSearch)) {
          if (Object_overlaps(iEntry, min, max))
            results.add(iEntry);
        }
      } else {
        // A non-leaf node, expand the search to all overlapping children
        for (int iChild : children.get(nodeToSearch)) {
          if (Object_overlaps(iChild, min, max))
            nodesToSearch.add(iChild);
        }
      }
    }
  }

  public Iterable search(EnvelopeNDLite e) {
    return new SearchIterator(e);
  }

  public Iterable search(double x1, double y1, double x2, double y2) {
    return search(new EnvelopeNDLite(2, x1, y1, x2, y2));
  }

  /**
   * Tests if an object (entry or node) overlaps with a rectangle
   * @param iEntry the inex of the entry
   * @param min the coordinates of the minimum corner of the search box
   * @param max the coordinates of the maximum corner of the search box
   * @return {@code true} if the entry overlaps the given rectangle
   */
  protected boolean Object_overlaps(int iEntry, double[] min, double[] max) {
    for (int d = 0; d < getNumDimensions(); d++) {
      // The first condition addresses the case of an envelope that represents a point that coincides with the
      // lower edge of another envelope
      if (min[d] != minCoord[d][iEntry] &&
          (min[d] >= maxCoord[d][iEntry] || minCoord[d][iEntry] >= max[d]))
        return false;
    }
    return true;
  }

  protected static boolean Object_overlaps(RTreeGuttman rtree1, int iEntry1, RTreeGuttman rtree2, int iEntry2) {
    assert rtree1.getNumDimensions() == rtree2.getNumDimensions() : "Incompatible dimensions";
    for (int d = 0; d < rtree1.getNumDimensions(); d++)
      if (rtree1.minCoord[d][iEntry1] >= rtree2.maxCoord[d][iEntry2] ||
          rtree2.minCoord[d][iEntry2] >= rtree1.maxCoord[d][iEntry1])
        return false;
    return true;
  }

  /**
   * Total number of objects in the tree.
   * @return the number of data entries in the tree
   */
  public int numOfDataEntries() {
    return numEntries;
  }

  /**
   * Returns number of nodes in the tree.
   * @return the number of nodes in the tree
   */
  public int numOfNodes() {
    return numNodes;
  }

  /**
   * Computes the height of the tree which is defined as the number of edges
   * on the path from the root to the deepest node. Sine the R-tree is perfectly
   * balanced, it is enough to measure the length of the path from the root to
   * any node, e.g., the left-most node.
   * @return the height of the tree (number of levels - 1)
   */
  public int getHeight() {
    if (numNodes == 0)
      return 0;
    // Compute the height of the tree by traversing any path from the root
    // to the leaf.
    // Since the tree is balanced, any path would work
    int height = 0;
    int iNode = root;
    while (!isLeaf.get(iNode)) {
      height++;
      iNode = children.get(iNode).get(0);
    }
    return height;
  }

  /**
   * The total number of leaf nodes.
   * @return the total number of leaf nodes
   */
  public int getNumLeaves() {
    return (int) isLeaf.countOnes();
  }

  /**
   * Retrieve all the leaf nodes in the tree.
   * @return an Iterable over all leaf nodes
   */
  public Iterable getAllLeaves() {
    return new LeafNodeIterable();
  }

  /**
   * Creates an R-tree that contains only nodes (no data entries). The given
   * coordinates are used for the leaf nodes. This tree is used to model an R-tree
   * and use the different R-tree algorithms to test where an entry would end up
   * in the R-tree (without really inserting it).
   * @param x1 the lower x-coordinates of nodes
   * @param y1 the lower y-coordinates of nodes
   * @param x2 the upper x-coordinates of nodes
   * @param y2 the upper y-coordinates of nodes
   * @see #noInsert(double[], double[])
   */
  protected void initializeHollowRTree(double[] x1, double[] y1, double[] x2, double[] y2) {
    // Create a regular R-tree with the given rectangles as data entries.
    initializeFromRects(x1, y1, x2, y2);

    // Make sure we have a room for an extra object which will be used in noInsert
    makeRoomForOneMoreObject();
  }

  /**
   * Simulates an insertion of a record and returns the ID of the object that either
   * contains the given boundaries or will be its sibling.
   * @param min the minimum corner of the box to insert
   * @param max the maximum corner of the box to insert
   * @return the ID of the node that the object would be inserted to
   */
  protected int noInsert(double[] min, double[] max) {
    int i = numEntries + numNodes;
    for (int d = 0; d < getNumDimensions(); d++) {
      minCoord[d][i] = min[d];
      maxCoord[d][i] = max[d];
    }

    // Descend from the root until reaching a data entry
    // The range of IDs for data entries is [0, numEntries[
    // All node IDs is in the rnage [numEntries, numEntries + numNodes[
    int p = root;
    while (p >= numOfDataEntries())
      p = chooseSubtree(i, p);

    // Return the index of the leaf node without really inserting the element
    return p;
  }

  /**
   * Only when the tree is read from file, return the total size of the data part
   * in bytes.
   * @return the total size of data in the file
   */
  public int getTotalDataSize() {
    return totalDataSize;
  }

  public static int spatialJoin(RTreeGuttman rtree1, RTreeGuttman rtree2, BiConsumer result) {
    int numResults = 0;
    assert rtree1.getNumDimensions() == rtree2.getNumDimensions() : "Incompatible dimensions";
    // If the roots are disjoint, end right away
    if (!Object_overlaps(rtree1, rtree1.root, rtree2, rtree2.root))
      return numResults;
    LongArray pairsToTest = new LongArray();
    pairsToTest.add(((long) rtree1.root << 32) | rtree2.root);
    while (!pairsToTest.isEmpty()) {
      long pairToTest = pairsToTest.pop();
      int node1 = (int) (pairToTest >>> 32L);
      int node2 = (int) (pairToTest & 0xffffffff);
      if (rtree1.isLeaf.get(node1) && rtree2.isLeaf.get(node2)) {
        // Both are leaves, test all entries
        for (int child1 : rtree1.children.get(node1)) {
          for (int child2 : rtree2.children.get(node2)) {
            if (Object_overlaps(rtree1, child1, rtree2, child2)) {
              if (result != null)
                result.accept(child1, child2);
              numResults++;
            }
          }
        }
      } else if (rtree1.isLeaf.get(node1)) {
        // First node is leaf but second node is not leaf. Test first node with all children of second node
        for (int child2 : rtree2.children.get(node2)) {
          if (Object_overlaps(rtree1, node1, rtree2, child2))
            pairsToTest.add(((long) node1 << 32) | child2);
        }
      } else if (rtree2.isLeaf.get(node2)) {
        // First node is non-leaf but second node is leaf
        for (int child1 : rtree1.children.get(node1)) {
          if (Object_overlaps(rtree1, child1, rtree2, node2))
            pairsToTest.add(((long) child1 << 32) | node2);
        }
      } else {
        // Both are non-leaves. Cross product all children
        for (int child1 : rtree1.children.get(node1)) {
          for (int child2 : rtree2.children.get(node2)) {
            if (Object_overlaps(rtree1, child1, rtree2, child2)) {
              pairsToTest.add(((long) child1 << 32) | child2);
            }
          }
        }
      }
    }
    return numResults;
  }

  /**
   * A class used to iterate over the data entries in the R-tree
   */
  public class Entry {
    /**The ID of the entry*/
    public int id;
    /**The minimum corner of the box of the entry*/
    public double[] min;
    /**The maximum corner of the box of the entry*/
    public double[] max;

    protected Entry() {
      min = new double[getNumDimensions()];
      max = new double[getNumDimensions()];
    }

    @Override
    public String toString() {
      return String.format("Entry #%d (%f, %f, %f, %f)", id, min[0], min[1], max[0], max[1]);
    }

    public Object getObject() throws IOException {
      if (deser == null)
        return null;
      smartSeek(seekableIn, treeStartOffset + entryOffsets[id]);
      return deser.deserialize(seekableIn/*, entryOffsets[id+1] - entryOffsets[id]*/);
    }
  }

  /**
   * An iterable and iterator that traverses all data entries in the tree.
   */
  protected class EntryIterator implements Iterable, Iterator {
    private int iNextEntry = 0;
    private final Entry entry = new Entry();

    protected EntryIterator() {}

    @Override
    public Iterator iterator() {
      return this;
    }

    @Override
    public boolean hasNext() {
      return iNextEntry < RTreeGuttman.this.numEntries;
    }

    @Override
    public Entry next() {
      entry.id = iNextEntry;
      for (int d = 0; d < getNumDimensions(); d++) {
        entry.min[d] = minCoord[d][iNextEntry];
        entry.max[d] = maxCoord[d][iNextEntry];
      }
      iNextEntry++;
      return entry;
    }

    public void remove() {
      throw new RuntimeException("Not supported");
    }
  }

  /**
   * Returns an iterable on all data entries in the tree.
   * @return an iterator to all entries in the file
   */
  public Iterable entrySet() {
    return new EntryIterator();
  }

  /**
   * An iterator for range query search results
   */
  protected class SearchIterator implements Iterable, Iterator {
    /**The list of nodes yet to be searched*/
    private IntArray nodesToSearch;

    /**The ID of the entry to return on the next call*/
    private int iNextEntry;

    /**The object used to return all search results*/
    private Entry entry;

    /**The minimum corner of the search range*/
    private double[] min;

    /**The maximum corner of the search range*/
    private double[] max;

    protected SearchIterator(EnvelopeNDLite e) {
      this.min = new double[e.getCoordinateDimension()];
      this.max = new double[e.getCoordinateDimension()];
      for (int $d = 0; $d < e.getCoordinateDimension(); $d++) {
        this.min[$d] = e.getMinCoord($d);
        this.max[$d] = e.getMaxCoord($d);
      }
      searchFirst();
    }

    /**
     * Search for the first element in the result
     */
    protected void searchFirst() {
      nodesToSearch = new IntArray();
      nodesToSearch.add(root);
      entry = new Entry();
      while (!nodesToSearch.isEmpty()) {
        // We keep the top of the stack for the subsequent next calls
        int iNodeToSearch = nodesToSearch.peek();
        if (isLeaf.get(iNodeToSearch)) {
          for (iNextEntry = 0; iNextEntry < Node_size(iNodeToSearch); iNextEntry++) {
            // Found a matching element in a leaf node
            if (Object_overlaps(children.get(iNodeToSearch).get(iNextEntry), min, max))
              return;
          }
          // Node did not match any records, no longer needed
          nodesToSearch.pop();
        } else {
          // Found a matching non-leaf node, visit its children
          nodesToSearch.pop(); // No longer needed
          for (int iChild : children.get(iNodeToSearch)) {
            if (Object_overlaps(iChild, min, max))
              nodesToSearch.add(iChild);
          }
        }
      }
      iNextEntry = -1;
    }

    protected void prefetchNext() {
      int iNodeToSearch = nodesToSearch.peek();
      while (++iNextEntry < Node_size(iNodeToSearch)) {
        if (Object_overlaps(children.get(iNodeToSearch).get(iNextEntry), min, max))
          return;
      }
      // Done with the current leaf node. Continue searching for the next leaf
      nodesToSearch.pop();
      while (!nodesToSearch.isEmpty()) {
        iNodeToSearch = nodesToSearch.peek();
        if (isLeaf.get(iNodeToSearch)) {
          for (iNextEntry = 0; iNextEntry < Node_size(iNodeToSearch); iNextEntry++) {
            // Found a matching element in a leaf node
            if (Object_overlaps(children.get(iNodeToSearch).get(iNextEntry), min, max))
              return;
          }
          // Node did not match any records, no longer needed
          nodesToSearch.pop();
        } else {
          // Found a matching non-leaf node, visit its children
          nodesToSearch.pop(); // No longer needed
          for (int iChild : children.get(iNodeToSearch)) {
            if (Object_overlaps(iChild, min, max))
              nodesToSearch.add(iChild);
          }
        }
      }
      iNextEntry = -1; // No more entries to search
    }

    @Override
    public Iterator iterator() {
      return this;
    }

    @Override
    public boolean hasNext() {
      return iNextEntry != -1;
    }

    @Override
    public Entry next() {
      int iEntry = children.get(nodesToSearch.peek()).get(iNextEntry);
      entry.id = iEntry;
      for (int d = 0; d < getNumDimensions(); d++) {
        entry.min[d] = minCoord[d][iEntry];
        entry.max[d] = maxCoord[d][iEntry];
      }
      prefetchNext();
      return entry;
    }

    public void remove() {
      throw new RuntimeException("Not supported");
    }
  }

  /**
   * A class that holds information about one node in the tree.
   */
  public static class Node {
    /**The internal ID of the node*/
    public int id;

    /**Whether this is a leaf node or not*/
    public boolean isLeaf;

    /**The coordinate of the minimum corner of the node*/
    public double[] min;
    /**The coordinate of the maximum corner of the node*/
    public double[] max;

    protected Node(int numDimensions){
      min = new double[numDimensions];
      max = new double[numDimensions];
    }

    public String toWKT() {
      return String.format("POLYGON((%f %f, %f %f, %f %f, %f %f, %f %f))",
          min[0], min[1], min[0], max[1], max[0], max[1], max[0], min[1], min[0], min[1]);
    }
  }

  protected class NodeIterable implements Iterable, Iterator {
    /**The ID of the next node to be returned*/
    protected int iNextNode;

    /**Current node pointed by the iterator*/
    protected Node currentNode;

    protected NodeIterable() {
      currentNode = new Node(getNumDimensions());
      iNextNode = numEntries - 1;
      prefetchNext();
    }

    protected void prefetchNext() {
      if (iNextNode >= numEntries + numNodes)
        return;
      iNextNode++;
    }

    @Override
    public Iterator iterator() {
      return this;
    }

    @Override
    public boolean hasNext() {
      return iNextNode < numEntries + numNodes;
    }

    @Override
    public Node next() {
      for (int d = 0; d < getNumDimensions(); d++) {
        currentNode.min[d] = minCoord[d][iNextNode];
        currentNode.max[d] = maxCoord[d][iNextNode];
      }
      currentNode.isLeaf = isLeaf.get(iNextNode);
      currentNode.id = iNextNode;
      prefetchNext();
      return currentNode;
    }

    public void remove() {
      throw new RuntimeException("Not supported");
    }
  }

  protected class LeafNodeIterable extends NodeIterable {
    protected void prefetchNext() {
      if (iNextNode >= numEntries + numNodes)
        return;
      do {
        iNextNode++;
      } while (iNextNode < numEntries + numNodes && !isLeaf.get(iNextNode));
    }
  }

  /**
   * An interface for serializing objects given their entry number
   */
  public interface Serializer {
    /**
     * Serializes teh given object to the DataOutput and returns the total number of bytes written
     * @param out the output to serialize this object to
     * @param iObject the index of the object to serialize
     * @return the number of bytes written
     * @throws IOException if an error happens while writing the output
     */
    int serialize(DataOutput out, int iObject) throws IOException;
  }

  public interface Deserializer {
    O deserialize(DataInput in) throws IOException;
  }

  /**
   * Serializes the tree and its data entries to an output stream. Notice that
   * this is not supposed to be used as an external tree format where you can insert
   * and delete entries. Rather, it is like a static copy of the tree where you
   * can search or load back in memory. The format of the tree on disk is as
   * described below.
   * 
    *
  • * Data Entries: First, all data entries are written in an order that is consistent * with the R-tree structure. This order will guarantee that all data entries * under any node (from the root to leaves) will be adjacent in that order. *
  • *
  • * Tree structure: This part contains the structure of the tree represented * by its nodes. The nodes are stored in a level order traversal. This guarantees * that the root will always be the first node and that all siblings will be * stored consecutively. Each node contains the following information: * (1) (n) Number of children as a 32-bit integer, * (2) n tuples of the form (child offset, MBR=(x1, y1, x2, y2), start offset of data, end offset of data). * The child offset is the offset of the beginning of the child record (node or data entry) in the * tree where 0 is the offset of the first data entry. [Start offset, end offset[ is the range of data entries * that are stored in the subtree under this node. *
  • *
  • * Tree footer: This section contains some meta data about the tree as * follows. All integers are 32-bits. * (1) MBR of the root as (x1, y1, x2, y2), * (2) Number of data entries, * (3) Number of non-leaf nodes, * (4) Number of leaf nodes, * (5) Tree structure offset: offset of the beginning of the tree structure section * (6) Footer offset: offset of the beginning of the footer as a 32-bit integer. * (7) Tree size: Total tree size in bytes including data+structure+footer *
  • * *
* @param out the output to write to * @param ser the object serializer * @throws IOException if an error happens while writing the file */ public void write(DataOutput out, Serializer ser) throws IOException { // Tree data: write the data entries in the tree order // Since we write the data first, we will have to traverse the tree twice first time to visit and write // the data entries in the tree order, and second time to visit and write the tree nodes in the tree order. Deque nodesToVisit = new ArrayDeque<>(); nodesToVisit.add(root); int[] objectStartOffsets = new int[numOfDataEntries() + numOfNodes()]; int[] objectSizes = new int[numOfDataEntries() + numOfNodes()]; // Keep track of the offset of each data object from the beginning of the data section int dataOffset = 0; // Keep track of the offset of each node from the beginning of the tree structure section int nodeOffset = 0; while (!nodesToVisit.isEmpty()) { int node = nodesToVisit.removeFirst(); // The node is supposed to be written in this order. // Measure its offset and accumulate its size objectStartOffsets[node] = nodeOffset; // nodeSizeInBytes = (Number of children) + ((Child Offset + MBR(2k coords) + Data start + Data End) * Number of children) int nodeSizeInBytes = 4 + (4 + 8 * 2 * this.getNumDimensions() + 4 + 4) * Node_size(node); objectSizes[node] = nodeSizeInBytes; nodeOffset += nodeSizeInBytes; if (isLeaf.get(node)) { // Leaf node, write the data entries in order for (int child : children.get(node)) { objectStartOffsets[child] = dataOffset; if (ser != null) { objectSizes[child] = ser.serialize(out, child);; dataOffset += objectSizes[child]; } } } else { // Internal node, recursively traverse its children for (int child : children.get(node)) nodesToVisit.addLast(child); } } LOG.debug(String.format("Total data size in R-tree: %d bytes and %d records", dataOffset, numOfDataEntries())); // Update node offsets as they are written after the data entries for (int i = 0; i < numNodes; i++) objectStartOffsets[i + numEntries] += dataOffset; // Tree structure: Write the nodes in tree order nodesToVisit.add(root); while (!nodesToVisit.isEmpty()) { int node = nodesToVisit.removeFirst(); // (1) Number of children out.writeInt(Node_size(node)); for (int child : children.get(node)) { // (2) Write the offset of the child out.writeInt(objectStartOffsets[child]); // (3) Write the MBR of each child for (int d = 0; d < getNumDimensions(); d++) { out.writeDouble(minCoord[d][child]); out.writeDouble(maxCoord[d][child]); } // (4) Write the start and end offset of the data under this node int i$ = child; while (i$ >= numOfDataEntries()) i$ = children.get(i$).get(0); // Write start offset (Start offset of the left-most child) out.writeInt(objectStartOffsets[i$]); // Write end offset (End offset of the right-most child) i$ = child; while (i$ >= numOfDataEntries()) i$ = children.get(i$).peek(); out.writeInt(objectStartOffsets[i$] + objectSizes[i$]); } // If node is internal, add its children to the nodes to be visited if (!isLeaf.get(node)) { for (int child : children.get(node)) nodesToVisit.addLast(child); } } LOG.debug("Total size of R-tree structure: "+nodeOffset); // Tree footer int footerOffset = dataOffset + nodeOffset; // (1) MBR of the root // The dimension is written once in the footer out.writeInt(getNumDimensions()); for (int d = 0; d < getNumDimensions(); d++) { out.writeDouble(minCoord[d][root]); out.writeDouble(maxCoord[d][root]); } // (2) Number of data entries out.writeInt(numOfDataEntries()); // (3) Number of non-leaf nodes out.writeInt((int) (numOfNodes() - isLeaf.countOnes())); // (4) Number of leaf nodes out.writeInt((int) isLeaf.countOnes()); // (5) Offset of the tree structure section out.writeInt(dataOffset); // (6) Offset of the footer out.writeInt(footerOffset); // (7) Size of the entire tree int footerSize = 4 + 8 * 2 * getNumDimensions() + 4 * 5; out.writeInt(footerOffset + footerSize); } /** * Read an R-tree stored using the method {@link #write(DataOutput, Serializer)} * @param in the input stream to read from * @param length the length of the R-tree in bytes * @param deser the deserializer that reads the data objects * @throws IOException if an error happens while reading the file */ public void readFields(FSDataInputStream in, long length, Deserializer deser) throws IOException { this.seekableIn = in; this.deser = deser; this.treeStartOffset = seekableIn.getPos(); seekableIn.seek(treeStartOffset + length - 8); int footerOffset = seekableIn.readInt(); seekableIn.seek(treeStartOffset + footerOffset); int numDimensions = seekableIn.readInt(); double[] rootMin = new double[numDimensions]; double[] rootMax = new double[numDimensions]; for (int d = 0; d < numDimensions; d++) { rootMin[d] = seekableIn.readDouble(); rootMax[d] = seekableIn.readDouble(); } this.numEntries = seekableIn.readInt(); int numNonLeaves = seekableIn.readInt(); int numLeaves = seekableIn.readInt(); this.numNodes = numNonLeaves + numLeaves; int treeStructureOffset = seekableIn.readInt(); this.totalDataSize = treeStructureOffset; // Initialize the data structures to store objects this.minCoord = new double[numDimensions][numEntries + numNodes]; this.maxCoord = new double[numDimensions][numEntries + numNodes]; this.isLeaf = new BitArray(numEntries + numNodes); this.children = new ArrayList(numEntries + numNodes); for (int i = 0; i < numEntries + numNodes; i++) this.children.add(null); // Read the tree structure and keep it all in memory // First, scan the nodes once to map node offsets to IDs // Map the offset of some nodes to their index in the node list Map nodeOffsetToIndex = new HashMap(); seekableIn.seek(treeStartOffset + treeStructureOffset); int nodeID = numEntries; this.root = nodeID; // First node is always the root // Store root MBR for (int d = 0; d < numDimensions; d++) { minCoord[d][root] = rootMin[d]; maxCoord[d][root] = rootMax[d]; } while (nodeID < this.numNodes + this.numEntries) { int nodeOffset = (int) (seekableIn.getPos() - treeStartOffset); nodeOffsetToIndex.put(nodeOffset, nodeID); int nodeSize = seekableIn.readInt(); seekableIn.skipBytes(nodeSize * (4 + 8 * 2 * numDimensions + 8)); // Skip offset, MBR, and data [start,end[ offsets nodeID++; } // Second, read nodes data and store them in the object seekableIn.seek(treeStartOffset + treeStructureOffset); nodeID = numEntries; int entryID = 0; // Number entries starting at zero entryOffsets = new int[numEntries+1]; while (nodeID < this.numNodes + numEntries) { boolean leafNode = nodeID >= (numEntries + numNonLeaves); isLeaf.set(nodeID, leafNode); // (1) Node size int nodeSize = seekableIn.readInt(); // (2) Offset of the first child IntArray nodeChildren = new IntArray(); children.set(nodeID, nodeChildren); for (int i = 0; i < nodeSize; i++) { int childOffset = seekableIn.readInt(); int childID = leafNode ? entryID++ : nodeOffsetToIndex.get(childOffset); if (leafNode) entryOffsets[childID] = childOffset; nodeChildren.add(childID); // (3) Child MBR // TODO read the entire array instead of looping over them one by one for (int d = 0; d < numDimensions; d++) { minCoord[d][childID] = seekableIn.readDouble(); maxCoord[d][childID] = seekableIn.readDouble(); } seekableIn.skipBytes(8); // Skip start and end offsets } nodeID++; } entryOffsets[entryID] = treeStructureOffset; } public void close() throws IOException { if (seekableIn != null) seekableIn.close(); } public static Iterable search(FSDataInputStream in, long treeLength, double[] minCoord, double[] maxCoord, Deserializer deser) throws IOException { return new DiskSearchIterator(in, treeLength, minCoord, maxCoord, deser); } public static Iterable readAll(FSDataInputStream in, long treeLength, Deserializer deser) throws IOException { return new DiskSearchIterator(in, treeLength, deser); } /** * Seek to the given position or skip bytes to avoid seek. Helps with reading from a remote file system * where the seek operation is very expensive. * @param in the input stream to seek * @param newPos the position to seek to * @throws IOException if an error happens while reading the file */ private static void smartSeek(FSDataInputStream in, long newPos) throws IOException { if (in.getPos() < newPos) in.skip(newPos - in.getPos()); else in.seek(newPos); assert in.getPos() == newPos : "Seek failed"; } /** * Searches data on disk. */ public static class DiskSearchIterator implements Iterable, Iterator, Closeable { /**The starting offset of the tree*/ final long treeStartOffset; /**The data ranges to return as a result. Every pair of offsets make a [start, end[ range to return*/ final IntArray rangesToReturn; /**An input stream to the underlying R-tree*/ final FSDataInputStream in; /**The range where the next record belongs to*/ protected int rangeOfNextElement; /**The deserializer that parses data entries*/ final Deserializer deser; public DiskSearchIterator(FSDataInputStream in, long length, Deserializer deser) throws IOException { this.in = in; rangesToReturn = new IntArray(); this.deser = deser; // Read the header (or whatever is needed out of it) this.treeStartOffset = in.getPos(); in.seek(treeStartOffset + length - 8); int footerOffset = in.readInt(); in.seek(treeStartOffset + footerOffset); int numDimensions = in.readInt(); in.skip(8 * numDimensions * 2); // Skip root MBR in.skip(4 + 4 + 4); // Skip number elements and nodes (leaf and non-leaf) int treeStructureOffset = in.readInt(); rangesToReturn.add(0); rangesToReturn.add(treeStructureOffset); // Move the offset to the first data element (if exists) if (rangeOfNextElement < rangesToReturn.size()) in.seek(treeStartOffset + rangesToReturn.get(rangeOfNextElement)); } public DiskSearchIterator(FSDataInputStream in, long length, double[] searchBoxMin, double[] searchBoxMax, Deserializer deser) throws IOException { this.in = in; rangesToReturn = new IntArray(); this.deser = deser; // Read the header (or whatever is needed out of it) this.treeStartOffset = in.getPos(); in.seek(treeStartOffset + length - 8); int footerOffset = in.readInt(); in.seek(treeStartOffset + footerOffset); int numDimensions = in.readInt(); assert numDimensions == searchBoxMin.length; boolean overlaps = true; // The search box overlaps the node boolean contains = true; // THe search box contains the node for (int d$ = 0; d$ < numDimensions; d$++) { double nodeMin = in.readDouble(); double nodeMax = in.readDouble(); if (!(nodeMin >= searchBoxMin[d$] && nodeMax <= searchBoxMax[d$])) contains = false; if (nodeMin >= searchBoxMax[d$] || searchBoxMin[d$] >= nodeMax) overlaps = false; } if (!overlaps) // No results found return; /*this.numEntries = */in.readInt(); int numNonLeaves = in.readInt(); int numLeaves = in.readInt(); int numNodes = numNonLeaves + numLeaves; int treeStructureOffset = in.readInt(); if (contains) { // The root is fully contained in the search box, all results match this.rangesToReturn.add(0); this.rangesToReturn.add(treeStructureOffset); // Seek to the first element rangeOfNextElement = 0; in.seek(treeStartOffset + rangesToReturn.get(rangeOfNextElement)); return; } // The root overlaps the search box. Search the R-tree starting at the root. Deque nodesToSearch = new ArrayDeque<>(); nodesToSearch.addLast(treeStructureOffset); // Start at the root node (first node) while (!nodesToSearch.isEmpty()) { int nodeToSearch = nodesToSearch.removeFirst(); smartSeek(in, treeStartOffset + nodeToSearch); int numChildren = in.readInt(); while (numChildren-- > 0) { int childOffset = in.readInt(); overlaps = true; // The search box overlaps the node contains = true; // The search box contains the node for (int d$ = 0; d$ < numDimensions; d$++) { double nodeMin = in.readDouble(); double nodeMax = in.readDouble(); if (!(nodeMin >= searchBoxMin[d$] && nodeMax <= searchBoxMax[d$])) contains = false; if (nodeMin >= searchBoxMax[d$] || searchBoxMin[d$] >= nodeMax) overlaps = false; } int dataStartOffset = in.readInt(); int dataEndOffset = in.readInt(); if (contains) { // All data contents match rangesToReturn.add(dataStartOffset); rangesToReturn.add(dataEndOffset); } else if (overlaps) { // Need to further search this child if (childOffset >= treeStructureOffset) { // The child is a node nodesToSearch.addLast(childOffset); } else { // This is not a node, it is an object rangesToReturn.add(dataStartOffset); rangesToReturn.add(dataEndOffset); } } } } // Move the offset to the first data element (if exists) if (rangeOfNextElement < rangesToReturn.size()) in.seek(treeStartOffset + rangesToReturn.get(rangeOfNextElement)); } @Override public Iterator iterator() { return this; } @Override public boolean hasNext() { return rangeOfNextElement < rangesToReturn.size(); } @Override public T next() { // If the iterator has already reached the end if (rangeOfNextElement >= rangesToReturn.size()) return null; try { T result = deser.deserialize(in); // Move on to the next element int offsetOfNextElement = (int) (in.getPos() - treeStartOffset); if (offsetOfNextElement >= rangesToReturn.get(rangeOfNextElement + 1)) { // Done with the current range, go to the next range rangeOfNextElement += 2; if (rangeOfNextElement < rangesToReturn.size()) { // More elements to return on the next call smartSeek(in, treeStartOffset + rangesToReturn.get(rangeOfNextElement)); } } // In all cases, return the current element return result; } catch (IOException e) { throw new RuntimeException("Error reading data from the R-tree", e); } } /** * Returns the progress in the range [0,1]. Helpful to report the progress with MapReduce. * @return the progress of reading. Can be used to report the progress of a long job. * @throws IOException if an error happens while getting the position at the current file */ public float getProgress() throws IOException { float progressOfCurrentRange = (in.getPos() - treeStartOffset) / (float) (rangesToReturn.get(rangeOfNextElement + 1) - rangesToReturn.get(rangeOfNextElement)); float progress = (float) rangeOfNextElement / rangesToReturn.size() + progressOfCurrentRange * 2 / rangesToReturn.size(); return progress; } @Override public void close() throws IOException { in.close(); } } /** * Writes all nodes of the tree in a WKT format to be visualized in QGIS. * @param out the print stream to write the output to, e.g., System.out */ public void toWKT(PrintStream out) { for (Node node : new NodeIterable()) { out.printf("%d\t%s\tPOLYGON((%f %f, %f %f, %f %f, %f %f, %f %f))\n", node.id, isLeaf.get(node.id) ? "leaf" : "inner", node.min[0], node.min[1], node.min[0], node.max[1], node.max[0], node.max[1], node.max[0], node.min[1], node.min[0], node.min[1] ); } for (int i = 0; i < numOfDataEntries(); i++) out.printf("%d\t%s\tPOLYGON((%f %f, %f %f, %f %f, %f %f, %f %f))\n", i, "data", minCoord[0][i], minCoord[1][i], minCoord[0][i], maxCoord[1][i], maxCoord[0][i], maxCoord[1][i], maxCoord[0][i], minCoord[1][i], minCoord[0][i], minCoord[1][i] ); } /** * Assigns an entry to a group based on the R-tree paper (Page 52, Step QS3) * @param mbr1 the MBR of the first group * @param size1 the size of the first group * @param mbr2 the MBR of the second group * @param size2 the size of the second group * @param coords the two-dimensional array of all coordinates * @param iPoint the index of the point to consider * @return the index of the group to assign the record to, 0 or 1 */ static int chooseGroup(EnvelopeNDLite mbr1, int size1, EnvelopeNDLite mbr2, int size2, double[][] coords, int iPoint) { // Compute the volume for each group before and after adding the given point double vol1Before = 1.0, vol1After = 1.0; double vol2Before = 1.0, vol2After = 1.0; for (int d = 0; d < coords.length; d++) { // Compute the side length before/after adding the given point for the first group double sideLength = mbr1.getSideLength(d); vol1Before *= mbr1.getSideLength(d); if (coords[d][iPoint] < mbr1.getMinCoord(d)) sideLength += mbr1.getMinCoord(d) - coords[d][iPoint]; if (coords[d][iPoint] > mbr1.getMaxCoord(d)) sideLength += coords[d][iPoint] - mbr1.getMaxCoord(d); vol1After *= sideLength; // Do the same for the other group sideLength = mbr2.getSideLength(d); vol2Before *= mbr2.getSideLength(d); if (coords[d][iPoint] < mbr2.getMinCoord(d)) sideLength += mbr2.getMinCoord(d) - coords[d][iPoint]; if (coords[d][iPoint] > mbr2.getMaxCoord(d)) sideLength += coords[d][iPoint] - mbr2.getMaxCoord(d); vol2After *= sideLength; } // Compute the volume expansion for each group double d1 = vol1After - vol1Before; double d2 = vol2After - vol2Before; if (d1 == d2) { // Resolve ties by adding the entry to the group with smaller area d1 = vol1Before; d2 = vol2Before; if (d1 == d2) { // then to the one wih fewer entries d1 = size1; d2 = size2; if (d1 == d2) { // ... then to either d1 = 0.5; d2 = Math.random(); } } } return d1 < d2 ? 0 : 1; } /** * Partitions the given set of points using the linear split algorithm to produce a set of partitions where each one * has between minPartitionSize and maxPartitionSize points, inclusively. The parameter fractionMinSplitSize can be * set to any number * @param coords a two-dimensional array of coordinates [axis][point#] * @param minPartitionSize the minimum partition size (inclusive) * @param maxPartitionSize the maximum partition size (inclusive) * @param fractionMinSplitSize the fraction of the minimum split size [0, 0.5] * @return the boundaries of the partitions */ static EnvelopeNDLite[] partitionPoints(double[][] coords, int minPartitionSize, int maxPartitionSize, float fractionMinSplitSize) { class Range { /**The range of points to partition [start, end)*/ int start, end; Range(int s, int e) { this.start = s; this.end = e; } } // The ranges that might need to be split int numDimensions = coords.length; int numPoints = coords[0].length; assert RStarTree.isValid(numPoints, minPartitionSize, maxPartitionSize); Stack rangesToSplit = new Stack(); rangesToSplit.push(new Range(0, numPoints)); // The output list of partitions List partitions = new ArrayList<>(); // Compute the MBR of all points to be able to normalize the separation EnvelopeNDLite mbr = null; // Create the temporary objects here to avoid creating them multiple times double[] tempCoord = new double[numDimensions]; int[] pointWithMinCoord = new int[numDimensions]; int[] pointWithMaxCoord = new int[numDimensions]; EnvelopeNDLite mbr1 = new EnvelopeNDLite(numDimensions); EnvelopeNDLite mbr2 = new EnvelopeNDLite(numDimensions); while (!rangesToSplit.empty()) { Range r = rangesToSplit.pop(); if (r.end - r.start <= maxPartitionSize) { // No need to further split this range. Report it to the answer. EnvelopeNDLite partition = new EnvelopeNDLite(numDimensions); for (int d = 0; d < numDimensions; d++) { for (int iPoint = r.start; iPoint < r.end; iPoint++) { partition.setMinCoord(d, Math.min(partition.getMinCoord(d), coords[d][iPoint])); partition.setMaxCoord(d, Math.max(partition.getMaxCoord(d), coords[d][iPoint])); } } partitions.add(partition); } else { // Apply the linear-time R-tree splitting algorithm // First, pick the two seeds the have the largest separation for (int d = 0; d < numDimensions; d++) { pointWithMinCoord[d] = pointWithMaxCoord[d] = r.start; for (int iPoint = r.start+1; iPoint < r.end; iPoint++) { if (coords[d][iPoint] < coords[d][pointWithMinCoord[d]]) pointWithMinCoord[d] = iPoint; if (coords[d][iPoint] > coords[d][pointWithMaxCoord[d]]) pointWithMaxCoord[d] = iPoint; } } // Compute the MBR for the very first group to normalize separations if (mbr == null) { mbr = new EnvelopeNDLite(numDimensions); for (int d = 0; d < numDimensions; d++) { mbr.setMinCoord(d, coords[d][pointWithMinCoord[d]]); mbr.setMaxCoord(d, coords[d][pointWithMaxCoord[d]]); } } // Pick the two seeds that have the largest normalized separation int seed1 = -1, seed2 = -1; double largestNormalizedSeparation = Double.NEGATIVE_INFINITY; for (int d = 0; d < numDimensions; d++) { double normalizedSeparation = (coords[d][pointWithMaxCoord[d]] - coords[d][pointWithMinCoord[d]]) / mbr.getSideLength(d); if (normalizedSeparation > largestNormalizedSeparation) { seed1 = pointWithMinCoord[d]; seed2 = pointWithMaxCoord[d]; largestNormalizedSeparation = normalizedSeparation; } } assert seed1 != -1 && seed2 != -1; // Swap seed1 with range.start and seed2 with range.end-1 for (int d = 0; d < numDimensions; d++) { double temp = coords[d][r.start]; coords[d][r.start] = coords[d][seed1]; coords[d][seed1] = temp; temp = coords[d][r.end-1]; coords[d][r.end-1] = coords[d][seed2]; coords[d][seed2] = temp; } // Initialize the MBR of the two groups mbr1.setEmpty(); mbr2.setEmpty(); for (int d = 0; d < numDimensions; d++) { mbr1.setMinCoord(d, coords[d][r.start]); mbr1.setMaxCoord(d, coords[d][r.start]); mbr2.setMinCoord(d, coords[d][r.end-1]); mbr2.setMaxCoord(d, coords[d][r.end-1]); } // Split the range [r.start, r.end) so that the first group is [r.start, i] and the second group is // [j, r.end) int i = r.start; int j = r.end - 1; while (i < j) { int group; // Advance i as long as the element at i belongs to the first group while (i < j && chooseGroup(mbr1, i - r.start, mbr2, r.end - j, coords, i) == 0) { for (int d = 0; d < numDimensions; d++) tempCoord[d] = coords[d][i]; mbr1.merge(tempCoord); i++; } // Decrease j as long as the element at j belongs to the second group while (i < j && chooseGroup(mbr1, i - r.start, mbr2, r.end - j, coords, j) == 1) { for (int d = 0; d < numDimensions; d++) tempCoord[d] = coords[d][j]; mbr2.merge(tempCoord); j--; } // Swap the elements at i and j and continue if (i < j) { double temp; for (int d = 0; d < numDimensions; d++) { temp = coords[d][i]; coords[d][i] = coords[d][j]; coords[d][j] = temp; } } // Check if all the remaining items need to be assigned to one group to meet the minimum size constraint if (r.end - i <= minPartitionSize) j=i; else if (j - r.start <= minPartitionSize) i = j; } // Now split around i and j (notice that i == j at this point) // Ensure that the two partitions are valid int separator = i; int diff = r.end - separator > separator - r.start?+1 : -1; while (!RStarTree.isValid(separator - r.start, minPartitionSize, maxPartitionSize) || !RStarTree.isValid(r.end - separator, minPartitionSize, maxPartitionSize)) { separator += diff; } Range newRange = new Range(separator, r.end); r.end = separator; rangesToSplit.push(r); rangesToSplit.push(newRange); } } return partitions.toArray(new EnvelopeNDLite[partitions.size()]); } public static int spatialJoinIntersectsIndex(List r, List s, SpatialJoinAlgorithms.SJResult results) { if (r.size() == 0 || s.size() == 0) return 0; assert GeometryHelper.getCoordinateDimension(r.get(0)) == 2 : "R should be 2D geometries"; assert GeometryHelper.getCoordinateDimension(s.get(0)) == 2 : "S should be 2D geometries"; double[][] coordsR = getMBRs(r); double[][] coordsS = getMBRs(s); RTreeGuttman rtree1 = new RRStarTree(10, 40); rtree1.initializeFromRects(coordsR[0], coordsR[1], coordsR[2], coordsR[3]); RTreeGuttman rtree2 = new RRStarTree(10, 40); rtree2.initializeFromRects(coordsS[0], coordsS[1], coordsS[2], coordsS[3]); double[] refPoint = new double[2]; return RTreeGuttman.spatialJoin(rtree1, rtree2, (i,j) -> { // Compute the reference point if (r.get(i).intersects((s.get(j)))) { refPoint[0] = Math.max(coordsR[0][i], coordsS[0][j]); refPoint[1] = Math.max(coordsR[1][i], coordsS[1][j]); results.accept(i, j, refPoint); } }); } public static int spatialJoinIntersectsIndexFeatures(List r, List s, SpatialJoinAlgorithms.SJResult results) { if (r.size() == 0 || s.size() == 0) return 0; assert GeometryHelper.getCoordinateDimension(r.get(0).getGeometry()) == 2 : "R should be 2D geometries"; assert GeometryHelper.getCoordinateDimension(s.get(0).getGeometry()) == 2 : "S should be 2D geometries"; long t1 = System.nanoTime(); double[][] coordsR = getMBRs2(r); double[][] coordsS = getMBRs2(s); RTreeGuttman rtree1 = new RRStarTree(10, 40); rtree1.initializeFromRects(coordsR[0], coordsR[1], coordsR[2], coordsR[3]); RTreeGuttman rtree2 = new RRStarTree(10, 40); rtree2.initializeFromRects(coordsS[0], coordsS[1], coordsS[2], coordsS[3]); long t2 = System.nanoTime(); double[] refPoint = new double[2]; int resultSize = RTreeGuttman.spatialJoin(rtree1, rtree2, (i, j) -> { // Compute the reference point refPoint[0] = Math.max(coordsR[0][i], coordsS[0][j]); refPoint[1] = Math.max(coordsR[1][i], coordsS[1][j]); results.accept(i, j, refPoint); }); long t3 = System.nanoTime(); LOG.info(String.format("Built two R-trees of sizes %d and %d in %f seconds and joined in %f seconds to produce %d results", rtree1.numOfDataEntries(), rtree2.numOfDataEntries(), (t2-t1)*1E-9, (t3-t2)*1E-9, resultSize)); return resultSize; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy