All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.ucr.cs.bdlab.beast.indexing.STRPartitioner Maven / Gradle / Ivy

There is a newer version: 0.10.1-RC2
Show newest version
/*
 * Copyright 2018 University of California, Riverside
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.ucr.cs.bdlab.beast.indexing;

import edu.ucr.cs.bdlab.beast.common.BeastOptions;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeNDLite;
import edu.ucr.cs.bdlab.beast.geolite.GeometryHelper;
import edu.ucr.cs.bdlab.beast.synopses.AbstractHistogram;
import edu.ucr.cs.bdlab.beast.synopses.Summary;
import edu.ucr.cs.bdlab.beast.util.IntArray;
import org.apache.hadoop.util.IndexedSortable;
import org.apache.hadoop.util.QuickSort;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.ArrayDeque;
import java.util.Queue;

/**
 * A spatial partitioner based on the Sort-Tile-Recursive algorithm (STR) as described in the following paper.
 * Scott T. Leutenegger, J. M. Edgington, Mario A. López:
 * STR: A Simple and Efficient Algorithm for R-Tree Packing. ICDE 1997: 497-506
 */
@SpatialPartitioner.Metadata(
    disjointSupported = true,
    extension = "str",
    description = "Partitions the space using the Sort-tile-recursive (STR) algorithm for R-tree packing")
public class STRPartitioner implements SpatialPartitioner {

  /**Keep the partitions disjoint by replicating objects to all overlapping partitions*/
  private boolean disjoint;

  /**The degree of the STR tree (number of children for each node).*/
  private int treeDegree;

  /**Number of levels in the tree*/
  private int numTreeLevels;

  /**The splits that define the STR tree stored in level-order traversal.*/
  protected double[] splitCoords;

  /**The level of each node in the in-order traversal. While it can be computed, it is more compute-efficient to cache*/
  protected int[] nodeLevel;

  /**Number of dimensions for the partitions*/
  protected int numDimensions;

  /**Total number of internal nodes in the STR tree. The leaf nodes represent the partitions*/
  protected transient int numInternalNodes;

  /**The envelope of the underlying dataset*/
  protected EnvelopeNDLite envelope = new EnvelopeNDLite();

  @Override
  public void setup(BeastOptions conf, boolean disjoint) {
    this.disjoint = disjoint;
  }

  @Override
  public void construct(Summary summary, @Required double[][] sample, AbstractHistogram histogram, int numBuckets) {
    this.envelope.set(summary);
    this.numDimensions = sample.length;
    final int sampleCount = sample[0].length;
    if (numBuckets == 1) {
      numTreeLevels = 0;
      return; // Special case. No need to make any partitions or splits
    }
    this.treeDegree = (int) Math.ceil(Math.pow(numBuckets, 1.0 / numDimensions));
    // TODO: Make a shorter tree (fewer levels) when treeDegree^numDimensions >> desired # of partitions
    this.numTreeLevels = numDimensions;
    numInternalNodes = (int) ((Math.pow(treeDegree, numDimensions) - 1) / (treeDegree - 1));
    LOG.info(String.format("Creating an STR tree with %d levels and a degree of %d. Total number of nodes will be %d",
        numDimensions, treeDegree, numInternalNodes));
    nodeLevel = new int[numInternalNodes];
    int totalNumberOfSplits = numInternalNodes * (treeDegree - 1);
    // Reserve an array of all split coordinates
    splitCoords = new double[totalNumberOfSplits];

    assert totalNumberOfSplits <= sampleCount;

    // Recursively split the points along each axis
    class AxisSorter implements IndexedSortable {
      int axis;

      @Override
      public int compare(int i, int j) {
        return (int) Math.signum(sample[axis][i] - sample[axis][j]);
      }

      @Override
      public void swap(int i, int j) {
        double t;
        for (int d = 0; d < numDimensions; d++) {
          t = sample[d][i];
          sample[d][i] = sample[d][j];
          sample[d][j] = t;
        }
      }
    }
    AxisSorter sorter = new AxisSorter();
    QuickSort quickSort = new QuickSort();

    class SplitRange {
      int start, end;
      int splitAxis;
      /**The ID of the partition being split. The root partition (input MBR) has an ID of zero*/
      int partitionID;

      SplitRange(int partitionID, int s, int e, int x) {
        this.partitionID = partitionID;
        this.start = s;
        this.end = e;
        this.splitAxis = x;
      }
    }
    // Recursively split the input space starting at the root with the first dimensions
    Queue toSplit = new ArrayDeque<>();
    toSplit.add(new SplitRange(0, 0, sampleCount, 0));
    while (!toSplit.isEmpty()) {
      SplitRange rangeToSplit = toSplit.remove();
      // Sort along this axis
      sorter.axis = rangeToSplit.splitAxis;
      nodeLevel[rangeToSplit.partitionID] = rangeToSplit.splitAxis;
      quickSort.sort(sorter, rangeToSplit.start, rangeToSplit.end);
      // Split into the desired number of splits. Number of splits = tree degree - 1
      // i1: The index of the first point in the split
      int indexOfTheFirstSplitForThisPartition = rangeToSplit.partitionID * (treeDegree - 1);
      int i1 = rangeToSplit.start;
      for (int iSplit = 1; iSplit < treeDegree; iSplit++) {
        // i2: The index after the last point in the split
        int i2 = (rangeToSplit.start * (treeDegree - iSplit) + (rangeToSplit.end + 1) * iSplit) / treeDegree;
        // The split is located at the coordinate between the last point in the split, and the next point
        splitCoords[indexOfTheFirstSplitForThisPartition + iSplit - 1] = (sample[rangeToSplit.splitAxis][i2-1] + sample[rangeToSplit.splitAxis][i2]) / 2.0;
        if (rangeToSplit.splitAxis < (numDimensions - 1)) // Need further split
          toSplit.add(new SplitRange(rangeToSplit.partitionID * treeDegree + iSplit, i1, i2, rangeToSplit.splitAxis + 1));
        i1 = i2;
      }
      // Add the last partition to split
      if (rangeToSplit.splitAxis < (numDimensions - 1))
        toSplit.add(new SplitRange(rangeToSplit.partitionID * treeDegree + treeDegree, i1, rangeToSplit.end, rangeToSplit.splitAxis + 1));
    }
  }

  @Override
  public void overlapPartitions(EnvelopeNDLite mbr, IntArray matchedPartitions) {
    assert mbr.getCoordinateDimension() == this.getCoordinateDimension();
    // Reuse the given array (matchedPartitions) as a search stack
    matchedPartitions.append(0); // Start searching at the root
    if (treeDegree == 0)
      return; // No splits. Just one big partition (id = 0).
    int numNodesToSearch = 1;
    while (numNodesToSearch > 0) {
      int nodeID = matchedPartitions.pop();
      numNodesToSearch--;
      do {
        // Since we expect most rectangles to overlap a few nodes, we binary search to find the first overlapping
        // node and then linearly advance to find any additional overlapping nodes
        int axis = nodeLevel[nodeID];
        // Binary search to find the correct child that contains the lower point of the MBR
        int s = 0;
        int e = treeDegree - 1; // There are only treeDegree - 1 splits
        int firstSplitIndex = nodeID * (treeDegree - 1);
        while (s < e) {
          int m = (s + e) / 2;
          double diff = mbr.getMinCoord(axis) - splitCoords[firstSplitIndex + m];
          if (diff < 0)
            e = m;
          else
            s = m + 1;
        }
        assert s == e;
        // Now, advance e to point to the index after the last node that overlaps the given MBR
        while (e < (treeDegree - 1) && mbr.getMaxCoord(axis) > splitCoords[firstSplitIndex + e])
          e++;

        // We go directly into the first matching node, and keep the others in the stack
        while (e > s) {
          int childNodeID = nodeID * treeDegree + e + 1;
          if (childNodeID < nodeLevel.length) {
            // The child is an internal node, recursively search through it
            matchedPartitions.append(childNodeID - numInternalNodes);
            numNodesToSearch++;
          } else {
            // The child is a leaf node, add to matching partitions
            matchedPartitions.insert(numNodesToSearch, childNodeID - numInternalNodes);
          }
          e--;
        }
        nodeID = nodeID * treeDegree + s + 1;
        if (nodeID >= nodeLevel.length) {
          // Reached a leaf node, add to matching partitions
          matchedPartitions.insert(numNodesToSearch, nodeID - numInternalNodes);
          // Mark the end of the search for this node.
          nodeID = -1;
        }
      } while (nodeID != -1);
    }
  }

  @Override
  public int overlapPartition(EnvelopeNDLite mbr) {
    if (treeDegree == 0)
      return 0; // Special handling for one partition (no splits)
    assert mbr.getCoordinateDimension() == this.getCoordinateDimension();
    // Navigate to the single partition that contains the center of the given MBR
    int nodeID = 0; // Start searching at the root
    for (int d = 0; d < this.getCoordinateDimension(); d++) {
      double mbrCenter = (mbr.getMinCoord(d) + mbr.getMaxCoord(d)) / 2.0;
      // Binary search to find the correct child
      int s = 0;
      int e = treeDegree - 1; // There are only treeDegree - 1 splits
      int firstSplitIndex = nodeID * (treeDegree - 1);
      while (s < e) {
        int m = (s + e) / 2;
        double diff = mbrCenter - splitCoords[firstSplitIndex + m];
        if (diff < 0)
          e = m;
        else
          s = m + 1;
      }
      assert s == e;
      nodeID = nodeID * treeDegree + s + 1;
    }
    return nodeID - numInternalNodes;
  }

  @Override
  public void getPartitionMBR(int partitionID, EnvelopeNDLite mbr) {
    partitionID += numInternalNodes;
    assert mbr.getCoordinateDimension() == this.getCoordinateDimension();
    if (treeDegree == 0) {
      // No splits! Return an infinite rectangle
      for (int d = 0; d < getCoordinateDimension(); d++) {
        mbr.setMinCoord(d, Double.NEGATIVE_INFINITY);
        mbr.setMaxCoord(d, Double.POSITIVE_INFINITY);
      }
      return;
    }
    // Navigate up from the given partitionID (nodeID) to the root while setting the boundaries of the envelope
    assert partitionID >= nodeLevel.length;
    while (partitionID > 0) {
      int parentNodeID = (partitionID - 1) / treeDegree;
      int childIndex = partitionID % treeDegree - 1;
      if (childIndex == -1)
        childIndex += treeDegree;
      int axis = nodeLevel[parentNodeID];
      mbr.setMinCoord(axis, childIndex == 0? Double.NEGATIVE_INFINITY : splitCoords[parentNodeID * (treeDegree - 1) + childIndex - 1]);
      mbr.setMaxCoord(axis, childIndex == (treeDegree - 1)? Double.POSITIVE_INFINITY : splitCoords[parentNodeID * (treeDegree - 1) + childIndex]);
      partitionID = parentNodeID;
    }
  }

  @Override
  public int getPartitionCount() {
    // The tree is a full tree. All nodes have the same degree (treeDegree)
    int numPartitions = 1;
    for (int $l = 0; $l < numTreeLevels; $l++)
      numPartitions *= treeDegree;
    return numPartitions;
  }

  @Override
  public boolean isDisjoint() {
    return disjoint;
  }

  @Override
  public int getCoordinateDimension() {
    return this.numDimensions;
  }

  @Override
  public void writeExternal(ObjectOutput out) throws IOException {
    GeometryHelper.writeIEnvelope(envelope, out);
    out.writeInt(treeDegree);
    out.writeInt(numTreeLevels);
    out.writeInt(numDimensions);
    out.writeBoolean(disjoint);
    if (treeDegree > 0) {
      IntArray.writeIntArray(nodeLevel, out);
      for (double x : splitCoords)
        out.writeDouble(x);
    }
  }

  @Override
  public void readExternal(ObjectInput in) throws IOException {
    GeometryHelper.readIEnvelope(envelope, in);
    this.treeDegree = in.readInt();
    this.numTreeLevels = in.readInt();
    this.numDimensions = in.readInt();
    this.disjoint = in.readBoolean();
    if (treeDegree > 0) {
      nodeLevel = IntArray.readIntArray(nodeLevel, in);
      splitCoords = new double[nodeLevel.length * (treeDegree - 1)];
      for (int i = 0; i < splitCoords.length; i++)
        splitCoords[i] = in.readDouble();
    }
    numInternalNodes = (int) ((Math.pow(treeDegree, numDimensions) - 1) / (treeDegree - 1));
  }

  @Override
  public EnvelopeNDLite getEnvelope() {
    return envelope;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy