All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.ucr.cs.bdlab.beast.indexing.RSGrovePartitioner Maven / Gradle / Ivy

/*
 * Copyright 2018 University of California, Riverside
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.ucr.cs.bdlab.beast.indexing;

import edu.ucr.cs.bdlab.beast.cg.SpatialPartitioner;
import edu.ucr.cs.bdlab.beast.common.BeastOptions;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeND;
import edu.ucr.cs.bdlab.beast.geolite.EnvelopeNDLite;
import edu.ucr.cs.bdlab.beast.geolite.GeometryHelper;
import edu.ucr.cs.bdlab.beast.synopses.AbstractHistogram;
import edu.ucr.cs.bdlab.beast.synopses.Summary;
import edu.ucr.cs.bdlab.beast.util.IntArray;
import edu.ucr.cs.bdlab.beast.util.OperationParam;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

/**
 * An implementation of the R*-Grove partitioner. This partitioner uses the method
 * {@link RStarTree#partitionPoints(double[][], int, int, boolean, double, AuxiliarySearchStructure)}
 * to partition a sample of points into rectangles.
 * @author Ahmed Eldawy
 *
 */
@SpatialPartitioner.Metadata(
    disjointSupported = true,
    extension = "rsgrove",
    description = "A partitioner that uses the R*-tree node splitting algorithm on a sample of points to partition the space"
)
public class RSGrovePartitioner extends SpatialPartitioner {

  @OperationParam(
      description = "The desired ratio between the minimum and maximum partitions sizes ]0,1[",
      defaultValue = "0.95",
      required = false
  )
  public static final String MMRatio = "mmratio";

  @OperationParam(
      description = "The minimum fraction of a split considered by the R*-tree and RR*-tree partitioners",
      defaultValue = "0.0",
      required = false
  )
  public static final String MinSplitRatio = "RSGrove.MinSplitRatio";

  /**Configuration name to expand the R*-grove partitions to infinity upon creation. Default is true. */
  public static final String ExpandToInfinity = "RSGrove.ExpandToInf";

  /**MBR of the points used to partition the space*/
  protected final EnvelopeNDLite mbrPoints = new EnvelopeNDLite();

  /**The coordinates of the minimum corner of each partition*/
  protected double[][] minCoord;
  /**The coordinates of the maximum corner of each partition*/
  protected double[][] maxCoord;

  /**The ratio m/M for partitioning the points*/
  transient protected double mMRatio;

  /**An auxiliary search structure to find matching partitions quickly*/
  protected AuxiliarySearchStructure aux;

  /**The minimum fraction of a split considered by teh R*-tree and RR*-tree partitioners*/
  protected double fractionMinSplitSize;

  /**The produced partitions should be disjoint*/
  private boolean disjointPartitions;

  /**A random number generator to assign empty geometries to random partitions for load balance*/
  transient protected Random random;

  /**Expand generated partitions to infinity to ensure it covers the entire space. Defaults to true*/
  private boolean expandToInf;

  @Override
  public void setup(BeastOptions conf, boolean disjoint) {
    this.disjointPartitions = disjoint;
    mMRatio = conf.getDouble(MMRatio, 0.95);
    this.fractionMinSplitSize = conf.getDouble(MinSplitRatio, 0.0);
    this.random = new Random();
    this.expandToInf = conf.getBoolean(ExpandToInfinity, true);
  }

  protected double Partition_expansion(int iPartition, EnvelopeNDLite env) {
    double volBefore = 1.0, volAfter = 1.0;
    assert env.getCoordinateDimension() == this.getCoordinateDimension();
    for (int d = 0; d < getCoordinateDimension(); d++) {
      volBefore *= Math.min(mbrPoints.getMaxCoord(d), maxCoord[d][iPartition]) -
          Math.max(mbrPoints.getMinCoord(d), minCoord[d][iPartition]);
      volAfter *= Math.min(mbrPoints.getMaxCoord(d), Math.max(maxCoord[d][iPartition], env.getMaxCoord(d))) -
          Math.max(mbrPoints.getMinCoord(d), Math.min(minCoord[d][iPartition], env.getMinCoord(d)));
    }
    return volAfter - volBefore;
  }

  /**
   * Tests if a partition overlaps a given rectangle
   * @param partitionID the ID of the partition to check its overlap
   * @param ienv the envelope to check for its overlap with the partition
   * @return {@code true} iff the envelope overlaps the partition.
   */
  protected boolean Partition_overlap(int partitionID, EnvelopeND ienv) {
    for (int d = 0; d < getCoordinateDimension(); d++) {
      if (maxCoord[d][partitionID] <= ienv.getMinCoord(d) || ienv.getMaxCoord(d) <= minCoord[d][partitionID])
        return false;
    }
    return true;
  }

  /**
   * Computes the area of a partition.
   * @param partitionID the ID of the partition to compute its volume
   * @return the volume of the given partition
   */
  protected double Partition_volume(int partitionID) {
    double vol = 1.0;
    for (int d = 0; d < getCoordinateDimension(); d++)
      vol *= maxCoord[d][partitionID] - minCoord[d][partitionID];
    return vol;
  }

  @Override
  public void construct(Summary summary, @Required double[][] sample, @Preferred AbstractHistogram histogram, int numPartitions) {
    if (sample.length == 0) {
      // Generate random points to produce uniform partitions without worrying much about changing many part of the code
      sample = new double[summary.getCoordinateDimension()][1000];
      for (int d = 0; d < summary.getCoordinateDimension(); d++) {
        for (int i = 0; i < 1000; i++) {
          sample[d][i] = Math.random() * summary.getSideLength(d) + summary.getMinCoord(d);
        }
      }
    }
    assert mMRatio > 0 : "mMRatio cannot be zero. Make sure you call #setup() before #construct()";
    int numDimensions = sample.length;
    assert summary.getCoordinateDimension() == sample.length;
    mbrPoints.setCoordinateDimension(summary.getCoordinateDimension());
    mbrPoints.merge(summary);
    int numSamplePoints = sample[0].length;
    aux = new AuxiliarySearchStructure();
    EnvelopeNDLite[] partitionMBRs;
    if (histogram == null) {
      // No histogram! Adjust m and M based on number of sample points
      LOG.info(String.format("Partitioning the points without weight into %d partitions", numPartitions));
      int M = (int) Math.ceil((double) numSamplePoints / numPartitions);
      int m = (int) Math.ceil(mMRatio * M);
      partitionMBRs = partitionPoints(sample, M, m, expandToInf);
    } else {
      // Histogram is available and balanced size is desired. Compute points weights and partition based on the weights.
      long[] weights = computePointWeights(sample, histogram);
      long totalSize = 0;
      for (long w : weights)
        totalSize += w;
      // m and M represent data sizes so they need to be 64-bit long to support > 2G input sizes
      long m, M;
      M = (long) Math.ceil((double) totalSize / numPartitions);
      m = (long) (totalSize * mMRatio / numPartitions);
      partitionMBRs = partitionWeightedPoints(sample, weights, M, m, expandToInf);
      LOG.info(String.format("R*-Grove created %d partitions while %d were requsted", partitionMBRs.length, numPartitions));
    }

    minCoord = new double[numDimensions][partitionMBRs.length];
    maxCoord = new double[numDimensions][partitionMBRs.length];
    for (int i = 0; i < partitionMBRs.length; i++) {
      for (int d = 0; d < numDimensions; d++) {
        minCoord[d][i] = partitionMBRs[i].getMinCoord(d);
        maxCoord[d][i] = partitionMBRs[i].getMaxCoord(d);
      }
    }
  }

  /**
   * Computes the weights of points according to the histogram. The total weight of the points should be roughly
   * equal to the total weight in the histogram. Each point is assigned a weight based on its location in the histogram
   * so that it approximates the weight of its vicinity.
   * @param sample a set of points
   * @param histogram a histogram that covers all the points
   * @return an array that assigns a weight to each of the point such that the total weight is roughly equal to the
   * total weight of the histogram.
   */
  protected static long[] computePointWeights(double[][] sample, AbstractHistogram histogram) {
    int numDimensions = sample.length;
    int numPoints = sample[0].length;
    assert numDimensions == histogram.getCoordinateDimension();
    int[] numPointsPerBin = new int[histogram.getNumBins()];
    double[] coords = new double[numDimensions];
    for (int $i = 0; $i < numPoints; $i++) {
      for (int $d = 0; $d < numDimensions; $d++)
        coords[$d] = sample[$d][$i];
      int binID = histogram.getBinID(coords);
      numPointsPerBin[binID]++;
    }
    // Now compute the weight by distributing the total weight of each bucket
    long[] weights = new long[numPoints];
    for (int $i = 0; $i < numPoints; $i++) {
      for (int $d = 0; $d < numDimensions; $d++)
        coords[$d] = sample[$d][$i];
      int binID = histogram.getBinID(coords);
      weights[$i] = histogram.getBinValue(binID) / numPointsPerBin[binID];
    }
    return weights;
  }

  protected EnvelopeNDLite[] partitionPoints(double[][] coords, int max, int min) {
    return RStarTree.partitionPoints(coords, min, max, true, fractionMinSplitSize, aux);
  }

  protected EnvelopeNDLite[] partitionPoints(double[][] coords, int max, int min, boolean expandToInf) {
    return RStarTree.partitionPoints(coords, min, max, expandToInf, fractionMinSplitSize, aux);
  }

  protected EnvelopeNDLite[] partitionWeightedPoints(double[][] coords, long[] weights, long max, long min) {
    return RStarTree.partitionWeightedPoints(coords, weights, min, max, true, fractionMinSplitSize, aux);
  }

  protected EnvelopeNDLite[] partitionWeightedPoints(double[][] coords, long[] weights, long max, long min, boolean expandToInf) {
    return RStarTree.partitionWeightedPoints(coords, weights, min, max, expandToInf, fractionMinSplitSize, aux);
  }

  @Override
  public void writeExternal(ObjectOutput out) throws IOException {
    GeometryHelper.writeIEnvelope(mbrPoints, out);
    out.writeInt(getCoordinateDimension());
    out.writeInt(numPartitions());
    for (int d = 0; d < getCoordinateDimension(); d++) {
      for (int i = 0; i < numPartitions(); i++) {
        out.writeDouble(minCoord[d][i]);
        out.writeDouble(maxCoord[d][i]);
      }
    }
    aux.writeExternal(out);
    out.writeBoolean(disjointPartitions);
  }

  @Override
  public void readExternal(ObjectInput in) throws IOException {
    GeometryHelper.readIEnvelope(mbrPoints, in);
    int numDimensions = in.readInt();
    int numPartitions = in.readInt();
    if (minCoord == null || numPartitions() != numPartitions || getCoordinateDimension() != numDimensions) {
      minCoord = new double[numDimensions][numPartitions];
      maxCoord = new double[numDimensions][numPartitions];
    }
    for (int d = 0; d < getCoordinateDimension(); d++) {
      for (int i = 0; i < numPartitions(); i++) {
        minCoord[d][i] = in.readDouble();
        maxCoord[d][i] = in.readDouble();
      }
    }
    if (aux == null)
      aux = new AuxiliarySearchStructure();
    aux.readExternal(in);
    disjointPartitions = in.readBoolean();
    if (random == null)
      random = new Random();
  }
  
  @Override
  public int numPartitions() {
    return minCoord == null? 0 : minCoord[0].length;
  }

  @Override
  public boolean isDisjoint() {
    return this.disjointPartitions;
  }

  @Override
  public int getCoordinateDimension() {
    return minCoord == null ? 0 : minCoord.length;
  }

  @Override
  public void overlapPartitions(EnvelopeNDLite mbr, IntArray matchedPartitions) {
    matchedPartitions.clear();
    if (mbr.isEmpty())
      matchedPartitions.add(random.nextInt(this.numPartitions()));
    else
      aux.search(mbr, matchedPartitions);
  }

  /**
   * Multiple temporary arrays to use with the method {@link #overlapPartition(EnvelopeND)}, one per running thread
   */
  protected Map tempIntArrays = new HashMap<>();

  @Override
  public int overlapPartition(EnvelopeNDLite mbr) {
    if (mbr.isEmpty()) {
      // Special case: Assign an empty geometry to a random partition for load balance
      return random.nextInt(this.numPartitions());
    }
    double minExpansion = Double.POSITIVE_INFINITY;
    int chosenPartition = -1;
    IntArray tempPartitions = tempIntArrays.get(Thread.currentThread());
    if (tempPartitions == null) {
      tempPartitions = new IntArray();
      tempIntArrays.put(Thread.currentThread(), tempPartitions);
    }
    aux.search(new EnvelopeNDLite(mbr), tempPartitions);
    // NB tempPartitions cannot be empty because aux covers the entire space (-Infinity,+Infinity)
    if (tempPartitions.size() == 1)
      return tempPartitions.get(0);
    for (int overlappingPartition : tempPartitions) {
      double expansion = Partition_expansion(overlappingPartition, new EnvelopeNDLite(mbr));
      if (expansion < minExpansion) {
        minExpansion = expansion;
        chosenPartition = overlappingPartition;
      } else if (expansion == minExpansion) {
        // Resolve ties by choosing the entry with the rectangle of smallest area
        if (Partition_volume(overlappingPartition) < Partition_volume(chosenPartition))
          chosenPartition = overlappingPartition;
      }
    }
    assert chosenPartition >= 0;
    return chosenPartition;
  }

  @Override
  public void getPartitionMBR(int partitionID, EnvelopeNDLite mbr) {
    mbr.setCoordinateDimension(getCoordinateDimension());
    mbr.setEmpty();
    // TODO find a way to avoid creating a temporary array (not major .. used only when each partition is finalized)
    double[] coord = new double[getCoordinateDimension()];
    for (int d = 0; d < getCoordinateDimension(); d++)
      coord[d] = minCoord[d][partitionID];
    mbr.merge(coord);
    for (int d = 0; d < getCoordinateDimension(); d++)
      coord[d] = maxCoord[d][partitionID];
    mbr.merge(coord);
  }

  @Override
  public EnvelopeNDLite getEnvelope() {
    return mbrPoints;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy