All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.ucr.cs.bdlab.beast.synopses.UniformHistogram Maven / Gradle / Ivy

/*
 * Copyright 2018 University of California, Riverside
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.ucr.cs.bdlab.beast.synopses;

import edu.ucr.cs.bdlab.beast.geolite.EnvelopeNDLite;
import edu.ucr.cs.bdlab.beast.geolite.GeometryHelper;
import edu.ucr.cs.bdlab.beast.geolite.PointND;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/**
 * A uniform grid histogram storing Long values.
 */
public class UniformHistogram extends AbstractHistogram implements Externalizable {
  /**Logger for this class*/
  private static final Log LOG = LogFactory.getLog(UniformHistogram.class);

  /**Dimensions of the grid of the histogram*/
  protected int[] numPartitions;

  /**The frequency value of each bin, stored as a flat array indexed by bin ID*/
  protected long[] values;

  /**
   * Default constructor is needed for deserialization. It leaves {@link #numPartitions} and {@link #values}
   * unset; they are populated by {@link #readExternal(ObjectInput)}.
   */
  public UniformHistogram() { }

  /**
   * Creates an empty histogram (all bins are zero) that covers the given MBR.
   * @param mbr the minimum bounding rectangle that this histogram covers
   * @param numPartitions the number of partitions along each dimension; the array is copied
   */
  public UniformHistogram(EnvelopeNDLite mbr, int ... numPartitions) {
    this.set(mbr);
    // Keep a private copy so that later changes to the caller's array do not affect the grid
    this.numPartitions = numPartitions.clone();
    // The flat array holds one entry per grid cell, i.e., the product of all partition counts
    int binCount = 1;
    for (int partitionsAlongAxis : numPartitions)
      binCount *= partitionsAlongAxis;
    this.values = new long[binCount];
  }

  /**
   * Chooses how many partitions to create along each axis so that the total number of grid cells is close to,
   * without exceeding, the requested number of buckets. The side lengths of each cell are kept as equal as
   * possible, i.e., cells are as close as possible to squares (or cubes ...).
   * Every axis gets at least one partition.
   * @param mbr the minimum bounding rectangle of the input space
   * @param numBuckets the desired number of buckets to create, does not have to be followed strictly
   * @return the number of partitions along each dimension
   */
  public static int[] computeNumPartitions(EnvelopeNDLite mbr, int numBuckets) {
    // Side length of an ideal cell whose volume is an equal share of the total volume
    final double idealCellVolume = mbr.getArea() / numBuckets;
    final double idealSideLength = Math.pow(idealCellVolume, 1.0 / mbr.getCoordinateDimension());
    final int[] partitionsPerAxis = new int[mbr.getCoordinateDimension()];
    // Running product of the partition counts chosen so far
    long bucketsSoFar = 1;
    for (int axis = 0; axis < mbr.getCoordinateDimension(); axis++) {
      int count = Math.max(1, (int) Math.round(mbr.getSideLength(axis) / idealSideLength));
      // Cap this axis if taking the rounded count would push the total beyond the requested budget
      if (bucketsSoFar * count > numBuckets)
        count = (int) Math.max(1, numBuckets / bucketsSoFar);
      partitionsPerAxis[axis] = count;
      bucketsSoFar *= count;
    }
    return partitionsPerAxis;
  }

  /**
   * Serializes this histogram. The output consists of the envelope (written by
   * {@link GeometryHelper#writeIEnvelope}), the number of partitions along each dimension, the length of the
   * compressed payload, and finally the GZIP-compressed bin values written as consecutive longs.
   * @param out the output to write to
   * @throws IOException if writing to the output or compressing the values fails
   */
  @Override
  public void writeExternal(ObjectOutput out) throws IOException {
    GeometryHelper.writeIEnvelope(this, out);
    for (int d = 0; d < getCoordinateDimension(); d++)
      out.writeInt(numPartitions[d]);
    // Compress the values into memory first because the compressed size must be written before the data
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // try-with-resources finishes the GZIP stream and releases the compressor even if a write fails
    try (DataOutputStream dos = new DataOutputStream(new GZIPOutputStream(baos))) {
      for (long value : values)
        dos.writeLong(value);
    }

    byte[] serializedData = baos.toByteArray();
    out.writeInt(serializedData.length);
    out.write(serializedData);
    LOG.info("Serialized a histogram into "+serializedData.length+" bytes");
  }

  /**
   * Restores this histogram from the format produced by {@link #writeExternal(ObjectOutput)}: the envelope,
   * the number of partitions along each dimension, the length of the compressed payload, and the
   * GZIP-compressed bin values. The existing values array is reused when it already has the right size.
   * @param in the input to read from
   * @throws IOException if reading from the input or decompressing the values fails
   */
  @Override
  public void readExternal(ObjectInput in) throws IOException {
    GeometryHelper.readIEnvelope(this, in);
    this.numPartitions = new int[getCoordinateDimension()];
    int numBins = 1;
    for (int d = 0; d < getCoordinateDimension(); d++) {
      numPartitions[d] = in.readInt();
      numBins *= numPartitions[d];
    }
    if (values == null || numBins != values.length)
      values = new long[numBins];
    int compressedDataLength = in.readInt();
    byte[] compressedData = new byte[compressedDataLength];
    in.readFully(compressedData);
    // try-with-resources releases the decompressor even if the payload is truncated or corrupt
    try (DataInputStream din = new DataInputStream(new GZIPInputStream(new ByteArrayInputStream(compressedData)))) {
      for (int i = 0; i < numBins; i++)
        values[i] = din.readLong();
    }
    LOG.info("Deserialized a histogram with "+numBins+" bins");
  }

  /**
   * Adds a value to the bin at the given grid position. If the resulting flat bin index is outside the
   * values array, the call has no effect.
   * @param position the position of the bin, one index per dimension
   * @param size the size to add to the given bin
   */
  public void addEntry(int[] position, long size) {
    assert position.length == getCoordinateDimension();
    final int binIndex = getBinID(position, numPartitions);
    // Ignore entries whose flat index is not a valid bin
    if (binIndex < 0 || binIndex >= values.length)
      return;
    values[binIndex] += size;
  }

  /**
   * Converts a grid position to a flat bin ID. Dimension 0 varies fastest, i.e., for two dimensions the
   * result is {@code position[1] * numPartitions[0] + position[0]}.
   * @param position the position of the bin, one index per dimension
   * @param numPartitions the number of partitions along each dimension
   * @return the flat bin ID; no range check is applied to the position
   */
  public static final int getBinID(int[] position, int[] numPartitions) {
    assert position.length == numPartitions.length;
    int binID = 0;
    // Fold from the last dimension down to the first so that dimension 0 ends up least significant
    for (int axis = position.length - 1; axis >= 0; axis--)
      binID = binID * numPartitions[axis] + position[axis];
    return binID;
  }

  /**
   * Adds a value to the bin that contains the given point. Points for which
   * {@link #getPointBinID(double[], EnvelopeNDLite, int[])} reports no valid bin are ignored.
   * @param coord the coordinates of the point
   * @param size the size of the point
   */
  public void addPoint(double[] coord, long size) {
    assert coord.length == getCoordinateDimension();
    final int targetBin = getPointBinID(coord, this, this.numPartitions);
    final boolean insideGrid = targetBin >= 0 && targetBin < values.length;
    if (insideGrid)
      values[targetBin] += size;
  }

  /**
   * Finds the ID of the bin that contains a point given as an array of coordinates.
   * A coordinate that is exactly equal to the maximum of the bounding box along a dimension is assigned to the
   * last partition of that dimension.
   * @param coord the coordinate of the point
   * @param mbb the minimum bounding box of the histogram
   * @param numPartitions the number of partitions per dimension in the histogram
   * @return the ID of the bin that contains the point or -1 if it is out of range
   */
  public static int getPointBinID(double[] coord, EnvelopeNDLite mbb, int[] numPartitions) {
    assert coord.length == numPartitions.length;
    int binID = 0;
    // Accumulate from the last dimension to the first so that dimension 0 is least significant
    for (int axis = coord.length - 1; axis >= 0; axis--) {
      final int position = coord[axis] == mbb.getMaxCoord(axis)
          ? numPartitions[axis] - 1
          : (int) Math.floor((coord[axis] - mbb.getMinCoord(axis)) * numPartitions[axis] / mbb.getSideLength(axis));
      // Any coordinate outside the grid makes the entire point out of range
      if (position < 0 || position >= numPartitions[axis])
        return -1;
      binID = binID * numPartitions[axis] + position;
    }
    return binID;
  }

  /**
   * Finds the ID of the bin that contains the given point.
   * @param point the point
   * @param mbb the minimum bounding box of the histogram
   * @param numPartitions the number of partitions per dimension in the histogram
   * @return the ID of the bin that contains the point or -1 if it is out of range
   */
  public static int getPointBinID(PointND point, EnvelopeNDLite mbb, int[] numPartitions) {
    assert point.getCoordinateDimension() == numPartitions.length;
    int binID = 0;
    // Accumulate from the last dimension to the first so that dimension 0 is least significant
    for (int axis = numPartitions.length - 1; axis >= 0; axis--) {
      // Points lying exactly on the upper side of the MBR are counted in the last partition
      // instead of being treated as out of range.
      final int position = point.getCoordinate(axis) == mbb.getMaxCoord(axis)
          ? numPartitions[axis] - 1
          : (int) Math.floor((point.getCoordinate(axis) - mbb.getMinCoord(axis)) * numPartitions[axis] / mbb.getSideLength(axis));
      if (position < 0 || position >= numPartitions[axis])
        return -1;
      binID = binID * numPartitions[axis] + position;
    }
    return binID;
  }

  /**
   * Adds a value to the bin that contains the given point. A coordinate at or beyond the upper bound of a
   * dimension is clamped to the last partition of that dimension. A point that falls below the lower bound
   * of any dimension is ignored.
   * @param p the point to add
   * @param size the size to add to the bin that contains the point
   */
  public void addPoint(PointND p, long size) {
    assert p.getCoordinateDimension() == getCoordinateDimension();
    int[] position = new int[p.getCoordinateDimension()];
    for (int d = 0; d < p.getCoordinateDimension(); d++) {
      position[d] = (int) Math.floor((p.getCoordinate(d) - this.getMinCoord(d)) * this.numPartitions[d] / this.getSideLength(d));
      // A negative index in one dimension could otherwise be folded into a valid flat bin ID that belongs to
      // a different cell (e.g., position {-1, 1} in a 10x10 grid maps to bin 9), so skip such points.
      if (position[d] < 0)
        return;
      position[d] = Math.min(position[d], numPartitions[d] - 1);
    }
    addEntry(position, size);
  }

  /**
   * Adds the bins of another histogram to the corresponding bins of this histogram. The other histogram must
   * be perfectly aligned with this one, i.e., the same MBR and the same number of rows and columns.
   * @param another the other histogram to merge with
   * @return this histogram to call serially
   */
  public UniformHistogram mergeAligned(UniformHistogram another) {
    assert this.isAligned(another);
    // Aligned histograms share the same bin layout so bins are combined index by index
    int bin = 0;
    while (bin < values.length) {
      this.values[bin] += another.values[bin];
      bin++;
    }
    return this;
  }

  /**
   * Computes the volume of the region shared by one bucket of each of two histograms.
   * @param h1 the first histogram
   * @param p1 the position of the bucket in the first histogram
   * @param h2 the second histogram
   * @param p2 the position of the bucket in the second histogram
   * @return the volume of the overlap region between the two buckets. This number will be zero if the two buckets
   * do not overlap.
   */
  public static double getOverlapVolume(UniformHistogram h1, int[] p1, UniformHistogram h2, int[] p2)
  {
    assert h1.getCoordinateDimension() == p1.length : "Invalid bucket ID for the first partition";
    assert h2.getCoordinateDimension() == p2.length : "Invalid bucket ID for the second partition";
    assert p1.length == p2.length : "Mismatching number of dimensions for the two histogram";

    double volume = 1.0;
    for (int d = 0; d < h1.getCoordinateDimension(); d++) {
      // The overlap along one dimension spans from the larger lower bound to the smaller upper bound
      final double upper = Math.min(h1.getPartitionMax(p1[d], d), h2.getPartitionMax(p2[d], d));
      final double lower = Math.max(h1.getPartitionMin(p1[d], d), h2.getPartitionMin(p2[d], d));
      final double extent = upper - lower;
      // Buckets that are disjoint along any dimension do not overlap at all
      if (extent <= 0)
        return 0;
      volume *= extent;
    }
    return volume;
  }

  /**
   * Converts a bucket position in this histogram to a flat bucket ID where dimension 0 varies fastest.
   * @param i the position of the bucket, one index per dimension
   * @return the flat ID of the bucket; no range check is applied to the position
   */
  public int getBucketID(int[] i)
  {
    int bucketID = 0;
    // Fold from the last dimension down to the first so that dimension 0 ends up least significant
    for (int axis = getCoordinateDimension() - 1; axis >= 0; axis--)
      bucketID = bucketID * getNumPartitions(axis) + i[axis];
    return bucketID;
  }

  /**
   * Computes the lower coordinate (inclusive) of the buckets at a given partition index along a dimension.
   * The value is a weighted average of the minimum and maximum coordinates of the histogram along that
   * dimension.
   * @param i the index of the partition
   * @param d the dimension
   * @return the lower coordinate of the buckets at the ith partition along the d dimension
   */
  public double getPartitionMin(int i, int d) {
    // The weight of the minimum shrinks and the weight of the maximum grows as the index increases
    final double weightedMin = getMinCoord(d) * (numPartitions[d] - i);
    final double weightedMax = getMaxCoord(d) * i;
    return (weightedMin + weightedMax) / numPartitions[d];
  }

  /**
   * Returns the higher coordinate of the buckets in the given dimension (exclusive)
   * @param i the index of the partition
   * @param d the dimension
   * @return the higher coordinate of the buckets at the ith partition along the d dimension
   */
  public double getPartitionMax(int i, int d) {
    // The upper bound of partition i is computed as the lower bound of partition i + 1
    return getPartitionMin(i + 1, d);
  }

  /**
   * Merges another histogram into this one when the two grids are not necessarily aligned. For each pair of
   * buckets visited, the value of the bucket in the other histogram is scaled by the fraction of its volume
   * that overlaps the bucket in this histogram, and the result is added to this histogram. The scaled value
   * is cast to {@code long}, so any fractional part of a contribution is dropped.
   * Only the values of this histogram are modified; the other histogram is only read.
   * @param another the other histogram to merge into this one; must have the same number of dimensions
   * @return this histogram to call serially
   */
  public UniformHistogram mergeNonAligned(UniformHistogram another){
    assert this.getCoordinateDimension() == another.getCoordinateDimension() :
        String.format("Mismatching number of dimensions %d != %d", this.getCoordinateDimension(), another.getCoordinateDimension());
    int numDimensions = this.getCoordinateDimension();
    // Volume of the bucket at partition 0 of the other histogram; it is used as the volume of every source
    // bucket when scaling the overlap
    double sourceBucketVolume = 1.0;
    for (int d = 0; d < numDimensions; d++)
      sourceBucketVolume *= another.getPartitionMax(0, d) - another.getPartitionMin(0, d);
    // The position of the source and target buckets initialized to {0}^d
    int[] i = new int[numDimensions]; // The position in this histogram
    int bucketIDI = 0; // Flat bucket ID that corresponds to i, maintained incrementally
    int[] j = new int[numDimensions]; // The position in the other histogram
    int bucketIDJ = 0; // Flat bucket ID that corresponds to j, maintained incrementally
    boolean finished = false;
    while (!finished) {
      // Update the values at buckets i, j
      double overlap = getOverlapVolume(this, i, another, j);
      // Sanity checks that the incrementally maintained IDs match the IDs computed from the positions
      assert another.getBucketID(j) == bucketIDJ : String.format("%d != %d", another.getBucketID(j), bucketIDJ);
      assert this.getBucketID(i) == bucketIDI : String.format("%d != %d", this.getBucketID(i), bucketIDI);
      // Add the share of the source bucket that is proportional to the overlapping volume (truncated to long)
      this.values[bucketIDI] += (long) (another.values[bucketIDJ] * overlap / sourceBucketVolume);

      // Advance i and j
      // Dimension being advanced
      int d = 0;
      // The advance in bucket ID for each increment in dimension d
      int multiplierI = 1;
      int multiplierJ = 1;
      // A flag this is set when a dimension is reset. When this happens, the next dimension is incremented
      boolean dimensionReset;
      do {
        dimensionReset = false;
        // Advance the position whose bucket ends first along this dimension; on a tie, j is advanced
        if (this.getPartitionMax(i[d], d) < another.getPartitionMax(j[d], d)) {
          i[d]++;
          bucketIDI += multiplierI;
        } else {
          j[d]++;
          bucketIDJ += multiplierJ;
        }
        if (i[d] >= this.getNumPartitions(d) || j[d] >= another.getNumPartitions(d)) {
          // Reset this dimension and advance to the next dimension
          // Undo the contribution of this dimension to both flat IDs before zeroing the positions
          bucketIDI -= i[d] * multiplierI;
          bucketIDJ -= j[d] * multiplierJ;
          i[d] = j[d] = 0;
          multiplierI *= numPartitions[d];
          multiplierJ *= another.numPartitions[d];
          d++;
          dimensionReset = true;
        }
      } while (d < numDimensions && dimensionReset);
      // The traversal ends when the reset carries past the last dimension
      finished = d == numDimensions;
    }
    return this;
  }

  /**
   * Checks whether another histogram is perfectly aligned with this one, i.e., it has the same MBR and the
   * same number of partitions along every dimension.
   * @param another the other histogram to test for alignment
   * @return {@code true} if the two histograms are aligned
   */
  public boolean isAligned(UniformHistogram another) {
    // The grids cannot be aligned if they cover different envelopes
    if (!super.equalsExact(another))
      return false;
    return Arrays.equals(this.numPartitions, another.numPartitions);
  }

  /**
   * Computes the envelope of the cell at the given grid position. To avoid unnecessary object creation, the
   * result is written into the given envelope instead of a new one.
   * @param position the grid position of the cell, one index per dimension
   * @param mbr the MBR to fill with the information. If {@code null}, a {@link NullPointerException} is thrown.
   */
  public void getCellEnvelope(int[] position, EnvelopeNDLite mbr) {
    mbr.setCoordinateDimension(this.getCoordinateDimension());
    for (int d = 0; d < this.getCoordinateDimension(); d++) {
      final int cells = numPartitions[d];
      final int lowerIndex = position[d];
      final int upperIndex = lowerIndex + 1;
      // Both bounds are weighted averages of the minimum and maximum of this histogram along the dimension
      final double cellMin = (this.getMinCoord(d) * (cells - lowerIndex) + this.getMaxCoord(d) * lowerIndex) / cells;
      mbr.setMinCoord(d, cellMin);
      final double cellMax = (this.getMinCoord(d) * (cells - upperIndex) + this.getMaxCoord(d) * upperIndex) / cells;
      mbr.setMaxCoord(d, cellMax);
    }
  }

  /**
   * Computes the sum of all values in the given range of grid cells. Parts of the range that fall outside the
   * grid are clipped away. If the range is empty along any dimension, i.e., its size is zero or negative
   * either as given or after clipping, the result is zero. The input arrays are not modified; they are copied
   * before clipping.
   * @param minPos the position of the lower corner in grid coordinates
   * @param sizes the size (number of cells) along each dimension
   * @return the value of the given range of bins
   */
  @Override
  public long getValue(int[] minPos, int[] sizes) {
    assert minPos.length == getCoordinateDimension();
    assert sizes.length == getCoordinateDimension();
    int totalNumberOfCells = 1;
    boolean copyMade = false;
    for (int d = 0; d < minPos.length; d++) {
      if (minPos[d] < 0 || minPos[d] + sizes[d] > numPartitions[d]) {
        // Clip the range to the grid. Work on copies to keep the arrays of the caller intact.
        if (!copyMade) {
          minPos = Arrays.copyOf(minPos, minPos.length);
          sizes = Arrays.copyOf(sizes, sizes.length);
          copyMade = true;
        }
        if (minPos[d] < 0) {
          sizes[d] += minPos[d]; // Reduce the size along this dimension
          minPos[d] = 0; // Reset the minimum position to zero along this dimension
        }
        if (minPos[d] + sizes[d] > numPartitions[d])
          sizes[d] = numPartitions[d] - minPos[d];
      }
      // An empty or inverted range along any dimension selects no cells. This is checked for every dimension
      // because two negative sizes would otherwise multiply into a positive number of cells.
      if (sizes[d] <= 0)
        return 0;
      totalNumberOfCells *= sizes[d];
    }
    // Offset of the current cell relative to minPos along each dimension
    int[] pos = new int[minPos.length];
    long sum = 0;
    for (int i = 0; i < totalNumberOfCells; i++) {
      // Compute the flat index of the current cell; dimension 0 is the least significant
      int d = minPos.length;
      int offset = 0;
      while (d-- > 0) {
        offset *= numPartitions[d];
        offset += minPos[d] + pos[d];
      }
      sum += values[offset];
      // Move to the next position; when a dimension wraps around, carry over to the next dimension
      d = 0;
      while (d < pos.length && ++pos[d] >= sizes[d])
        pos[d++] = 0;
    }
    return sum;
  }

  /**
   * Writes the contents of the histogram as tab-separated text for debugging purposes. A header line is
   * written first, followed by one line per cell containing its column, row, envelope, and frequency.
   * Cells are addressed by exactly two indexes (column and row).
   * @param out the print stream to write to, e.g., System.out
   */
  public void print(PrintStream out) {
    out.println("Column\tRow\tGeometry\tFrequency");
    // A single envelope is reused for all cells to avoid creating one object per cell
    final EnvelopeNDLite cellEnvelope = new EnvelopeNDLite();
    for (int y = 0; y < getNumPartitions(1); y++) {
      for (int x = 0; x < getNumPartitions(0); x++) {
        getCellEnvelope(new int[] {x, y}, cellEnvelope);
        final String geometry = cellEnvelope.toString();
        final long frequency = getValue(new int[] {x, y}, new int[] {1, 1});
        out.printf("%d\t%d\t%s\t%d\n", x, y, geometry, frequency);
      }
    }
  }

  /**
   * Estimates the sum of all entries in a given rectangle. It is assumed that x1 ≤ x2 and y1 ≤ y2.
   * The rectangle is first expanded outward to whole grid cells and the values of these cells are summed.
   * Then, the fractions of the border columns and rows that lie outside the rectangle are subtracted, and the
   * corner fractions that were subtracted twice are added back.
   * @param x1 the lower x-coordinate
   * @param y1 the lower y-coordinate
   * @param x2 the upper x-coordinate
   * @param y2 the upper y-coordinate
   * @return the sum of values in the given rectangle, rounded to the nearest long
   */
  public long sumRectangle(double x1, double y1, double x2, double y2) {
    final int numCols = numPartitions[0];
    final int numRows = numPartitions[1];
    final double minX = this.getMinCoord(0);
    final double minY = this.getMinCoord(1);
    final double width = this.getSideLength(0);
    final double height = this.getSideLength(1);

    // The range of cells that fully covers the rectangle: [col1, col2[ x [row1, row2[
    final int col1 = (int) Math.floor((x1 - minX) * numCols / width);
    final int col2 = (int) Math.ceil((x2 - minX) * numCols / width);
    final int row1 = (int) Math.floor((y1 - minY) * numRows / height);
    final int row2 = (int) Math.ceil((y2 - minY) * numRows / height);

    // Compute the fraction in each direction that is outside the expanded region. These are the parts that need
    // to be subtracted. The variable naming assumes the coordinates increase from left to right and from top to bottom
    final double cellWidth = width / numCols;
    final double cellHeight = height / numRows;
    final double leftOffset = (x1 - minX) / cellWidth;
    final double fractionLeft = leftOffset - Math.floor(leftOffset);
    final double topOffset = (y1 - minY) / cellHeight;
    final double fractionTop = topOffset - Math.floor(topOffset);
    final double rightOffset = (this.getMaxCoord(0) - x2) / cellWidth;
    final double fractionRight = rightOffset - Math.floor(rightOffset);
    final double bottomOffset = (this.getMaxCoord(1) - y2) / cellHeight;
    final double fractionBottom = bottomOffset - Math.floor(bottomOffset);

    final int colSpan = col2 - col1;
    final int rowSpan = row2 - row1;
    final int lastCol = col2 - 1;
    final int lastRow = row2 - 1;

    // Start with the sum of all cells that the rectangle touches
    double estimate = getValue(new int[] {col1, row1}, new int[] {colSpan, rowSpan});
    // Remove the uncovered part of the left column
    estimate -= fractionLeft * getValue(new int[] {col1, row1}, new int[] {1, rowSpan});
    // Remove the uncovered part of the right column
    estimate -= fractionRight * getValue(new int[] {lastCol, row1}, new int[] {1, rowSpan});
    // Remove the uncovered part of the top row
    estimate -= fractionTop * getValue(new int[] {col1, row1}, new int[] {colSpan, 1});
    // Remove the uncovered part of the bottom row
    estimate -= fractionBottom * getValue(new int[] {col1, lastRow}, new int[] {colSpan, 1});

    // Each corner cell was reduced twice, once with its column and once with its row; add one back
    // Top-left corner
    estimate += fractionTop * fractionLeft * getValue(new int[] {col1, row1}, new int[] {1, 1});
    // Bottom-left corner
    estimate += fractionBottom * fractionLeft * getValue(new int[] {col1, lastRow}, new int[] {1, 1});
    // Top-right corner
    estimate += fractionTop * fractionRight * getValue(new int[] {lastCol, row1}, new int[] {1, 1});
    // Bottom-right corner
    estimate += fractionBottom * fractionRight * getValue(new int[] {lastCol, lastRow}, new int[] {1, 1});
    return Math.round(estimate);
  }

  /**
   * Returns the number of partitions of the grid along the given dimension.
   * @param d the dimension
   * @return the number of partitions along the d dimension
   */
  public int getNumPartitions(int d) {
    return numPartitions[d];
  }

  /**
   * Increment a bin given its position in the array of bins. The bin ID is used directly as an index into
   * the values array without any range check.
   * @param binID the ID of the bin to increment. Must be in the range [0, {@link #getNumBins()}[.
   * @param count the increment amount
   * @see #getBinID(double[])
   */
  public void incrementBin(int binID, long count) {
    values[binID] += count;
  }

  /**
   * Returns the total number of bins in this histogram, i.e., the length of the values array.
   * @return the number of bins
   */
  @Override
  public int getNumBins() {
    return values.length;
  }

  /**
   * Returns the value stored in the bin with the given ID. The bin ID is used directly as an index into the
   * values array without any range check.
   * @param binID the ID of the bin
   * @return the value of the given bin
   */
  @Override
  public long getBinValue(int binID) {
    return values[binID];
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy