// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// org.apache.lucene.spatial.prefix.HeatmapFacetCounter Maven / Gradle / Ivy
//
// There is a newer version: 10.1.0
// Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.SpatialRelation;

/**
 * Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as a
 * so-called "heatmap", hence the name.
 *
 * @lucene.experimental
 */
public class HeatmapFacetCounter {
  // TODO where should this code live? It could go to PrefixTreeFacetCounter, or maybe here in its
  // own class is fine.

  /** Maximum number of supported rows (or columns). */
  public static final int MAX_ROWS_OR_COLUMNS = (int) Math.sqrt(ArrayUtil.MAX_ARRAY_LENGTH);

  static {
    // Fails at class-initialization time if MAX_ROWS_OR_COLUMNS squared does not fit in an int;
    // the rest of this class relies on that product being overflow-free.
    Math.multiplyExact(MAX_ROWS_OR_COLUMNS, MAX_ROWS_OR_COLUMNS);
  }

  /** Response structure */
  public static class Heatmap {
    /** Number of columns (cells along X). */
    public final int columns;

    /** Number of rows (cells along Y). */
    public final int rows;

    /** Cell counts, in order of 1st column (all rows) then 2nd column (all rows) etc. */
    public final int[] counts;

    /** The rectangle covered by the grid. */
    public final Rectangle region;

    /**
     * Creates a heatmap whose counts are all zero.
     *
     * @param columns number of columns
     * @param rows number of rows
     * @param region the rectangle covered by the grid
     * @throws ArithmeticException if {@code columns * rows} overflows an int
     */
    public Heatmap(int columns, int rows, Rectangle region) {
      this.columns = columns;
      this.rows = rows;
      // multiplyExact: a silently wrapped product would allocate a wrongly sized array
      this.counts = new int[Math.multiplyExact(columns, rows)];
      this.region = region;
    }

    /**
     * Returns the count of the cell at column {@code x} and row {@code y}.
     *
     * @param x the 0-based column index
     * @param y the 0-based row index
     */
    public int getCount(int x, int y) {
      return counts[x * rows + y];
    }

    @Override
    public String toString() {
      return "Heatmap{" + columns + "x" + rows + " " + region + '}';
    }
  }

  /**
   * Calculates spatial 2D facets (aggregated counts) in a grid, sometimes called a heatmap. Facet
   * computation is implemented by navigating the underlying indexed terms efficiently. If you don't
   * know exactly what facetLevel to go to for a given input box but you have some sense of how many
   * cells there should be relative to the size of the shape, then consider using the logic that
   * {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy} uses when approximating what level
   * to go to when indexing a shape given a distErrPct.
   *
   * @param strategy the strategy supplying the prefix tree (grid) and spatial context
   * @param context the IndexReader's context
   * @param topAcceptDocs a Bits to limit counted docs. If null, live docs are counted.
   * @param inputShape the shape to gather grid squares for; typically a {@link Rectangle}. The
   *     actual heatmap area will usually be larger since the cells on the edge that overlap are
   *     returned. We always return a rectangle of integers even if the inputShape isn't a rectangle
   *     -- the non-intersecting cells will all be 0. If null is given, the entire world is assumed.
   * @param facetLevel the target depth (detail) of cells.
   * @param maxCells the maximum number of cells to return. If the cells exceed this count, an
   *     {@link IllegalArgumentException} is thrown.
   * @return the heatmap grid of counts together with the rectangle it covers
   * @throws IllegalArgumentException if {@code maxCells} is larger than {@code MAX_ROWS_OR_COLUMNS}
   *     squared, or if the grid needed for {@code inputShape} at {@code facetLevel} has more than
   *     {@link #MAX_ROWS_OR_COLUMNS} rows or columns, or more than {@code maxCells} cells
   * @throws IOException if thrown while the facets are computed
   */
  public static Heatmap calcFacets(
      PrefixTreeStrategy strategy,
      IndexReaderContext context,
      Bits topAcceptDocs,
      Shape inputShape,
      final int facetLevel,
      int maxCells)
      throws IOException {
    // Cannot overflow; see the static initializer.
    final int maxSupportedCells = MAX_ROWS_OR_COLUMNS * MAX_ROWS_OR_COLUMNS;
    if (maxCells > maxSupportedCells) {
      throw new IllegalArgumentException(
          "maxCells (" + maxCells + ") should be <= " + maxSupportedCells);
    }
    if (inputShape == null) {
      inputShape = strategy.getSpatialContext().getWorldBounds();
    }
    final Rectangle inputRect = inputShape.getBoundingBox();
    // First get the rect of the cell at the bottom-left at depth facetLevel
    final SpatialPrefixTree grid = strategy.getGrid();
    final SpatialContext ctx = grid.getSpatialContext();
    final Point cornerPt = ctx.getShapeFactory().pointXY(inputRect.getMinX(), inputRect.getMinY());
    final CellIterator cellIterator = grid.getTreeCellIterator(cornerPt, facetLevel);
    // Keep only the last cell the iterator yields; it is asserted below to be at facetLevel.
    Cell cornerCell = null;
    while (cellIterator.hasNext()) {
      cornerCell = cellIterator.next();
    }
    assert cornerCell != null && cornerCell.getLevel() == facetLevel
        : "Cell not at target level: " + cornerCell;
    final Rectangle cornerRect = (Rectangle) cornerCell.getShape();
    assert cornerRect.hasArea();
    // Now calculate the number of columns and rows necessary to cover the inputRect
    double heatMinX = cornerRect.getMinX(); // note: we might change this below...
    final double cellWidth = cornerRect.getWidth();
    final Rectangle worldRect = ctx.getWorldBounds();
    final int columns =
        calcRowsOrCols(
            cellWidth, heatMinX, inputRect.getWidth(), inputRect.getMinX(), worldRect.getWidth());
    final double heatMinY = cornerRect.getMinY();
    final double cellHeight = cornerRect.getHeight();
    final int rows =
        calcRowsOrCols(
            cellHeight,
            heatMinY,
            inputRect.getHeight(),
            inputRect.getMinY(),
            worldRect.getHeight());
    assert rows > 0 && columns > 0;
    // Short-circuit order matters: columns * rows is only evaluated once both factors are known
    // to be <= MAX_ROWS_OR_COLUMNS, so the product cannot overflow.
    if (columns > MAX_ROWS_OR_COLUMNS || rows > MAX_ROWS_OR_COLUMNS || columns * rows > maxCells) {
      throw new IllegalArgumentException(
          "Too many cells ("
              + columns
              + " x "
              + rows
              + ") for level "
              + facetLevel
              + " shape "
              + inputRect);
    }

    // Create resulting heatmap bounding rectangle & Heatmap object.
    final double halfCellWidth = cellWidth / 2.0;
    // if X world-wraps, use world bounds' range
    if (columns * cellWidth + halfCellWidth > worldRect.getWidth()) {
      heatMinX = worldRect.getMinX();
    }
    double heatMaxX = heatMinX + columns * cellWidth;
    if (Math.abs(heatMaxX - worldRect.getMaxX()) < halfCellWidth) { // numeric conditioning issue
      heatMaxX = worldRect.getMaxX();
    } else if (heatMaxX > worldRect.getMaxX()) { // wraps dateline (won't happen if !geo)
      heatMaxX = heatMaxX - worldRect.getMaxX() + worldRect.getMinX();
    }
    final double halfCellHeight = cellHeight / 2.0;
    double heatMaxY = heatMinY + rows * cellHeight;
    if (Math.abs(heatMaxY - worldRect.getMaxY()) < halfCellHeight) { // numeric conditioning issue
      heatMaxY = worldRect.getMaxY();
    }

    final Heatmap heatmap =
        new Heatmap(
            columns, rows, ctx.getShapeFactory().rect(heatMinX, heatMaxX, heatMinY, heatMaxY));
    if (topAcceptDocs instanceof Bits.MatchNoBits) {
      return heatmap; // short-circuit
    }

    // All ancestor cell counts (of facetLevel) will be captured during facet visiting and applied
    // later. If the data is just points then there won't be any ancestors.

    // Facet count of ancestors covering all of the heatmap; a single-element array so it can be
    // accumulated in the inner class.
    int[] allCellsAncestorCount = new int[1];
    // All other ancestors, keyed by their rectangle:
    Map<Rectangle, Integer> ancestors = new HashMap<>();

    // Now lets count some facets!
    PrefixTreeFacetCounter.compute(
        strategy,
        context,
        topAcceptDocs,
        inputShape,
        facetLevel,
        new PrefixTreeFacetCounter.FacetVisitor() {
          @Override
          public void visit(Cell cell, int count) {
            // The enclosing heatMinX is reassigned above and so cannot be captured here; read
            // the final value back from the heatmap's region instead.
            final double regionMinX = heatmap.region.getMinX();
            final Rectangle rect = (Rectangle) cell.getShape();
            if (cell.getLevel() == facetLevel) { // heatmap level; count it directly
              // convert to col & row
              int column;
              if (rect.getMinX() >= regionMinX) {
                column = (int) Math.round((rect.getMinX() - regionMinX) / cellWidth);
              } else { // due to dateline wrap
                // NOTE(review): the literal 360 presumes a geographic world in degrees -- confirm
                column = (int) Math.round((rect.getMinX() + 360 - regionMinX) / cellWidth);
              }
              int row = (int) Math.round((rect.getMinY() - heatMinY) / cellHeight);
              // note: unfortunately, it's possible for us to visit adjacent cells to the heatmap
              // (if the SpatialPrefixTree allows adjacent cells to overlap on the seam), so we
              // need to skip them
              if (column < 0 || column >= heatmap.columns || row < 0 || row >= heatmap.rows) {
                return;
              }
              // increment
              heatmap.counts[column * heatmap.rows + row] += count;

            } else if (rect.relate(heatmap.region) == SpatialRelation.CONTAINS) {
              // ancestor containing the whole heatmap: applies to every cell
              allCellsAncestorCount[0] += count;
            } else { // ancestor covering only part of the heatmap
              ancestors.merge(rect, count, Integer::sum);
            }
          }
        });

    // Update the heatmap counts with ancestor counts

    // Apply allCellsAncestorCount
    if (allCellsAncestorCount[0] > 0) {
      for (int i = 0; i < heatmap.counts.length; i++) {
        heatmap.counts[i] += allCellsAncestorCount[0];
      }
    }

    // Apply ancestors
    //  note: This approach isn't optimized for a ton of ancestor cells. We'll potentially increment
    //    the same cells multiple times in separate passes if any ancestors overlap. IF this poses a
    //    problem, we could optimize it with additional complication by keeping track of intervals
    //    in a sorted tree structure (possible TreeMap/Set) and iterate them cleverly such that we
    //    just make one pass at this stage.

    int[] pair = new int[2]; // output of intersectInterval
    for (Map.Entry<Rectangle, Integer> entry : ancestors.entrySet()) {
      Rectangle rect = entry.getKey(); // from a cell (thus doesn't cross DL)
      final int count = entry.getValue();

      // note: we approach this in a way that eliminates int overflow/underflow (think huge cell,
      // tiny heatmap)
      intersectInterval(heatMinY, heatMaxY, cellHeight, rows, rect.getMinY(), rect.getMaxY(), pair);
      final int startRow = pair[0];
      final int endRow = pair[1];

      if (!heatmap.region.getCrossesDateLine()) {
        intersectInterval(
            heatMinX, heatMaxX, cellWidth, columns, rect.getMinX(), rect.getMaxX(), pair);
        final int startCol = pair[0];
        final int endCol = pair[1];
        incrementRange(heatmap, startCol, endCol, startRow, endRow, count);

      } else {
        // note: the cell rect might intersect 2 disjoint parts of the heatmap, so we do the left &
        // right separately
        // NOTE(review): the literals 180 / -180 presume a geographic world in degrees -- confirm
        final int leftColumns = (int) Math.round((180 - heatMinX) / cellWidth);
        final int rightColumns = heatmap.columns - leftColumns;
        // left half of dateline:
        if (rect.getMaxX() > heatMinX) {
          intersectInterval(
              heatMinX, 180, cellWidth, leftColumns, rect.getMinX(), rect.getMaxX(), pair);
          final int startCol = pair[0];
          final int endCol = pair[1];
          incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
        }
        // right half of dateline
        if (rect.getMinX() < heatMaxX) {
          intersectInterval(
              -180, heatMaxX, cellWidth, rightColumns, rect.getMinX(), rect.getMaxX(), pair);
          // columns right of the dateline are stored after the left ones
          final int startCol = pair[0] + leftColumns;
          final int endCol = pair[1] + leftColumns;
          incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
        }
      }
    }

    return heatmap;
  }

  /**
   * Computes which heatmap intervals (rows or columns) along one axis are overlapped by a cell.
   * The caller must already know that the two ranges intersect.
   *
   * @param heatMin lower bound of the heatmap along this axis
   * @param heatMax upper bound of the heatmap along this axis
   * @param heatCellLen length of one heatmap interval along this axis
   * @param numCells number of heatmap intervals along this axis
   * @param cellMin lower bound of the cell along this axis
   * @param cellMax upper bound of the cell along this axis
   * @param out receives the first overlapped interval index in {@code out[0]} and the last
   *     (inclusive) in {@code out[1]}
   */
  private static void intersectInterval(
      double heatMin,
      double heatMax,
      double heatCellLen,
      int numCells,
      double cellMin,
      double cellMax,
      int[] out) {
    assert heatMin < heatMax && cellMin < cellMax;
    // precondition: we know there's an intersection
    if (heatMin >= cellMin) {
      out[0] = 0; // the cell starts at or before the heatmap
    } else {
      out[0] = (int) Math.round((cellMin - heatMin) / heatCellLen);
    }
    if (heatMax <= cellMax) {
      out[1] = numCells - 1; // the cell ends at or after the heatmap
    } else {
      out[1] = (int) Math.round((cellMax - heatMin) / heatCellLen) - 1;
    }
  }

  /**
   * Adds {@code count} to every heatmap cell within the given inclusive column and row ranges,
   * ignoring any part of the ranges that lies outside the heatmap.
   *
   * @param heatmap the heatmap whose counts are updated
   * @param startColumn first column index (inclusive)
   * @param endColumn last column index (inclusive)
   * @param startRow first row index (inclusive)
   * @param endRow last row index (inclusive)
   * @param count the amount to add to each covered cell
   */
  private static void incrementRange(
      Heatmap heatmap, int startColumn, int endColumn, int startRow, int endRow, int count) {
    // The requested range is not necessarily within the heatmap; clamp it to valid indexes. The
    // end indexes are absolute and inclusive, so clamping a start must not move its end.
    startColumn = Math.max(0, startColumn);
    endColumn = Math.min(heatmap.columns - 1, endColumn);
    startRow = Math.max(0, startRow);
    endRow = Math.min(heatmap.rows - 1, endRow);

    if (startColumn > endColumn || startRow > endRow) {
      return; // short-circuit: nothing inside the heatmap
    }
    for (int c = startColumn; c <= endColumn; c++) {
      int cBase = c * heatmap.rows; // counts are stored column by column
      for (int r = startRow; r <= endRow; r++) {
        heatmap.counts[cBase + r] += count;
      }
    }
  }

  /**
   * Computes the number of intervals (rows or columns) to cover a range given the sizes.
   *
   * @param cellRange length of one cell along this axis
   * @param cellMin lower bound of the corner cell along this axis
   * @param requestRange length of the requested range along this axis
   * @param requestMin lower bound of the requested range; must be {@code >= cellMin}
   * @param worldRange length of the world bounds along this axis
   * @return the number of intervals, at least 1; {@link Integer#MAX_VALUE} if the true number does
   *     not fit in an int
   */
  private static int calcRowsOrCols(
      double cellRange, double cellMin, double requestRange, double requestMin, double worldRange) {
    assert requestMin >= cellMin;
    // Idealistically this wouldn't be so complicated but we concern ourselves with overflow and
    // edge cases
    double range = (requestRange + (requestMin - cellMin));
    if (range == 0) {
      return 1; // a degenerate request sitting exactly on the cell's minimum edge
    }
    final double intervals = Math.ceil(range / cellRange);
    if (intervals > Integer.MAX_VALUE) {
      return Integer.MAX_VALUE; // should result in an error soon (exceed thresholds)
    }
    // ensures we don't have more intervals than world bounds (possibly due to rounding/edge issue)
    final long intervalsMax = Math.round(worldRange / cellRange);
    if (intervalsMax > Integer.MAX_VALUE) {
      // just return intervals
      return (int) intervals;
    }
    return Math.min((int) intervalsMax, (int) intervals);
  }

  /** Not instantiable; this class only exposes static methods. */
  private HeatmapFacetCounter() {}
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy