org.apache.lucene.spatial.prefix.HeatmapFacetCounter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-spatial-extras Show documentation
Show all versions of lucene-spatial-extras Show documentation
Advanced Spatial Shape Strategies for Apache Lucene
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.SpatialRelation;
/**
* Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as a
* so-called "heatmap", hence the name.
*
* @lucene.experimental
*/
public class HeatmapFacetCounter {
  // TODO where should this code live? It could go to PrefixTreeFacetCounter, or maybe here in its
  // own class is fine.

  /** Maximum number of supported rows (or columns). */
  public static final int MAX_ROWS_OR_COLUMNS = (int) Math.sqrt(ArrayUtil.MAX_ARRAY_LENGTH);

  static {
    // Sanity check: will throw if the squared limit doesn't stay within integer range.
    Math.multiplyExact(MAX_ROWS_OR_COLUMNS, MAX_ROWS_OR_COLUMNS);
  }

  /** Response structure */
  public static class Heatmap {
    /** Number of columns (cells along X) in the grid. */
    public final int columns;

    /** Number of rows (cells along Y) in the grid. */
    public final int rows;

    /** Cell counts, in order of 1st column (all rows) then 2nd column (all rows) etc. */
    public final int[] counts;

    /** The rectangle covered by the grid. */
    public final Rectangle region;

    /**
     * Creates a heatmap with all counts initialized to zero.
     *
     * @param columns number of columns
     * @param rows number of rows
     * @param region the rectangle the grid covers
     * @throws ArithmeticException if {@code columns * rows} overflows an int
     */
    public Heatmap(int columns, int rows, Rectangle region) {
      this.columns = columns;
      this.rows = rows;
      // multiplyExact so that an overflowing product fails loudly instead of silently allocating
      // a wrongly sized array
      this.counts = new int[Math.multiplyExact(columns, rows)];
      this.region = region;
    }

    /**
     * Returns the count of the cell at column {@code x} and row {@code y}.
     *
     * @param x the column index
     * @param y the row index
     */
    public int getCount(int x, int y) {
      return counts[x * rows + y];
    }

    @Override
    public String toString() {
      return "Heatmap{" + columns + "x" + rows + " " + region + '}';
    }
  }

  /**
   * Calculates spatial 2D facets (aggregated counts) in a grid, sometimes called a heatmap. Facet
   * computation is implemented by navigating the underlying indexed terms efficiently. If you don't
   * know exactly what facetLevel to go to for a given input box but you have some sense of how many
   * cells there should be relative to the size of the shape, then consider using the logic that
   * {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy} uses when approximating what level
   * to go to when indexing a shape given a distErrPct.
   *
   * @param strategy the prefix tree strategy supplying the grid and spatial context
   * @param context the IndexReader's context
   * @param topAcceptDocs a Bits to limit counted docs. If null, live docs are counted.
   * @param inputShape the shape to gather grid squares for; typically a {@link Rectangle}. The
   *     actual heatmap area will usually be larger since the cells on the edge that overlap are
   *     returned. We always return a rectangle of integers even if the inputShape isn't a rectangle
   *     -- the non-intersecting cells will all be 0. If null is given, the entire world is assumed.
   * @param facetLevel the target depth (detail) of cells.
   * @param maxCells the maximum number of cells to return. If the cells exceed this count, an
   *     {@link IllegalArgumentException} is thrown.
   * @return the heatmap holding the grid dimensions, its covered region and the per-cell counts
   * @throws IllegalArgumentException if {@code maxCells} exceeds {@code MAX_ROWS_OR_COLUMNS}
   *     squared, or if the grid needed at {@code facetLevel} has too many rows, columns or cells
   * @throws IOException if thrown while visiting the indexed terms
   */
  public static Heatmap calcFacets(
      PrefixTreeStrategy strategy,
      IndexReaderContext context,
      Bits topAcceptDocs,
      Shape inputShape,
      final int facetLevel,
      int maxCells)
      throws IOException {
    // The static initializer guarantees this product does not overflow.
    final int maxSupportedCells = MAX_ROWS_OR_COLUMNS * MAX_ROWS_OR_COLUMNS;
    if (maxCells > maxSupportedCells) {
      throw new IllegalArgumentException(
          "maxCells (" + maxCells + ") should be <= " + maxSupportedCells);
    }
    if (inputShape == null) {
      inputShape = strategy.getSpatialContext().getWorldBounds();
    }
    final Rectangle inputRect = inputShape.getBoundingBox();

    // First get the rect of the cell at the bottom-left at depth facetLevel
    final SpatialPrefixTree grid = strategy.getGrid();
    final SpatialContext ctx = grid.getSpatialContext();
    final Point cornerPt = ctx.getShapeFactory().pointXY(inputRect.getMinX(), inputRect.getMinY());
    final CellIterator cellIterator = grid.getTreeCellIterator(cornerPt, facetLevel);
    Cell cornerCell = null;
    while (cellIterator.hasNext()) { // keep the last cell produced by the iterator
      cornerCell = cellIterator.next();
    }
    assert cornerCell != null && cornerCell.getLevel() == facetLevel
        : "Cell not at target level: " + cornerCell;
    final Rectangle cornerRect = (Rectangle) cornerCell.getShape();
    assert cornerRect.hasArea();

    // Now calculate the number of columns and rows necessary to cover the inputRect
    double heatMinX = cornerRect.getMinX(); // note: we might change this below...
    final double cellWidth = cornerRect.getWidth();
    final Rectangle worldRect = ctx.getWorldBounds();
    final int columns =
        calcRowsOrCols(
            cellWidth, heatMinX, inputRect.getWidth(), inputRect.getMinX(), worldRect.getWidth());
    final double heatMinY = cornerRect.getMinY();
    final double cellHeight = cornerRect.getHeight();
    final int rows =
        calcRowsOrCols(
            cellHeight,
            heatMinY,
            inputRect.getHeight(),
            inputRect.getMinY(),
            worldRect.getHeight());
    assert rows > 0 && columns > 0;
    // The per-dimension checks come first, so columns * rows cannot overflow when it is evaluated.
    if (columns > MAX_ROWS_OR_COLUMNS || rows > MAX_ROWS_OR_COLUMNS || columns * rows > maxCells) {
      throw new IllegalArgumentException(
          "Too many cells ("
              + columns
              + " x "
              + rows
              + ") for level "
              + facetLevel
              + " shape "
              + inputRect);
    }

    // Create resulting heatmap bounding rectangle & Heatmap object.
    final double halfCellWidth = cellWidth / 2.0;
    // if X world-wraps, use world bounds' range
    if (columns * cellWidth + halfCellWidth > worldRect.getWidth()) {
      heatMinX = worldRect.getMinX();
    }
    double heatMaxX = heatMinX + columns * cellWidth;
    if (Math.abs(heatMaxX - worldRect.getMaxX()) < halfCellWidth) { // numeric conditioning issue
      heatMaxX = worldRect.getMaxX();
    } else if (heatMaxX > worldRect.getMaxX()) { // wraps dateline (won't happen if !geo)
      heatMaxX = heatMaxX - worldRect.getMaxX() + worldRect.getMinX();
    }
    final double halfCellHeight = cellHeight / 2.0;
    double heatMaxY = heatMinY + rows * cellHeight;
    if (Math.abs(heatMaxY - worldRect.getMaxY()) < halfCellHeight) { // numeric conditioning issue
      heatMaxY = worldRect.getMaxY();
    }

    final Heatmap heatmap =
        new Heatmap(
            columns, rows, ctx.getShapeFactory().rect(heatMinX, heatMaxX, heatMinY, heatMaxY));
    if (topAcceptDocs instanceof Bits.MatchNoBits) {
      return heatmap; // short-circuit
    }

    // All ancestor cell counts (of facetLevel) will be captured during facet visiting and applied
    // later. If the data is just points then there won't be any ancestors.

    // Facet count of ancestors covering all of the heatmap:
    // single-element array so it can be accumulated in the inner class
    int[] allCellsAncestorCount = new int[1];
    // All other ancestors, keyed by their cell rectangle:
    Map<Rectangle, Integer> ancestors = new HashMap<>();

    // Now lets count some facets!
    PrefixTreeFacetCounter.compute(
        strategy,
        context,
        topAcceptDocs,
        inputShape,
        facetLevel,
        new PrefixTreeFacetCounter.FacetVisitor() {
          @Override
          public void visit(Cell cell, int count) {
            // Shadows the enclosing (non-final) heatMinX with the final value from the region.
            final double heatMinX = heatmap.region.getMinX();
            final Rectangle rect = (Rectangle) cell.getShape();
            if (cell.getLevel() == facetLevel) { // heatmap level; count it directly
              // convert to col & row
              int column;
              if (rect.getMinX() >= heatMinX) {
                column = (int) Math.round((rect.getMinX() - heatMinX) / cellWidth);
              } else { // due to dateline wrap
                column = (int) Math.round((rect.getMinX() + 360 - heatMinX) / cellWidth);
              }
              int row = (int) Math.round((rect.getMinY() - heatMinY) / cellHeight);
              // note: unfortunately, it's possible for us to visit adjacent cells to the heatmap
              // (if the SpatialPrefixTree allows adjacent cells to overlap on the seam), so we
              // need to skip them
              if (column < 0 || column >= heatmap.columns || row < 0 || row >= heatmap.rows) {
                return;
              }
              // increment
              heatmap.counts[column * heatmap.rows + row] += count;
            } else if (rect.relate(heatmap.region) == SpatialRelation.CONTAINS) {
              allCellsAncestorCount[0] += count;
            } else { // ancestor
              // accumulate the count for this ancestor rectangle
              ancestors.merge(rect, count, Integer::sum);
            }
          }
        });

    // Update the heatmap counts with ancestor counts

    // Apply allCellsAncestorCount
    if (allCellsAncestorCount[0] > 0) {
      for (int i = 0; i < heatmap.counts.length; i++) {
        heatmap.counts[i] += allCellsAncestorCount[0];
      }
    }

    // Apply ancestors
    // note: This approach isn't optimized for a ton of ancestor cells. We'll potentially increment
    // the same cells multiple times in separate passes if any ancestors overlap. IF this poses a
    // problem, we could optimize it with additional complication by keeping track of intervals in
    // a sorted tree structure (possible TreeMap/Set) and iterate them cleverly such that we just
    // make one pass at this stage.

    int[] pair = new int[2]; // output of intersectInterval
    for (Map.Entry<Rectangle, Integer> entry : ancestors.entrySet()) {
      Rectangle rect = entry.getKey(); // from a cell (thus doesn't cross DL)
      final int count = entry.getValue();

      // note: we approach this in a way that eliminates int overflow/underflow (think huge cell,
      // tiny heatmap)
      intersectInterval(heatMinY, heatMaxY, cellHeight, rows, rect.getMinY(), rect.getMaxY(), pair);
      final int startRow = pair[0];
      final int endRow = pair[1];

      if (!heatmap.region.getCrossesDateLine()) {
        intersectInterval(
            heatMinX, heatMaxX, cellWidth, columns, rect.getMinX(), rect.getMaxX(), pair);
        final int startCol = pair[0];
        final int endCol = pair[1];
        incrementRange(heatmap, startCol, endCol, startRow, endRow, count);

      } else {
        // note: the cell rect might intersect 2 disjoint parts of the heatmap, so we do the left &
        // right separately
        final int leftColumns = (int) Math.round((180 - heatMinX) / cellWidth);
        final int rightColumns = heatmap.columns - leftColumns;
        // left half of dateline:
        if (rect.getMaxX() > heatMinX) {
          intersectInterval(
              heatMinX, 180, cellWidth, leftColumns, rect.getMinX(), rect.getMaxX(), pair);
          final int startCol = pair[0];
          final int endCol = pair[1];
          incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
        }
        // right half of dateline
        if (rect.getMinX() < heatMaxX) {
          intersectInterval(
              -180, heatMaxX, cellWidth, rightColumns, rect.getMinX(), rect.getMaxX(), pair);
          // columns right of the dateline are stored after the left-hand columns
          final int startCol = pair[0] + leftColumns;
          final int endCol = pair[1] + leftColumns;
          incrementRange(heatmap, startCol, endCol, startRow, endRow, count);
        }
      }
    }

    return heatmap;
  }

  /**
   * Computes the inclusive range of heatmap cell indexes, along one dimension, that a cell's
   * interval overlaps. The start index is written to {@code out[0]} and the end index to {@code
   * out[1]}.
   *
   * @param heatMin minimum coordinate of the heatmap interval
   * @param heatMax maximum coordinate of the heatmap interval
   * @param heatCellLen length of one heatmap cell along this dimension
   * @param numCells number of heatmap cells along this dimension
   * @param cellMin minimum coordinate of the cell being intersected
   * @param cellMax maximum coordinate of the cell being intersected
   * @param out two-element array receiving the start and end index
   */
  private static void intersectInterval(
      double heatMin,
      double heatMax,
      double heatCellLen,
      int numCells,
      double cellMin,
      double cellMax,
      int[] out) {
    assert heatMin < heatMax && cellMin < cellMax;
    // precondition: we know there's an intersection
    if (heatMin >= cellMin) {
      out[0] = 0;
    } else {
      out[0] = (int) Math.round((cellMin - heatMin) / heatCellLen);
    }
    if (heatMax <= cellMax) {
      out[1] = numCells - 1;
    } else {
      out[1] = (int) Math.round((cellMax - heatMin) / heatCellLen) - 1;
    }
  }

  /**
   * Adds {@code count} to every heatmap cell within the given inclusive column and row ranges,
   * after clamping the ranges to the heatmap.
   */
  private static void incrementRange(
      Heatmap heatmap, int startColumn, int endColumn, int startRow, int endRow, int count) {
    // startColumn & startRow are not necessarily within the heatmap range; likewise numRows/columns
    // may overlap.
    if (startColumn < 0) {
      endColumn += startColumn;
      startColumn = 0;
    }
    endColumn = Math.min(heatmap.columns - 1, endColumn);

    if (startRow < 0) {
      endRow += startRow;
      startRow = 0;
    }
    endRow = Math.min(heatmap.rows - 1, endRow);

    if (startRow > endRow) {
      return; // short-circuit
    }
    for (int c = startColumn; c <= endColumn; c++) {
      int cBase = c * heatmap.rows; // offset of this column within the column-major counts array
      for (int r = startRow; r <= endRow; r++) {
        heatmap.counts[cBase + r] += count;
      }
    }
  }

  /** Computes the number of intervals (rows or columns) to cover a range given the sizes. */
  private static int calcRowsOrCols(
      double cellRange, double cellMin, double requestRange, double requestMin, double worldRange) {
    assert requestMin >= cellMin;
    // Idealistically this wouldn't be so complicated but we concern ourselves with overflow and
    // edge cases
    double range = (requestRange + (requestMin - cellMin));
    if (range == 0) {
      return 1;
    }
    final double intervals = Math.ceil(range / cellRange);
    if (intervals > Integer.MAX_VALUE) {
      return Integer.MAX_VALUE; // should result in an error soon (exceed thresholds)
    }
    // ensures we don't have more intervals than world bounds (possibly due to rounding/edge issue)
    final long intervalsMax = Math.round(worldRange / cellRange);
    if (intervalsMax > Integer.MAX_VALUE) {
      // just return intervals
      return (int) intervals;
    }
    return Math.min((int) intervalsMax, (int) intervals);
  }

  /** Not instantiable; this class only exposes static members. */
  private HeatmapFacetCounter() {}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy