
org.apache.lucene.spatial.prefix.PrefixTreeStrategy Maven / Gradle / Ivy
Show all versions of lucene-spatial-extras Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
/**
* An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two subclasses are {@link
* RecursivePrefixTreeStrategy} and {@link TermQueryPrefixTreeStrategy}. This strategy is most
* effective as a fast approximate spatial search filter.
*
* <p><b>Characteristics:</b> <br>
*
* <ul>
*   <li>Can index any shape; however only {@link RecursivePrefixTreeStrategy} can effectively
*       search non-point shapes.
*   <li>Can index a variable number of shapes per field value. This strategy can do it via multiple
*       calls to {@link #createIndexableFields(org.locationtech.spatial4j.shape.Shape)} for a
*       document or by giving it some sort of Shape aggregate (e.g. JTS WKT MultiPoint). The
*       shape's boundary is approximated to a grid precision.
*   <li>Can query with any shape. The shape's boundary is approximated to a grid precision.
*   <li>Only {@link org.apache.lucene.spatial.query.SpatialOperation#Intersects} is supported. If
*       only points are indexed then this is effectively equivalent to IsWithin.
*   <li>The strategy supports {@link
*       #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point,double)} even for
*       multi-valued data, so long as the indexed data is all points; the behavior is undefined
*       otherwise. However, <em>it will likely be removed in the future</em> in lieu of using
*       another strategy with a more scalable implementation. Use of this call is the only
*       circumstance in which a cache is used. The cache is simple but as such it doesn't scale to
*       large numbers of points nor is it real-time-search friendly.
* </ul>
*
* <p><b>Implementation:</b>
*
* <p>The {@link SpatialPrefixTree} does most of the work, for example returning a list of terms
* representing grids of various sizes for a supplied shape. An important configuration item is
* {@link #setDistErrPct(double)} which balances shape precision against scalability. See those
* javadocs.
*
* @lucene.experimental
*/
public abstract class PrefixTreeStrategy extends SpatialStrategy {

  /** The prefix tree (grid) that turns shapes into cells for indexing. */
  protected final SpatialPrefixTree grid;

  /**
   * Lazily created distance-cache providers, keyed by field name. Only populated by {@link
   * #makeDistanceValueSource(Point, double)}.
   */
  private final Map<String, PointPrefixTreeFieldCacheProvider> provider =
      new ConcurrentHashMap<>();

  /** Size hint handed to the cache provider; see {@link #setDefaultFieldValuesArrayLen(int)}. */
  protected int defaultFieldValuesArrayLen = 2;

  /** Default shape precision; see {@link #setDistErrPct(double)}. */
  protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT; // [ 0 TO 0.5 ]

  /** See {@link #setPointsOnly(boolean)}. */
  protected boolean pointsOnly = false; // if true, there are no leaves

  /**
   * Creates a strategy over the given grid, using the grid's spatial context.
   *
   * @param grid the prefix tree used to derive cells from shapes
   * @param fieldName the name of the field this strategy indexes into
   */
  public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
    super(grid.getSpatialContext(), fieldName);
    this.grid = grid;
  }

  /** Returns the prefix tree this strategy was constructed with. */
  public SpatialPrefixTree getGrid() {
    return grid;
  }

  /**
   * A memory hint used by {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)}
   * for how big the initial size of each Document's array should be. The default is 2. Set this to
   * slightly more than the default expected number of points per document.
   */
  public void setDefaultFieldValuesArrayLen(int defaultFieldValuesArrayLen) {
    this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen;
  }

  /** Returns the default shape precision set via {@link #setDistErrPct(double)}. */
  public double getDistErrPct() {
    return distErrPct;
  }

  /**
   * The default measure of shape precision affecting shapes at index and query times. Points don't
   * use this as they are always indexed at the configured maximum precision ({@link
   * org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree#getMaxLevels()}); this applies to all
   * other shapes. Specific shapes at index and query time can use something different than this
   * default value. If you don't set a default then the default is {@link
   * SpatialArgs#DEFAULT_DISTERRPCT} -- 2.5%.
   *
   * <p>The value is stored as given; no range check is performed here.
   *
   * @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct()
   */
  public void setDistErrPct(double distErrPct) {
    this.distErrPct = distErrPct;
  }

  /** Returns whether this strategy is restricted to indexing points only. */
  public boolean isPointsOnly() {
    return pointsOnly;
  }

  /**
   * True if only indexed points shall be supported. There are no "leafs" in such a case, except
   * those at maximum precision.
   */
  public void setPointsOnly(boolean pointsOnly) {
    this.pointsOnly = pointsOnly;
  }

  /**
   * Creates the indexable field for a shape, deriving the distance error from this strategy's
   * default {@link #getDistErrPct() distErrPct}.
   *
   * @param shape the shape to index
   * @return a single-element array holding the field for {@code shape}
   */
  @Override
  public Field[] createIndexableFields(Shape shape) {
    double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
    return createIndexableFields(shape, distErr);
  }

  /**
   * Turns {@link SpatialPrefixTree#getTreeCellIterator(Shape, int)} into a {@link
   * org.apache.lucene.analysis.TokenStream}. The detail level is obtained from the grid via {@link
   * SpatialPrefixTree#getLevelForDistance(double)}.
   *
   * @param shape the shape to index
   * @param distErr the distance error used to pick the grid level
   * @return a single-element array holding the field for {@code shape}
   */
  public Field[] createIndexableFields(Shape shape, double distErr) {
    int detailLevel = grid.getLevelForDistance(distErr);
    return createIndexableFields(shape, detailLevel);
  }

  /**
   * Creates the indexable field for a shape at an explicit grid level.
   *
   * @param shape the shape to index
   * @param detailLevel the grid level passed on to the cell iterator
   * @return a single-element array holding a field named {@link #getFieldName()} of type {@link
   *     #FIELD_TYPE}
   * @throws IllegalArgumentException if {@link #isPointsOnly()} is set and {@code shape} is not a
   *     point shape
   */
  public Field[] createIndexableFields(Shape shape, int detailLevel) {
    // TODO re-use TokenStream LUCENE-5776: Subclass Field, put cell iterator there, override
    // tokenStream()
    BytesRefIteratorTokenStream tokenStream = new BytesRefIteratorTokenStream();
    tokenStream.setBytesRefIterator(newCellBytesFor(shape, detailLevel));
    Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE);
    return new Field[] {field};
  }

  /** Tokenstream for indexing cells of a shape */
  public class ShapeTokenStream extends BytesRefIteratorTokenStream {

    /**
     * Points this stream at the cells of {@code shape}, using the enclosing strategy's default
     * {@code distErrPct} to choose the grid level.
     *
     * @param shape the shape whose cells should be emitted
     * @throws IllegalArgumentException if the strategy is points-only and {@code shape} is not a
     *     point shape
     */
    public void setShape(Shape shape) {
      double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
      int detailLevel = grid.getLevelForDistance(distErr);
      setBytesRefIterator(newCellBytesFor(shape, detailLevel));
    }
  }

  /** Returns a new, unconfigured {@link ShapeTokenStream} bound to this strategy. */
  public ShapeTokenStream tokenStream() {
    return new ShapeTokenStream();
  }

  /** Factory for the cell-to-bytes adapter; an extension point for subclasses. */
  protected CellToBytesRefIterator newCellToBytesRefIterator() {
    // subclasses could return one that never emits leaves, or does both, or who knows.
    return new CellToBytesRefIterator();
  }

  /**
   * Produces the cells to index for a shape.
   *
   * @param shape the shape to index
   * @param detailLevel the grid level passed to {@link
   *     SpatialPrefixTree#getTreeCellIterator(Shape, int)}
   * @param reuse an iterator offered for re-use; this implementation currently ignores it
   * @return the iterator returned by the grid
   * @throws IllegalArgumentException if {@link #isPointsOnly()} is set and {@link
   *     #isPointShape(Shape)} rejects {@code shape}
   */
  protected Iterator<Cell> createCellIteratorToIndex(
      Shape shape, int detailLevel, Iterator<Cell> reuse) {
    if (pointsOnly && !isPointShape(shape)) {
      throw new IllegalArgumentException(
          "pointsOnly is true yet a " + shape.getClass() + " is given for indexing");
    }
    return grid.getTreeCellIterator(shape, detailLevel); // TODO should take a re-use iterator
  }

  /** Indexed, tokenized, not stored. */
  public static final FieldType FIELD_TYPE = new FieldType();

  static {
    FIELD_TYPE.setTokenized(true);
    FIELD_TYPE.setOmitNorms(true);
    FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
    FIELD_TYPE.freeze();
  }

  /**
   * Returns a distance value source backed by a per-field point cache. The cache provider for this
   * strategy's field is created on first use and then re-used.
   *
   * @param queryPoint the point distances are measured from
   * @param multiplier passed through to the returned value source
   * @return a {@link ShapeFieldCacheDistanceValueSource} over the cached points
   */
  @Override
  public DoubleValuesSource makeDistanceValueSource(Point queryPoint, double multiplier) {
    // computeIfAbsent on a ConcurrentHashMap creates the provider at most once per field name,
    // replacing the manual check / lock / re-check sequence and avoiding a lock on "this".
    PointPrefixTreeFieldCacheProvider cacheProvider =
        provider.computeIfAbsent(
            getFieldName(),
            name -> new PointPrefixTreeFieldCacheProvider(grid, name, defaultFieldValuesArrayLen));
    return new ShapeFieldCacheDistanceValueSource(ctx, cacheProvider, queryPoint, multiplier);
  }

  /**
   * Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as
   * a so-called "heatmap".
   *
   * @see HeatmapFacetCounter#calcFacets(PrefixTreeStrategy, IndexReaderContext, Bits, Shape, int,
   *     int)
   */
  public HeatmapFacetCounter.Heatmap calcFacets(
      IndexReaderContext context,
      Bits topAcceptDocs,
      Shape inputShape,
      final int facetLevel,
      int maxCells)
      throws IOException {
    return HeatmapFacetCounter.calcFacets(
        this, context, topAcceptDocs, inputShape, facetLevel, maxCells);
  }

  /**
   * Returns true if the {@code shape} is a {@link Point}. For custom spatial contexts, it may make
   * sense to have certain other shapes return true.
   *
   * @lucene.experimental
   */
  protected boolean isPointShape(Shape shape) {
    return shape instanceof Point;
  }

  /**
   * Builds the cell iterator for {@code shape} at {@code detailLevel} and wraps it in a freshly
   * created {@link CellToBytesRefIterator}. Shared by field creation and {@link ShapeTokenStream}.
   */
  private CellToBytesRefIterator newCellBytesFor(Shape shape, int detailLevel) {
    // Same order as before: obtain (and validate) the cells first, then create the adapter.
    Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null);
    CellToBytesRefIterator cellBytes = newCellToBytesRefIterator();
    cellBytes.reset(cells);
    return cellBytes;
  }
}
| | | |