org.apache.lucene.spatial.prefix.PrefixTreeStrategy Maven / Gradle / Ivy
Show all versions of lucene-spatial-extras Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
/**
 * An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two
 * subclasses are {@link RecursivePrefixTreeStrategy} and {@link
 * TermQueryPrefixTreeStrategy}. This strategy is most effective as a fast
 * approximate spatial search filter.
 *
 * <p><b>Characteristics:</b>
 *
 * <ul>
 *   <li>Can index any shape; however only {@link RecursivePrefixTreeStrategy}
 *       can effectively search non-point shapes.</li>
 *   <li>Can index a variable number of shapes per field value. This strategy
 *       can do it via multiple calls to
 *       {@link #createIndexableFields(org.locationtech.spatial4j.shape.Shape)}
 *       for a document or by giving it some sort of Shape aggregate (e.g. JTS
 *       WKT MultiPoint). The shape's boundary is approximated to a grid
 *       precision.</li>
 *   <li>Can query with any shape. The shape's boundary is approximated to a
 *       grid precision.</li>
 *   <li>Only {@link org.apache.lucene.spatial.query.SpatialOperation#Intersects}
 *       is supported. If only points are indexed then this is effectively
 *       equivalent to IsWithin.</li>
 *   <li>The strategy supports
 *       {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point,double)}
 *       even for multi-valued data, so long as the indexed data is all points;
 *       the behavior is undefined otherwise. However, it will likely be removed
 *       in the future in favor of another strategy with a more scalable
 *       implementation. Use of this call is the only circumstance in which a
 *       cache is used. The cache is simple, but as such it doesn't scale to
 *       large numbers of points, nor is it real-time-search friendly.</li>
 * </ul>
 *
 * <p><b>Implementation:</b>
 *
 * <p>The {@link SpatialPrefixTree} does most of the work, for example returning
 * a list of terms representing grids of various sizes for a supplied shape.
 * An important configuration item is {@link #setDistErrPct(double)}, which
 * balances shape precision against scalability. See the javadocs of that
 * method.
 *
 * @lucene.experimental
 */
public abstract class PrefixTreeStrategy extends SpatialStrategy {

  /** Indexed, tokenized, not stored. Norms are omitted and only documents are indexed. */
  public static final FieldType FIELD_TYPE = new FieldType();

  static {
    FIELD_TYPE.setTokenized(true);
    FIELD_TYPE.setOmitNorms(true);
    FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
    FIELD_TYPE.freeze();
  }

  /** The prefix tree (grid) that turns shapes into cells. */
  protected final SpatialPrefixTree grid;

  /**
   * Field-cache providers keyed by field name. Only populated by
   * {@link #makeDistanceValueSource(Point, double)}.
   */
  private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<>();

  /** Initial per-document array size hint passed to the field-cache provider. */
  protected int defaultFieldValuesArrayLen = 2;

  /** Default shape precision; the expected range is [0 TO 0.5]. */
  protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;

  /** If true, there are no leaves: only point shapes may be indexed. */
  protected boolean pointsOnly = false;

  /**
   * Creates a strategy that indexes into {@code fieldName} using the given grid. The spatial
   * context is taken from the grid.
   *
   * @param grid the prefix tree used to decompose shapes into cells
   * @param fieldName the name of the field this strategy reads and writes
   */
  public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
    super(grid.getSpatialContext(), fieldName);
    this.grid = grid;
  }

  /** Returns the prefix tree this strategy was constructed with. */
  public SpatialPrefixTree getGrid() {
    return grid;
  }

  /**
   * A memory hint used by {@link #makeDistanceValueSource(Point, double)}
   * for how big the initial size of each Document's array should be. The
   * default is 2. Set this to slightly more than the default expected number
   * of points per document.
   */
  public void setDefaultFieldValuesArrayLen(int defaultFieldValuesArrayLen) {
    this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen;
  }

  /** Returns the default shape precision; see {@link #setDistErrPct(double)}. */
  public double getDistErrPct() {
    return distErrPct;
  }

  /**
   * The default measure of shape precision affecting shapes at index and query
   * times. Points don't use this as they are always indexed at the configured
   * maximum precision ({@link org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree#getMaxLevels()});
   * this applies to all other shapes. Specific shapes at index and query time
   * can use something different than this default value. If you don't set a
   * default then the default is {@link SpatialArgs#DEFAULT_DISTERRPCT} --
   * 2.5%.
   *
   * @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct()
   */
  public void setDistErrPct(double distErrPct) {
    this.distErrPct = distErrPct;
  }

  /** Returns whether only point shapes are accepted for indexing. */
  public boolean isPointsOnly() {
    return pointsOnly;
  }

  /**
   * True if only indexed points shall be supported. There are no "leaves" in such a case, except
   * those at maximum precision.
   */
  public void setPointsOnly(boolean pointsOnly) {
    this.pointsOnly = pointsOnly;
  }

  /**
   * Indexes {@code shape} using a distance error derived from the configured
   * {@link #getDistErrPct() distErrPct}.
   */
  @Override
  public Field[] createIndexableFields(Shape shape) {
    double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
    return createIndexableFields(shape, distErr);
  }

  /**
   * Indexes {@code shape} at the grid level that {@link SpatialPrefixTree#getLevelForDistance(double)}
   * returns for {@code distErr}.
   */
  public Field[] createIndexableFields(Shape shape, double distErr) {
    int detailLevel = grid.getLevelForDistance(distErr);
    return createIndexableFields(shape, detailLevel);
  }

  /**
   * Turns {@link SpatialPrefixTree#getTreeCellIterator(Shape, int)} into a
   * {@link org.apache.lucene.analysis.TokenStream} and wraps it in a single field.
   *
   * @return a one-element array holding the field named {@link #getFieldName()}
   */
  public Field[] createIndexableFields(Shape shape, int detailLevel) {
    //TODO re-use TokenStream LUCENE-5776: Subclass Field, put cell iterator there, override tokenStream()
    CellToBytesRefIterator cellBytes = newCellBytesIterator(shape, detailLevel);
    BytesRefIteratorTokenStream tokenStream = new BytesRefIteratorTokenStream();
    tokenStream.setBytesRefIterator(cellBytes);
    Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE);
    return new Field[] {field};
  }

  /**
   * A token stream that can be pointed at a shape; the shape is decomposed into cells using this
   * strategy's grid and default {@code distErrPct}.
   */
  public class ShapeTokenStream extends BytesRefIteratorTokenStream {

    /** Sets the shape whose cells this stream will emit. */
    public void setShape(Shape shape) {
      double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
      int detailLevel = grid.getLevelForDistance(distErr);
      setBytesRefIterator(newCellBytesIterator(shape, detailLevel));
    }
  }

  /** Returns a new, unset {@link ShapeTokenStream} bound to this strategy. */
  public ShapeTokenStream tokenStream() {
    return new ShapeTokenStream();
  }

  /** Creates the cell iterator for {@code shape} and wraps it in a fresh, reset BytesRef iterator. */
  private CellToBytesRefIterator newCellBytesIterator(Shape shape, int detailLevel) {
    Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null);
    CellToBytesRefIterator cellBytes = newCellToBytesRefIterator();
    cellBytes.reset(cells);
    return cellBytes;
  }

  /** Factory for the cell-to-bytes adapter used at index time; subclasses may override. */
  protected CellToBytesRefIterator newCellToBytesRefIterator() {
    //subclasses could return one that never emits leaves, or does both, or who knows.
    return new CellToBytesRefIterator();
  }

  /**
   * Returns the cells that represent {@code shape} down to {@code detailLevel}.
   *
   * @param reuse currently ignored by this implementation
   * @throws IllegalArgumentException if {@link #isPointsOnly()} is set and {@code shape} is not a
   *     point shape according to {@link #isPointShape(Shape)}
   */
  protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
    if (pointsOnly && !isPointShape(shape)) {
      throw new IllegalArgumentException("pointsOnly is true yet a " + shape.getClass() + " is given for indexing");
    }
    return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator
  }

  /**
   * Returns a distance value source backed by a field-cache provider for this strategy's field.
   * The provider is created on first use and then reused for subsequent calls.
   */
  @Override
  public DoubleValuesSource makeDistanceValueSource(Point queryPoint, double multiplier) {
    // computeIfAbsent is atomic on ConcurrentHashMap, so at most one provider is created per field.
    PointPrefixTreeFieldCacheProvider p = provider.computeIfAbsent(
        getFieldName(),
        name -> new PointPrefixTreeFieldCacheProvider(grid, name, defaultFieldValuesArrayLen));
    return new ShapeFieldCacheDistanceValueSource(ctx, p, queryPoint, multiplier);
  }

  /**
   * Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as a so-called
   * "heatmap".
   *
   * @see HeatmapFacetCounter#calcFacets(PrefixTreeStrategy, IndexReaderContext, Bits, Shape, int, int)
   */
  public HeatmapFacetCounter.Heatmap calcFacets(IndexReaderContext context, Bits topAcceptDocs,
                                                Shape inputShape, final int facetLevel, int maxCells) throws IOException {
    return HeatmapFacetCounter.calcFacets(this, context, topAcceptDocs, inputShape, facetLevel, maxCells);
  }

  /**
   * Returns true if the {@code shape} is a {@link Point}. For custom spatial contexts, it may make sense to
   * have certain other shapes return true.
   * @lucene.experimental
   */
  protected boolean isPointShape(Shape shape) {
    return shape instanceof Point;
  }
}
| | | |