All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.spatial.prefix.PrefixTreeStrategy Maven / Gradle / Ivy

There is a newer version: 10.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;

/**
 * An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two
 * subclasses are {@link RecursivePrefixTreeStrategy} and {@link
 * TermQueryPrefixTreeStrategy}.  This strategy is most effective as a fast
 * approximate spatial search filter.
 * 

* Characteristics: *
*

    *
  • Can index any shape; however only {@link RecursivePrefixTreeStrategy} * can effectively search non-point shapes.
  • *
  • Can index a variable number of shapes per field value. This strategy * can do it via multiple calls to {@link #createIndexableFields(org.locationtech.spatial4j.shape.Shape)} * for a document or by giving it some sort of Shape aggregate (e.g. JTS * WKT MultiPoint). The shape's boundary is approximated to a grid precision. *
  • *
  • Can query with any shape. The shape's boundary is approximated to a grid * precision.
  • *
  • Only {@link org.apache.lucene.spatial.query.SpatialOperation#Intersects} * is supported. If only points are indexed then this is effectively equivalent * to IsWithin.
  • *
  • The strategy supports {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point,double)} * even for multi-valued data, so long as the indexed data is all points; the * behavior is undefined otherwise. However, it will likely be removed in * the future in lieu of using another strategy with a more scalable * implementation. Use of this call is the only * circumstance in which a cache is used. The cache is simple but as such * it doesn't scale to large numbers of points nor is it real-time-search * friendly.
  • *
*

* Implementation: *

* The {@link SpatialPrefixTree} does most of the work, for example returning * a list of terms representing grids of various sizes for a supplied shape. * An important * configuration item is {@link #setDistErrPct(double)} which balances * shape precision against scalability. See those javadocs. * * @lucene.experimental */ public abstract class PrefixTreeStrategy extends SpatialStrategy { protected final SpatialPrefixTree grid; private final Map provider = new ConcurrentHashMap<>(); protected int defaultFieldValuesArrayLen = 2; protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ] protected boolean pointsOnly = false;//if true, there are no leaves public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) { super(grid.getSpatialContext(), fieldName); this.grid = grid; } public SpatialPrefixTree getGrid() { return grid; } /** * A memory hint used by {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)} * for how big the initial size of each Document's array should be. The * default is 2. Set this to slightly more than the default expected number * of points per document. */ public void setDefaultFieldValuesArrayLen(int defaultFieldValuesArrayLen) { this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen; } public double getDistErrPct() { return distErrPct; } /** * The default measure of shape precision affecting shapes at index and query * times. Points don't use this as they are always indexed at the configured * maximum precision ({@link org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree#getMaxLevels()}); * this applies to all other shapes. Specific shapes at index and query time * can use something different than this default value. If you don't set a * default then the default is {@link SpatialArgs#DEFAULT_DISTERRPCT} -- * 2.5%. * * @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct() */ public void setDistErrPct(double distErrPct) { this.distErrPct = distErrPct; } public boolean isPointsOnly() { return pointsOnly; } /** True if only indexed points shall be supported. There are no "leafs" in such a case, except those * at maximum precision. */ public void setPointsOnly(boolean pointsOnly) { this.pointsOnly = pointsOnly; } @Override public Field[] createIndexableFields(Shape shape) { double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx); return createIndexableFields(shape, distErr); } /** * Turns {@link SpatialPrefixTree#getTreeCellIterator(Shape, int)} into a * {@link org.apache.lucene.analysis.TokenStream}. */ public Field[] createIndexableFields(Shape shape, double distErr) { int detailLevel = grid.getLevelForDistance(distErr); return createIndexableFields(shape, detailLevel); } public Field[] createIndexableFields(Shape shape, int detailLevel) { //TODO re-use TokenStream LUCENE-5776: Subclass Field, put cell iterator there, override tokenStream() Iterator cells = createCellIteratorToIndex(shape, detailLevel, null); CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator(); cellToBytesRefIterator.reset(cells); BytesRefIteratorTokenStream tokenStream = new BytesRefIteratorTokenStream(); tokenStream.setBytesRefIterator(cellToBytesRefIterator); Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE); return new Field[]{field}; } public class ShapeTokenStream extends BytesRefIteratorTokenStream { public void setShape(Shape shape) { double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx); int detailLevel = grid.getLevelForDistance(distErr); Iterator cells = createCellIteratorToIndex(shape, detailLevel, null); CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator(); cellToBytesRefIterator.reset(cells); setBytesRefIterator(cellToBytesRefIterator); } } public ShapeTokenStream tokenStream() { return new ShapeTokenStream(); } protected CellToBytesRefIterator newCellToBytesRefIterator() { //subclasses could return one that never emits leaves, or does both, or who knows. return new CellToBytesRefIterator(); } protected Iterator createCellIteratorToIndex(Shape shape, int detailLevel, Iterator reuse) { if (pointsOnly && !isPointShape(shape)) { throw new IllegalArgumentException("pointsOnly is true yet a " + shape.getClass() + " is given for indexing"); } return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator } /* Indexed, tokenized, not stored. */ public static final FieldType FIELD_TYPE = new FieldType(); static { FIELD_TYPE.setTokenized(true); FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); FIELD_TYPE.freeze(); } @Override public DoubleValuesSource makeDistanceValueSource(Point queryPoint, double multiplier) { PointPrefixTreeFieldCacheProvider p = provider.get( getFieldName() ); if( p == null ) { synchronized (this) {//double checked locking idiom is okay since provider is threadsafe p = provider.get( getFieldName() ); if (p == null) { p = new PointPrefixTreeFieldCacheProvider(grid, getFieldName(), defaultFieldValuesArrayLen); provider.put(getFieldName(),p); } } } return new ShapeFieldCacheDistanceValueSource(ctx, p, queryPoint, multiplier); } /** * Computes spatial facets in two dimensions as a grid of numbers. The data is often visualized as a so-called * "heatmap". * * @see HeatmapFacetCounter#calcFacets(PrefixTreeStrategy, IndexReaderContext, Bits, Shape, int, int) */ public HeatmapFacetCounter.Heatmap calcFacets(IndexReaderContext context, Bits topAcceptDocs, Shape inputShape, final int facetLevel, int maxCells) throws IOException { return HeatmapFacetCounter.calcFacets(this, context, topAcceptDocs, inputShape, facetLevel, maxCells); } /** * Returns true if the {@code shape} is a {@link Point}. For custom spatial contexts, it may make sense to * have certain other shapes return true. * @lucene.experimental */ protected boolean isPointShape(Shape shape) { return shape instanceof Point; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy