org.apache.lucene.spatial.prefix.ContainsPrefixTreeQuery Maven / Gradle / Ivy
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.SentinelIntSet;
import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.SpatialRelation;
/**
 * Finds docs whose indexed shape {@link
 * org.apache.lucene.spatial.query.SpatialOperation#Contains CONTAINS} the query shape. For use on
 * {@link RecursivePrefixTreeStrategy}.
 *
 * @lucene.experimental
 */
public class ContainsPrefixTreeQuery extends AbstractPrefixTreeQuery {
/**
 * If the spatial data for a document is comprised of multiple overlapping or adjacent parts, it
 * might fail to match a query shape when doing the CONTAINS predicate when the sum of those
 * shapes contains the query shape but none do individually. Set this to false to increase
 * performance if you don't care about that circumstance (such as if your indexed data doesn't
 * even have such conditions). See LUCENE-5062.
 */
protected final boolean multiOverlappingIndexedShapes;
/**
 * Creates a CONTAINS query over a prefix-tree indexed field.
 *
 * @param queryShape the shape that matching documents must contain; forwarded to the superclass
 * @param fieldName the indexed field name; forwarded to the superclass
 * @param grid the prefix tree; forwarded to the superclass
 * @param detailLevel the tree level at which traversal stops; forwarded to the superclass
 * @param multiOverlappingIndexedShapes stored as-is in the field of the same name
 */
public ContainsPrefixTreeQuery(
    Shape queryShape,
    String fieldName,
    SpatialPrefixTree grid,
    int detailLevel,
    boolean multiOverlappingIndexedShapes) {
  super(queryShape, fieldName, grid, detailLevel);
  this.multiOverlappingIndexedShapes = multiOverlappingIndexedShapes;
}
public boolean equals(Object o) {
if (!super.equals(o)) return false;
return multiOverlappingIndexedShapes
== ((ContainsPrefixTreeQuery) o).multiOverlappingIndexedShapes;
public int hashCode() {
return super.hashCode() + (multiOverlappingIndexedShapes ? 1 : 0);
public String toString(String field) {
return getClass().getSimpleName()
+ "("
+ ("fieldName=" + fieldName + ",")
+ ("queryShape=" + queryShape + ",")
+ ("detailLevel=" + detailLevel + ",")
+ ("multiOverlappingIndexedShapes=" + multiOverlappingIndexedShapes)
+ ")";
protected DocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
return new ContainsVisitor(context).visit(grid.getWorldCell(), null);
private class ContainsVisitor extends BaseTermsEnumTraverser {
/**
 * Creates a visitor for one segment and positions it on the first term, if there is a terms
 * enumerator.
 *
 * @param context the segment to traverse; handed to the base traverser
 */
public ContainsVisitor(LeafReaderContext context) throws IOException {
  // NOTE(review): assumes BaseTermsEnumTraverser declares a (LeafReaderContext) constructor
  // that sets up termsEnum -- confirm against the superclass.
  super(context);
  if (termsEnum != null) {
    nextTerm(); // advance to first
  }
}
BytesRef seekTerm = new BytesRef(); // temp; see seek()
BytesRef thisTerm; // current term in termsEnum
Cell indexedCell; // the cell wrapper around thisTerm
/** This is the primary algorithm; recursive. Returns null if finds none. */
private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {
if (thisTerm == null) // signals all done
return null;
// Get the AND of all child results (into combinedSubResults)
SmallDocSet combinedSubResults = null;
// Optimization: use null subCellsFilter when we know cell is within the query shape.
Shape subCellsFilter = queryShape;
if (cell.getLevel() != 0
&& ((cell.getShapeRel() == null || cell.getShapeRel() == SpatialRelation.WITHIN))) {
subCellsFilter = null;
assert cell.getShape().relate(queryShape) == SpatialRelation.WITHIN;
CellIterator subCells = cell.getNextLevelCells(subCellsFilter);
while (subCells.hasNext()) {
Cell subCell =;
if (!seek(subCell)) {
combinedSubResults = null;
} else if (subCell.getLevel() == detailLevel) {
combinedSubResults = getDocs(subCell, acceptContains);
} else if (!multiOverlappingIndexedShapes
&& subCell.getShapeRel() == SpatialRelation.WITHIN) {
combinedSubResults = getLeafDocs(subCell, acceptContains);
} else {
// OR the leaf docs with all child results
SmallDocSet leafDocs = getLeafDocs(subCell, acceptContains);
SmallDocSet subDocs = visit(subCell, acceptContains); // recursion
combinedSubResults = union(leafDocs, subDocs);
if (combinedSubResults == null) break;
acceptContains = combinedSubResults; // has the 'AND' effect on next iteration
return combinedSubResults;
private boolean seek(Cell cell) throws IOException {
if (thisTerm == null) return false;
final int compare = indexedCell.compareToNoLeaf(cell);
if (compare > 0) {
return false; // leap-frog effect
} else if (compare == 0) {
return true; // already there!
} else { // compare > 0
// seek!
seekTerm = cell.getTokenBytesNoLeaf(seekTerm);
final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(seekTerm);
if (seekStatus == TermsEnum.SeekStatus.END) {
thisTerm = null; // all done
return false;
thisTerm = termsEnum.term();
indexedCell = grid.readCell(thisTerm, indexedCell);
if (seekStatus == TermsEnum.SeekStatus.FOUND) {
return true;
return indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0;
/** Get prefix & leaf docs at this cell. */
private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
assert indexedCell.compareToNoLeaf(cell) == 0;
// called when we've reached detailLevel.
if (indexedCell.isLeaf()) { // only a leaf
SmallDocSet result = collectDocs(acceptContains);
return result;
} else {
SmallDocSet docsAtPrefix = collectDocs(acceptContains);
if (!nextTerm()) {
return docsAtPrefix;
// collect leaf too
if (indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0) {
SmallDocSet docsAtLeaf = collectDocs(acceptContains);
return union(docsAtPrefix, docsAtLeaf);
} else {
return docsAtPrefix;
/** Gets docs on the leaf of the given cell, _if_ there is a leaf cell, otherwise null. */
private SmallDocSet getLeafDocs(Cell cell, Bits acceptContains) throws IOException {
assert indexedCell.compareToNoLeaf(cell) == 0;
// Advance past prefix if we're at a prefix; return null if no leaf
if (!indexedCell.isLeaf()) {
if (!nextTerm() || !indexedCell.isLeaf() || indexedCell.getLevel() != cell.getLevel()) {
return null;
SmallDocSet result = collectDocs(acceptContains);
return result;
private boolean nextTerm() throws IOException {
if ((thisTerm = == null) return false;
indexedCell = grid.readCell(thisTerm, indexedCell);
return true;
private SmallDocSet union(SmallDocSet aSet, SmallDocSet bSet) {
if (bSet != null) {
if (aSet == null) return bSet;
return aSet.union(bSet); // union is 'or'
return aSet;
private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
SmallDocSet set = null;
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
int docid;
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (acceptContains != null && acceptContains.get(docid) == false) {
if (set == null) {
int size = termsEnum.docFreq();
if (size <= 0) size = 16;
set = new SmallDocSet(size);
return set;
} // class ContainsVisitor
* A hash based mutable set of docIds. If this were Solr code then we might use a combination of
* HashDocSet and SortedIntDocSet instead.
// TODO use DocIdSetBuilder?
private static class SmallDocSet extends DocIdSet implements Bits {
private final SentinelIntSet intSet;
private int maxInt = 0;
public SmallDocSet(int size) {
intSet = new SentinelIntSet(size, -1);
public boolean get(int index) {
return intSet.exists(index);
public void set(int index) {
if (index > maxInt) maxInt = index;
/** Largest docid. */
public int length() {
return maxInt;
/** Number of docids. */
public int size() {
return intSet.size();
/** NOTE: modifies and returns either "this" or "other" */
public SmallDocSet union(SmallDocSet other) {
SmallDocSet bigger;
SmallDocSet smaller;
if (other.intSet.size() > this.intSet.size()) {
bigger = other;
smaller = this;
} else {
bigger = this;
smaller = other;
// modify bigger
for (int v : smaller.intSet.keys) {
if (v == smaller.intSet.emptyVal) continue;
return bigger;
public Bits bits() throws IOException {
// if the # of docids is super small, return null since iteration is going
// to be faster
return size() > 4 ? this : null;
public DocIdSetIterator iterator() throws IOException {
if (size() == 0) return null;
// copy the unsorted values to a new array then sort them
int d = 0;
final int[] docs = new int[intSet.size()];
for (int v : intSet.keys) {
if (v == intSet.emptyVal) continue;
docs[d++] = v;
assert d == intSet.size();
final int size = d;
// sort them
Arrays.sort(docs, 0, size);
return new DocIdSetIterator() {
int idx = -1;
public int docID() {
if (idx < 0) {
return -1;
} else if (idx < size) {
return docs[idx];
} else {
return NO_MORE_DOCS;
public int nextDoc() throws IOException {
if (++idx < size) return docs[idx];
return NO_MORE_DOCS;
public int advance(int target) throws IOException {
// for this small set this is likely faster vs. a binary search
// into the sorted array
return slowAdvance(target);
public long cost() {
return size;
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_REF + Integer.BYTES)
+ intSet.ramBytesUsed();
} // class SmallDocSet
© 2015 - 2025 Weber Informatics LLC | Privacy Policy