All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.PointRangeQuery Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Predicate;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;

/**
 * Abstract class for range queries against single or multidimensional points such as {@link
 * IntPoint}.
 *
 * 

This is for subclasses and works on the underlying binary encoding: to create range queries * for lucene's standard {@code Point} types, refer to factory methods on those classes, e.g. {@link * IntPoint#newRangeQuery IntPoint.newRangeQuery()} for fields indexed with {@link IntPoint}. * *

For a single-dimensional field this query is a simple range query; in a multi-dimensional * field it's a box shape. * * @see PointValues * @lucene.experimental */ public abstract class PointRangeQuery extends Query { final String field; final int numDims; final int bytesPerDim; final byte[] lowerPoint; final byte[] upperPoint; /** * Expert: create a multidimensional range query for point values. * * @param field field name. must not be {@code null}. * @param lowerPoint lower portion of the range (inclusive). * @param upperPoint upper portion of the range (inclusive). * @param numDims number of dimensions. * @throws IllegalArgumentException if {@code field} is null, or if {@code lowerValue.length != * upperValue.length} */ protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) { checkArgs(field, lowerPoint, upperPoint); this.field = field; if (numDims <= 0) { throw new IllegalArgumentException("numDims must be positive, got " + numDims); } if (lowerPoint.length == 0) { throw new IllegalArgumentException("lowerPoint has length of zero"); } if (lowerPoint.length % numDims != 0) { throw new IllegalArgumentException("lowerPoint is not a fixed multiple of numDims"); } if (lowerPoint.length != upperPoint.length) { throw new IllegalArgumentException( "lowerPoint has length=" + lowerPoint.length + " but upperPoint has different length=" + upperPoint.length); } this.numDims = numDims; this.bytesPerDim = lowerPoint.length / numDims; this.lowerPoint = lowerPoint; this.upperPoint = upperPoint; } /** * Check preconditions for all factory methods * * @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} are * null. */ public static void checkArgs(String field, Object lowerPoint, Object upperPoint) { if (field == null) { throw new IllegalArgumentException("field must not be null"); } if (lowerPoint == null) { throw new IllegalArgumentException("lowerPoint must not be null"); } if (upperPoint == null) { throw new IllegalArgumentException("upperPoint must not be null"); } } @Override public void visit(QueryVisitor visitor) { if (visitor.acceptField(field)) { visitor.visitLeaf(this); } } @Override public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". // This is an inverted structure and should be used in the first pass: return new ConstantScoreWeight(this, boost) { private final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(bytesPerDim); private boolean matches(byte[] packedValue) { int offset = 0; for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) { if (comparator.compare(packedValue, offset, lowerPoint, offset) < 0) { // Doc's value is too low, in this dimension return false; } if (comparator.compare(packedValue, offset, upperPoint, offset) > 0) { // Doc's value is too high, in this dimension return false; } } return true; } private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { boolean crosses = false; int offset = 0; for (int dim = 0; dim < numDims; dim++, offset += bytesPerDim) { if (comparator.compare(minPackedValue, offset, upperPoint, offset) > 0 || comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) { return Relation.CELL_OUTSIDE_QUERY; } crosses |= comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0 || comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0; } if (crosses) { return Relation.CELL_CROSSES_QUERY; } else { return Relation.CELL_INSIDE_QUERY; } } private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) { return new IntersectVisitor() { DocIdSetBuilder.BulkAdder adder; @Override public void grow(int count) { adder = result.grow(count); } @Override public void visit(int docID) { adder.add(docID); } @Override public void visit(DocIdSetIterator iterator) throws IOException { adder.add(iterator); } @Override public void visit(IntsRef ref) { adder.add(ref); } @Override public void visit(int docID, byte[] packedValue) { if (matches(packedValue)) { visit(docID); } } @Override public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { if (matches(packedValue)) { adder.add(iterator); } } @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { return relate(minPackedValue, maxPackedValue); } }; } /** Create a visitor that sets documents that do NOT match the range. */ private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, long[] cost) { return new IntersectVisitor() { @Override public void visit(int docID) { result.set(docID); cost[0]++; } @Override public void visit(DocIdSetIterator iterator) throws IOException { result.or(iterator); cost[0] += iterator.cost(); } @Override public void visit(IntsRef ref) { for (int i = ref.offset; i < ref.offset + ref.length; i++) { result.set(ref.ints[i]); } cost[0] += ref.length; } @Override public void visit(int docID, byte[] packedValue) { if (matches(packedValue) == false) { visit(docID); } } @Override public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { if (matches(packedValue) == false) { visit(iterator); } } @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { Relation relation = relate(minPackedValue, maxPackedValue); switch (relation) { case CELL_INSIDE_QUERY: // all points match, skip this subtree return Relation.CELL_OUTSIDE_QUERY; case CELL_OUTSIDE_QUERY: // none of the points match, clear all documents return Relation.CELL_INSIDE_QUERY; case CELL_CROSSES_QUERY: default: return relation; } } }; } private boolean checkValidPointValues(PointValues values) throws IOException { if (values == null) { // No docs in this segment/field indexed any points return false; } if (values.getNumIndexDimensions() != numDims) { throw new IllegalArgumentException( "field=\"" + field + "\" was indexed with numIndexDimensions=" + values.getNumIndexDimensions() + " but this query has numDims=" + numDims); } if (bytesPerDim != values.getBytesPerDimension()) { throw new IllegalArgumentException( "field=\"" + field + "\" was indexed with bytesPerDim=" + values.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim); } return true; } @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); if (checkValidPointValues(values) == false) { return null; } if (values.getDocCount() == 0) { return null; } else { final byte[] fieldPackedLower = values.getMinPackedValue(); final byte[] fieldPackedUpper = values.getMaxPackedValue(); for (int i = 0; i < numDims; ++i) { int offset = i * bytesPerDim; if (comparator.compare(lowerPoint, offset, fieldPackedUpper, offset) > 0 || comparator.compare(upperPoint, offset, fieldPackedLower, offset) < 0) { // If this query is a required clause of a boolean query, then returning null here // will help make sure that we don't call ScorerSupplier#get on other required clauses // of the same boolean query, which is an expensive operation for some queries (e.g. // multi-term queries). return null; } } } boolean allDocsMatch; if (values.getDocCount() == reader.maxDoc()) { final byte[] fieldPackedLower = values.getMinPackedValue(); final byte[] fieldPackedUpper = values.getMaxPackedValue(); allDocsMatch = true; for (int i = 0; i < numDims; ++i) { int offset = i * bytesPerDim; if (comparator.compare(lowerPoint, offset, fieldPackedLower, offset) > 0 || comparator.compare(upperPoint, offset, fieldPackedUpper, offset) < 0) { allDocsMatch = false; break; } } } else { allDocsMatch = false; } if (allDocsMatch) { // all docs have a value and all points are within bounds, so everything matches return ConstantScoreScorerSupplier.matchAll(score(), scoreMode, reader.maxDoc()); } else { return new ConstantScoreScorerSupplier(score(), scoreMode, reader.maxDoc()) { final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values); final IntersectVisitor visitor = getIntersectVisitor(result); long cost = -1; @Override public DocIdSetIterator iterator(long leadCost) throws IOException { if (values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size() && cost() > reader.maxDoc() / 2) { // If all docs have exactly one value and the cost is greater // than half the leaf size then maybe we can make things faster // by computing the set of documents that do NOT match the range final FixedBitSet result = new FixedBitSet(reader.maxDoc()); long[] cost = new long[1]; values.intersect(getInverseIntersectVisitor(result, cost)); // Flip the bit set and cost result.flip(0, reader.maxDoc()); cost[0] = Math.max(0, reader.maxDoc() - cost[0]); return new BitSetIterator(result, cost[0]); } values.intersect(visitor); return result.build().iterator(); } @Override public long cost() { if (cost == -1) { // Computing the cost may be expensive, so only do it if necessary cost = values.estimateDocCount(visitor); assert cost >= 0; } return cost; } }; } } @Override public int count(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); PointValues values = reader.getPointValues(field); if (checkValidPointValues(values) == false) { return 0; } if (reader.hasDeletions() == false) { if (relate(values.getMinPackedValue(), values.getMaxPackedValue()) == Relation.CELL_INSIDE_QUERY) { return values.getDocCount(); } // only 1D: we have the guarantee that it will actually run fast since there are at most 2 // crossing leaves. // docCount == size : counting according number of points in leaf node, so must be // single-valued. if (numDims == 1 && values.getDocCount() == values.size()) { return (int) pointCount(values.getPointTree(), this::relate, this::matches); } } return super.count(context); } /** * Finds the number of points matching the provided range conditions. Using this method is * faster than calling {@link PointValues#intersect(IntersectVisitor)} to get the count of * intersecting points. This method does not enforce live documents, therefore it should only * be used when there are no deleted documents. * * @param pointTree start node of the count operation * @param nodeComparator comparator to be used for checking whether the internal node is * inside the range * @param leafComparator comparator to be used for checking whether the leaf node is inside * the range * @return count of points that match the range */ private long pointCount( PointValues.PointTree pointTree, BiFunction nodeComparator, Predicate leafComparator) throws IOException { final long[] matchingNodeCount = {0}; // create a custom IntersectVisitor that records the number of leafNodes that matched final IntersectVisitor visitor = new IntersectVisitor() { @Override public void visit(int docID) { // this branch should be unreachable throw new UnsupportedOperationException( "This IntersectVisitor does not perform any actions on a " + "docID=" + docID + " node being visited"); } @Override public void visit(int docID, byte[] packedValue) { if (leafComparator.test(packedValue)) { matchingNodeCount[0]++; } } @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { return nodeComparator.apply(minPackedValue, maxPackedValue); } }; pointCount(visitor, pointTree, matchingNodeCount); return matchingNodeCount[0]; } private void pointCount( IntersectVisitor visitor, PointValues.PointTree pointTree, long[] matchingNodeCount) throws IOException { Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); switch (r) { case CELL_OUTSIDE_QUERY: // This cell is fully outside the query shape: return 0 as the count of its nodes return; case CELL_INSIDE_QUERY: // This cell is fully inside the query shape: return the size of the entire node as the // count matchingNodeCount[0] += pointTree.size(); return; case CELL_CROSSES_QUERY: /* The cell crosses the shape boundary, or the cell fully contains the query, so we fall through and do full counting. */ if (pointTree.moveToChild()) { do { pointCount(visitor, pointTree, matchingNodeCount); } while (pointTree.moveToSibling()); pointTree.moveToParent(); } else { // we have reached a leaf node here. pointTree.visitDocValues(visitor); // leaf node count is saved in the matchingNodeCount array by the visitor } return; default: throw new IllegalArgumentException("Unreachable code"); } } @Override public boolean isCacheable(LeafReaderContext ctx) { return true; } }; } public String getField() { return field; } public int getNumDims() { return numDims; } public int getBytesPerDim() { return bytesPerDim; } public byte[] getLowerPoint() { return lowerPoint.clone(); } public byte[] getUpperPoint() { return upperPoint.clone(); } @Override public final int hashCode() { int hash = classHash(); hash = 31 * hash + field.hashCode(); hash = 31 * hash + Arrays.hashCode(lowerPoint); hash = 31 * hash + Arrays.hashCode(upperPoint); hash = 31 * hash + numDims; hash = 31 * hash + Objects.hashCode(bytesPerDim); return hash; } @Override public final boolean equals(Object o) { return sameClassAs(o) && equalsTo(getClass().cast(o)); } private boolean equalsTo(PointRangeQuery other) { return Objects.equals(field, other.field) && numDims == other.numDims && bytesPerDim == other.bytesPerDim && Arrays.equals(lowerPoint, other.lowerPoint) && Arrays.equals(upperPoint, other.upperPoint); } @Override public final String toString(String field) { final StringBuilder sb = new StringBuilder(); if (this.field.equals(field) == false) { sb.append(this.field); sb.append(':'); } // print ourselves as "range per dimension" for (int i = 0; i < numDims; i++) { if (i > 0) { sb.append(','); } int startOffset = bytesPerDim * i; sb.append('['); sb.append( toString( i, ArrayUtil.copyOfSubArray(lowerPoint, startOffset, startOffset + bytesPerDim))); sb.append(" TO "); sb.append( toString( i, ArrayUtil.copyOfSubArray(upperPoint, startOffset, startOffset + bytesPerDim))); sb.append(']'); } return sb.toString(); } /** * Returns a string of a single value in a human-readable format for debugging. This is used by * {@link #toString()}. * * @param dimension dimension of the particular value * @param value single value, never null * @return human readable value for debugging */ protected abstract String toString(int dimension, byte[] value); }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy