All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.FieldComparator Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;



import java.io.IOException;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

/**
 * Expert: a FieldComparator compares hits so as to determine their
 * sort order when collecting the top results with {@link
 * TopFieldCollector}.  The concrete public FieldComparator
 * classes here correspond to the SortField types.
 *
 * 

The document IDs passed to these methods must only * move forwards, since they are using doc values iterators * to retrieve sort values.

* *

This API is designed to achieve high performance * sorting, by exposing a tight interaction with {@link * FieldValueHitQueue} as it visits hits. Whenever a hit is * competitive, it's enrolled into a virtual slot, which is * an int ranging from 0 to numHits-1. Segment transitions are * handled by creating a dedicated per-segment * {@link LeafFieldComparator} which also needs to interact * with the {@link FieldValueHitQueue} but can optimize based * on the segment to collect.

* *

The following functions need to be implemented

*
    *
  • {@link #compare} Compare a hit at 'slot a' * with hit 'slot b'. * *
  • {@link #setTopValue} This method is called by * {@link TopFieldCollector} to notify the * FieldComparator of the top most value, which is * used by future calls to * {@link LeafFieldComparator#compareTop}. * *
  • {@link #getLeafComparator(org.apache.lucene.index.LeafReaderContext)} Invoked * when the search is switching to the next segment. * You may need to update internal state of the * comparator, for example retrieving new values from * DocValues. * *
  • {@link #value} Return the sort value stored in * the specified slot. This is only called at the end * of the search, in order to populate {@link * FieldDoc#fields} when returning the top results. *
* * @see LeafFieldComparator * @lucene.experimental */ public abstract class FieldComparator { /** * Compare hit at slot1 with hit at slot2. * * @param slot1 first slot to compare * @param slot2 second slot to compare * @return any {@code N < 0} if slot2's value is sorted after * slot1, any {@code N > 0} if the slot2's value is sorted before * slot1 and {@code 0} if they are equal */ public abstract int compare(int slot1, int slot2); /** * Record the top value, for future calls to {@link * LeafFieldComparator#compareTop}. This is only called for searches that * use searchAfter (deep paging), and is called before any * calls to {@link #getLeafComparator(LeafReaderContext)}. */ public abstract void setTopValue(T value); /** * Return the actual value in the slot. * * @param slot the value * @return value in this slot */ public abstract T value(int slot); /** * Get a per-segment {@link LeafFieldComparator} to collect the given * {@link org.apache.lucene.index.LeafReaderContext}. All docIDs supplied to * this {@link LeafFieldComparator} are relative to the current reader (you * must add docBase if you need to map it to a top-level docID). * * @param context current reader context * @return the comparator to use for this segment * @throws IOException if there is a low-level IO error */ public abstract LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException; /** Returns a negative integer if first is less than second, * 0 if they are equal and a positive integer otherwise. Default * impl to assume the type implements Comparable and * invoke .compareTo; be sure to override this method if * your FieldComparator's type isn't a Comparable or * if your values may sometimes be null */ @SuppressWarnings("unchecked") public int compareValues(T first, T second) { if (first == null) { if (second == null) { return 0; } else { return -1; } } else if (second == null) { return 1; } else { return ((Comparable) first).compareTo(second); } } /** * Informs the comparator that sort is done on this single field. * This is useful to enable some optimizations for skipping non-competitive documents. */ public void setSingleSort() { } /** * Informs the comparator that the skipping of documents should be disabled. * This function is called in cases when the skipping functionality * should not be applied or not necessary. * * One example for numeric comparators is when we don't know if the same numeric data has * been indexed with docValues and points if these two fields have the same name. * As the skipping functionality relies on these fields to have the same data * and as we don't know if it is true, we have to disable it. * * Another example could be when search sort is a part of the index sort, * and can be already efficiently handled by TopFieldCollector, * and doing extra work for skipping in the comparator is redundant. */ public void disableSkipping() { } /** Sorts by descending relevance. NOTE: if you are * sorting only by descending relevance and then * secondarily by ascending docID, performance is faster * using {@link TopScoreDocCollector} directly (which {@link * IndexSearcher#search} uses when no {@link Sort} is * specified). */ public static final class RelevanceComparator extends FieldComparator implements LeafFieldComparator { private final float[] scores; private float bottom; private Scorable scorer; private float topValue; /** Creates a new comparator based on relevance for {@code numHits}. */ public RelevanceComparator(int numHits) { scores = new float[numHits]; } @Override public int compare(int slot1, int slot2) { return Float.compare(scores[slot2], scores[slot1]); } @Override public int compareBottom(int doc) throws IOException { float score = scorer.score(); assert !Float.isNaN(score); return Float.compare(score, bottom); } @Override public void copy(int slot, int doc) throws IOException { scores[slot] = scorer.score(); assert !Float.isNaN(scores[slot]); } @Override public LeafFieldComparator getLeafComparator(LeafReaderContext context) { return this; } @Override public void setBottom(final int bottom) { this.bottom = scores[bottom]; } @Override public void setTopValue(Float value) { topValue = value; } @Override public void setScorer(Scorable scorer) { // wrap with a ScoreCachingWrappingScorer so that successive calls to // score() will not incur score computation over and // over again. this.scorer = ScoreCachingWrappingScorer.wrap(scorer); } @Override public Float value(int slot) { return Float.valueOf(scores[slot]); } // Override because we sort reverse of natural Float order: @Override public int compareValues(Float first, Float second) { // Reversed intentionally because relevance by default // sorts descending: return second.compareTo(first); } @Override public int compareTop(int doc) throws IOException { float docValue = scorer.score(); assert !Float.isNaN(docValue); return Float.compare(docValue, topValue); } } /** Sorts by field's natural Term sort order, using * ordinals. This is functionally equivalent to {@link * org.apache.lucene.search.FieldComparator.TermValComparator}, but it first resolves the string * to their relative ordinal positions (using the index * returned by {@link org.apache.lucene.index.LeafReader#getSortedDocValues(String)}), and * does most comparisons using the ordinals. For medium * to large results, this comparator will be much faster * than {@link org.apache.lucene.search.FieldComparator.TermValComparator}. For very small * result sets it may be slower. */ public static class TermOrdValComparator extends FieldComparator implements LeafFieldComparator { /* Ords for each slot. @lucene.internal */ final int[] ords; /* Values for each slot. @lucene.internal */ final BytesRef[] values; private final BytesRefBuilder[] tempBRs; /* Which reader last copied a value into the slot. When we compare two slots, we just compare-by-ord if the readerGen is the same; else we must compare the values (slower). @lucene.internal */ final int[] readerGen; /* Gen of current reader we are on. @lucene.internal */ int currentReaderGen = -1; /* Current reader's doc ord/values. @lucene.internal */ SortedDocValues termsIndex; private final String field; /* Bottom slot, or -1 if queue isn't full yet @lucene.internal */ int bottomSlot = -1; /* Bottom ord (same as ords[bottomSlot] once bottomSlot is set). Cached for faster compares. @lucene.internal */ int bottomOrd; /* True if current bottom slot matches the current reader. @lucene.internal */ boolean bottomSameReader; /* Bottom value (same as values[bottomSlot] once bottomSlot is set). Cached for faster compares. @lucene.internal */ BytesRef bottomValue; /** Set by setTopValue. */ BytesRef topValue; boolean topSameReader; int topOrd; /** -1 if missing values are sorted first, 1 if they are * sorted last */ final int missingSortCmp; /** Which ordinal to use for a missing value. */ final int missingOrd; /** Creates this, sorting missing values first. */ public TermOrdValComparator(int numHits, String field) { this(numHits, field, false); } /** Creates this, with control over how missing values * are sorted. Pass sortMissingLast=true to put * missing values at the end. */ public TermOrdValComparator(int numHits, String field, boolean sortMissingLast) { ords = new int[numHits]; values = new BytesRef[numHits]; tempBRs = new BytesRefBuilder[numHits]; readerGen = new int[numHits]; this.field = field; if (sortMissingLast) { missingSortCmp = 1; missingOrd = Integer.MAX_VALUE; } else { missingSortCmp = -1; missingOrd = -1; } } private int getOrdForDoc(int doc) throws IOException { if (termsIndex.advanceExact(doc)) { return termsIndex.ordValue(); } else { return -1; } } @Override public int compare(int slot1, int slot2) { if (readerGen[slot1] == readerGen[slot2]) { return ords[slot1] - ords[slot2]; } final BytesRef val1 = values[slot1]; final BytesRef val2 = values[slot2]; if (val1 == null) { if (val2 == null) { return 0; } return missingSortCmp; } else if (val2 == null) { return -missingSortCmp; } return val1.compareTo(val2); } @Override public int compareBottom(int doc) throws IOException { assert bottomSlot != -1; int docOrd = getOrdForDoc(doc); if (docOrd == -1) { docOrd = missingOrd; } if (bottomSameReader) { // ord is precisely comparable, even in the equal case return bottomOrd - docOrd; } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in // setBottom): return 1; } else { return -1; } } @Override public void copy(int slot, int doc) throws IOException { int ord = getOrdForDoc(doc); if (ord == -1) { ord = missingOrd; values[slot] = null; } else { assert ord >= 0; if (tempBRs[slot] == null) { tempBRs[slot] = new BytesRefBuilder(); } tempBRs[slot].copyBytes(termsIndex.lookupOrd(ord)); values[slot] = tempBRs[slot].get(); } ords[slot] = ord; readerGen[slot] = currentReaderGen; } /** Retrieves the SortedDocValues for the field in this segment */ protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException { return DocValues.getSorted(context.reader(), field); } @Override public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { termsIndex = getSortedDocValues(context, field); currentReaderGen++; if (topValue != null) { // Recompute topOrd/SameReader int ord = termsIndex.lookupTerm(topValue); if (ord >= 0) { topSameReader = true; topOrd = ord; } else { topSameReader = false; topOrd = -ord-2; } } else { topOrd = missingOrd; topSameReader = true; } //System.out.println(" getLeafComparator topOrd=" + topOrd + " topSameReader=" + topSameReader); if (bottomSlot != -1) { // Recompute bottomOrd/SameReader setBottom(bottomSlot); } return this; } @Override public void setBottom(final int bottom) throws IOException { bottomSlot = bottom; bottomValue = values[bottomSlot]; if (currentReaderGen == readerGen[bottomSlot]) { bottomOrd = ords[bottomSlot]; bottomSameReader = true; } else { if (bottomValue == null) { // missingOrd is null for all segments assert ords[bottomSlot] == missingOrd; bottomOrd = missingOrd; bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; } else { final int ord = termsIndex.lookupTerm(bottomValue); if (ord < 0) { bottomOrd = -ord - 2; bottomSameReader = false; } else { bottomOrd = ord; // exact value match bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; ords[bottomSlot] = bottomOrd; } } } } @Override public void setTopValue(BytesRef value) { // null is fine: it means the last doc of the prior // search was missing this value topValue = value; //System.out.println("setTopValue " + topValue); } @Override public BytesRef value(int slot) { return values[slot]; } @Override public int compareTop(int doc) throws IOException { int ord = getOrdForDoc(doc); if (ord == -1) { ord = missingOrd; } if (topSameReader) { // ord is precisely comparable, even in the equal // case //System.out.println("compareTop doc=" + doc + " ord=" + ord + " ret=" + (topOrd-ord)); return topOrd - ord; } else if (ord <= topOrd) { // the equals case always means doc is < value // (because we set lastOrd to the lower bound) return 1; } else { return -1; } } @Override public int compareValues(BytesRef val1, BytesRef val2) { if (val1 == null) { if (val2 == null) { return 0; } return missingSortCmp; } else if (val2 == null) { return -missingSortCmp; } return val1.compareTo(val2); } @Override public void setScorer(Scorable scorer) {} } /** Sorts by field's natural Term sort order. All * comparisons are done using BytesRef.compareTo, which is * slow for medium to large result sets but possibly * very fast for very small results sets. */ public static class TermValComparator extends FieldComparator implements LeafFieldComparator { private final BytesRef[] values; private final BytesRefBuilder[] tempBRs; private BinaryDocValues docTerms; private final String field; private BytesRef bottom; private BytesRef topValue; private final int missingSortCmp; /** Sole constructor. */ public TermValComparator(int numHits, String field, boolean sortMissingLast) { values = new BytesRef[numHits]; tempBRs = new BytesRefBuilder[numHits]; this.field = field; missingSortCmp = sortMissingLast ? 1 : -1; } private BytesRef getValueForDoc(int doc) throws IOException { if (docTerms.advanceExact(doc)) { return docTerms.binaryValue(); } else { return null; } } @Override public int compare(int slot1, int slot2) { final BytesRef val1 = values[slot1]; final BytesRef val2 = values[slot2]; return compareValues(val1, val2); } @Override public int compareBottom(int doc) throws IOException { final BytesRef comparableBytes = getValueForDoc(doc); return compareValues(bottom, comparableBytes); } @Override public void copy(int slot, int doc) throws IOException { final BytesRef comparableBytes = getValueForDoc(doc); if (comparableBytes == null) { values[slot] = null; } else { if (tempBRs[slot] == null) { tempBRs[slot] = new BytesRefBuilder(); } tempBRs[slot].copyBytes(comparableBytes); values[slot] = tempBRs[slot].get(); } } /** Retrieves the BinaryDocValues for the field in this segment */ protected BinaryDocValues getBinaryDocValues(LeafReaderContext context, String field) throws IOException { return DocValues.getBinary(context.reader(), field); } @Override public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { docTerms = getBinaryDocValues(context, field); return this; } @Override public void setBottom(final int bottom) { this.bottom = values[bottom]; } @Override public void setTopValue(BytesRef value) { // null is fine: it means the last doc of the prior // search was missing this value topValue = value; } @Override public BytesRef value(int slot) { return values[slot]; } @Override public int compareValues(BytesRef val1, BytesRef val2) { // missing always sorts first: if (val1 == null) { if (val2 == null) { return 0; } return missingSortCmp; } else if (val2 == null) { return -missingSortCmp; } return val1.compareTo(val2); } @Override public int compareTop(int doc) throws IOException { return compareValues(topValue, getValueForDoc(doc)); } @Override public void setScorer(Scorable scorer) {} } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy