All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.facet.range.LongRangeCounter Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.facet.range;

import java.util.Arrays;
import java.util.Comparator;

/**
 * Counter for numeric ranges. Works for both single- and multi-valued cases (assuming you use it
 * correctly).
 *
 * 

Usage notes: When counting a document field that only has a single value, callers should call * addSingleValued() with the value. Whenever a document field has multiple values, callers should * call startMultiValuedDoc() at the beginning of processing the document, followed by * addMultiValued() with each value before finally calling endMultiValuedDoc() at the end of * processing the document. The call to endMultiValuedDoc() will respond with a boolean indicating * whether-or-not the specific document matched against at least one of the ranges being counted. * Finally, after processing all documents, the caller should call finish(). This final call will * ensure the contents of the user-provided {@code countBuffer} contains accurate counts (each index * corresponding to the provided {@code LongRange} in {@code ranges}). The final call to finish() * will also report how many additional documents did not match against any ranges. The combination * of the endMultiValuedDoc() boolean responses and the number reported by finish() communicates the * total number of missing documents. Note that the call to finish() will not report any documents * already reported missing by endMultiValuedDoc(). */ abstract class LongRangeCounter { /** accumulated counts for all of the ranges */ private final int[] countBuffer; /** * for multi-value docs, we keep track of the last elementary interval we've counted so we can use * that as a lower-bound when counting subsequent values. this takes advantage of the fact that * values within a given doc are sorted. */ protected int multiValuedDocLastSeenElementaryInterval; static LongRangeCounter create(LongRange[] ranges, int[] countBuffer) { if (hasOverlappingRanges(ranges)) { return new OverlappingLongRangeCounter(ranges, countBuffer); } else { return new ExclusiveLongRangeCounter(ranges, countBuffer); } } protected LongRangeCounter(int[] countBuffer) { // We'll populate the user-provided count buffer with range counts: this.countBuffer = countBuffer; } /** Start processing a new doc. It's unnecessary to call this for single-value cases. */ void startMultiValuedDoc() { multiValuedDocLastSeenElementaryInterval = -1; } /** * Finish processing a new doc. Returns whether-or-not the document contributed a count to at * least one range. It's unnecessary to call this for single-value cases. */ abstract boolean endMultiValuedDoc(); /** Count a single valued doc */ void addSingleValued(long v) { // NOTE: this works too, but it's ~6% slower on a simple // test with a high-freq TermQuery w/ range faceting on // wikimediumall: /* int index = Arrays.binarySearch(boundaries, v); if (index < 0) { index = -index-1; } leafCounts[index]++; */ // Binary search to find matched elementary range; we // are guaranteed to find a match because the last // boundary is Long.MAX_VALUE: long[] boundaries = boundaries(); int lo = 0; int hi = boundaries.length - 1; while (true) { int mid = (lo + hi) >>> 1; if (v <= boundaries[mid]) { if (mid == 0) { processSingleValuedHit(mid); return; } else { hi = mid - 1; } } else if (v > boundaries[mid + 1]) { lo = mid + 1; } else { processSingleValuedHit(mid + 1); return; } } } /** Count a multi-valued doc value */ void addMultiValued(long v) { if (rangeCount() == 0) { return; // don't bother if there aren't any requested ranges } long[] boundaries = boundaries(); // First check if we've "advanced" beyond the last elementary interval we counted for this doc. // If we haven't, there's no sense doing anything else: if (multiValuedDocLastSeenElementaryInterval != -1 && v <= boundaries[multiValuedDocLastSeenElementaryInterval]) { return; } // Also check if we've already counted the last elementary interval. If so, there's nothing // else to count for this doc: final int nextCandidateElementaryInterval = multiValuedDocLastSeenElementaryInterval + 1; if (nextCandidateElementaryInterval == boundaries.length) { return; } // Binary search in the range of the next candidate interval up to the last interval: int lo = nextCandidateElementaryInterval; int hi = boundaries.length - 1; while (true) { int mid = (lo + hi) >>> 1; if (v <= boundaries[mid]) { if (mid == nextCandidateElementaryInterval) { processMultiValuedHit(mid); multiValuedDocLastSeenElementaryInterval = mid; return; } else { hi = mid - 1; } } else if (v > boundaries[mid + 1]) { lo = mid + 1; } else { int idx = mid + 1; processMultiValuedHit(idx); multiValuedDocLastSeenElementaryInterval = idx; return; } } } /** * Finish processing all documents. This will return the number of docs that didn't contribute to * any ranges (that weren't already reported when calling endMultiValuedDoc()). */ abstract int finish(); /** Provide boundary information for elementary intervals (max inclusive value per interval) */ protected abstract long[] boundaries(); /** Process a single-value "hit" against an elementary interval. */ protected abstract void processSingleValuedHit(int elementaryIntervalNum); /** Process a multi-value "hit" against an elementary interval. */ protected abstract void processMultiValuedHit(int elementaryIntervalNum); /** Increment the specified range by one. */ protected final void increment(int rangeNum) { countBuffer[rangeNum]++; } /** Increment the specified range by the specified count. */ protected final void increment(int rangeNum, int count) { countBuffer[rangeNum] += count; } /** Number of ranges requested by the caller. */ protected final int rangeCount() { return countBuffer.length; } /** Determine whether-or-not any requested ranges overlap */ private static boolean hasOverlappingRanges(LongRange[] ranges) { if (ranges.length == 0) { return false; } // Copy before sorting so we don't mess with the caller's original ranges: LongRange[] sortedRanges = new LongRange[ranges.length]; System.arraycopy(ranges, 0, sortedRanges, 0, ranges.length); Arrays.sort(sortedRanges, Comparator.comparingLong(r -> r.min)); long previousMax = sortedRanges[0].max; for (int i = 1; i < sortedRanges.length; i++) { // Ranges overlap if the next min is <= the previous max (note that LongRange models // closed ranges, so equal limit points are considered overlapping): if (sortedRanges[i].min <= previousMax) { return true; } previousMax = sortedRanges[i].max; } return false; } protected static final class InclusiveRange { final long start; final long end; InclusiveRange(long start, long end) { assert end >= start; this.start = start; this.end = end; } @Override public String toString() { return start + " to " + end; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy