org.apache.solr.search.facet.FacetFieldProcessorByHashDV Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Show all versions of solr-core Show documentation
Apache Solr (module: core)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.io.IOException;
import java.text.ParseException;
import java.util.function.IntFunction;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSetUtil;
import org.apache.solr.search.facet.SlotAcc.SlotContext;
/**
* Facets numbers into a hash table. The number is either a raw numeric DocValues value, or a term
* global ordinal integer. Limitations:
*
* <ul>
*   <li>doesn't handle prefix, but that could easily be added
*   <li>doesn't handle mincount==0 -- you're better off with an array algorithm
* </ul>
*/
class FacetFieldProcessorByHashDV extends FacetFieldProcessor {
// Going by the name, an upper bound on the initial hash table size; the code that applies it is
// outside this excerpt -- confirm there.
// must be a power of two, non-final to support setting by tests
static int MAXIMUM_STARTING_TABLE_SIZE = 1024;
/**
 * An open-addressing hash table from long keys (the values being counted) to long counts.
 *
 * <p>A count of zero marks a free slot, so no separate "occupied" flag is stored. The table
 * doubles in size whenever the number of distinct keys reaches the load-factor threshold.
 */
private static class LongCounts {
  static final float LOAD_FACTOR = 0.7f;

  /** Keys, indexed by slot. Only meaningful where the matching count is non-zero. */
  long[] vals;

  /** Occurrence counts, indexed by slot; zero means the slot is unused. */
  long[] counts;

  /**
   * Written by {@link #rehash()}: for every slot index of the previous table, the slot index the
   * entry now occupies, or -1 when the previous slot was empty. Null until the first rehash.
   */
  long[] oldToNewMapping;

  /** Number of distinct keys currently stored. */
  int cardinality;

  /** Once {@code cardinality} reaches this value, the next {@link #add(long)} grows the table. */
  int threshold;

  /**
   * Creates an empty table.
   *
   * @param sz initial number of slots; must be a power of two because slots are selected by
   *     masking with {@code length - 1}
   */
  LongCounts(int sz) {
    this.vals = new long[sz];
    this.counts = new long[sz];
    this.threshold = (int) (sz * LOAD_FACTOR);
  }

  /** Returns how many slots the table currently has (used or not). */
  int numSlots() {
    return vals.length;
  }

  /**
   * Mixes higher bits of the key into the low bits of the hash.
   *
   * <p>Shifting by 44 brings the 8 bits just below a double's exponent (which starts at bit 52)
   * down to the bottom; shifting by 15 does the same for a float's exponent (bit 23). The upper
   * bits of the result are not relied upon for slot selection.
   */
  private int hash(long val) {
    final long folded = val + (val >>> 44) + (val >>> 15);
    return (int) folded;
  }

  /**
   * Counts one occurrence of {@code val}, inserting it if it is not present yet.
   *
   * <p>May trigger {@link #rehash()} first, in which case previously returned slot numbers are
   * superseded by {@link #oldToNewMapping}.
   *
   * @return the slot that holds {@code val} after the call
   */
  int add(long val) {
    if (cardinality >= threshold) {
      rehash();
    }
    final int h = hash(val);
    final int mask = vals.length - 1;
    // forcing the stride odd lets the probe visit every slot of a power-of-two table
    final int stride = (h >> 7) | 1;
    int slot = h & mask;
    while (true) {
      final long seen = counts[slot];
      if (seen == 0) {
        // free slot: first occurrence of this key
        counts[slot] = 1;
        vals[slot] = val;
        cardinality++;
        return slot;
      }
      if (vals[slot] == val) {
        // key already present: bump its count
        counts[slot] = seen + 1;
        return slot;
      }
      slot = (slot + stride) & mask;
    }
  }

  /**
   * Doubles the table and re-inserts every entry. The previous counts array is recycled as the
   * old-slot to new-slot mapping and published through {@link #oldToNewMapping}.
   */
  protected void rehash() {
    final long[] prevVals = vals;
    // each element is overwritten with the mapping right after its count has been read
    final long[] prevCounts = counts;

    final int doubled = prevVals.length << 1;
    vals = new long[doubled];
    counts = new long[doubled];
    threshold = (int) (doubled * LOAD_FACTOR);

    final int mask = doubled - 1;
    for (int old = 0; old < prevVals.length; old++) {
      final long n = prevCounts[old];
      if (n != 0) {
        final long key = prevVals[old];
        final int h = hash(key);
        final int stride = (h >> 7) | 1;
        int slot = h & mask;
        while (counts[slot] != 0) {
          slot = (slot + stride) & mask;
        }
        counts[slot] = n;
        vals[slot] = key;
        prevCounts[old] = slot;
      } else {
        // nothing lived in this slot
        prevCounts[old] = -1;
      }
    }
    oldToNewMapping = prevCounts;
  }

  /** Returns the number of distinct keys stored. */
  int cardinality() {
    return cardinality;
  }
}
/**
 * A hack instance of Calc for Term ordinals in DocValues.
 *
 * <p>The "bits" handled by this calc are global term ordinals rather than encoded numbers. Only
 * the ordinal-to-value direction is implemented; the string-parsing hooks throw {@link
 * UnsupportedOperationException}.
 */
// TODO consider making FacetRangeProcessor.Calc facet top level; then less of a hack?
private class TermOrdCalc extends FacetRangeProcessor.Calc {
  /**
   * Resolves a global ordinal to the bytes of its term. Declared with an explicit {@code
   * BytesRef} result type so that {@link #bitsToValue(long)} can use the result without a cast.
   */
  IntFunction<BytesRef> lookupOrdFunction; // set in collectDocs()!

  TermOrdCalc() throws IOException {
    super(sf);
  }

  /** Returns the ordinal unchanged. */
  @Override
  public long bitsToSortableBits(long globalOrd) {
    return globalOrd;
  }

  /**
   * To be returned in "buckets"/"val".
   *
   * @param globalOrd the term ordinal; narrowed to {@code int} before the lookup
   * @return the string form of the object the field type builds from the term bytes
   */
  @Override
  @SuppressWarnings({"rawtypes"})
  public Comparable bitsToValue(long globalOrd) {
    BytesRef bytesRef = lookupOrdFunction.apply((int) globalOrd);
    // note FacetFieldProcessorByArray.findTopSlots also calls SchemaFieldType.toObject
    return sf.getType().toObject(sf, bytesRef).toString();
  }

  /** Casts the value to {@code String}; {@link #bitsToValue(long)} returns strings. */
  @Override
  @SuppressWarnings({"rawtypes"})
  public String formatValue(Comparable val) {
    return (String) val;
  }

  /** Not supported by this calc; always throws {@link UnsupportedOperationException}. */
  @Override
  @SuppressWarnings({"rawtypes"})
  protected Comparable parseStr(String rawval) throws ParseException {
    throw new UnsupportedOperationException();
  }

  /** Not supported by this calc; always throws {@link UnsupportedOperationException}. */
  @Override
  @SuppressWarnings({"rawtypes"})
  protected Comparable parseAndAddGap(Comparable value, String gap) throws ParseException {
    throw new UnsupportedOperationException();
  }
}
// Value calculator for the faceted field. NOTE(review): it is assigned outside this excerpt --
// presumably either a numeric Calc or a TermOrdCalc; confirm where it is set.
FacetRangeProcessor.Calc calc;
// Hash table of counted values; process() resets it to null after the response is computed.
LongCounts table;
// NOTE(review): -1 looks like a "no slot assigned" sentinel -- confirm against the code that
// sets and reads it.
int allBucketsSlot = -1;
FacetFieldProcessorByHashDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
super(fcontext, freq, sf);
if (freq.mincount == 0) {
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST, getClass() + " doesn't support mincount=0");
}
if (freq.prefix != null) {
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST,
getClass() + " doesn't support prefix"); // yet, but it could
}
FieldInfo fieldInfo = fcontext.searcher.getFieldInfos().fieldInfo(sf.getName());
if (fieldInfo != null
&& fieldInfo.getDocValuesType() != DocValuesType.NUMERIC
&& fieldInfo.getDocValuesType() != DocValuesType.SORTED
&& fieldInfo.getDocValuesType() != DocValuesType.SORTED_NUMERIC) {
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST,
getClass() + " only support single valued number/string with docValues");
}
}
/**
 * Runs the superclass processing, stores the result of {@code calcFacets()} as the response, and
 * then drops the hash table.
 */
@Override
public void process() throws IOException {
  super.process();
  this.response = calcFacets();
  // release the reference so the table can be garbage collected
  this.table = null;
}
private SimpleOrderedMap