All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.uninverting.FieldCache Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.IOException;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.solr.legacy.LegacyNumericUtils;

/**
 * Expert: Maintains caches of term values.
 *
 * 

Created: May 19, 2004 11:13:14 AM * * @since lucene 1.4 * @lucene.internal */ public interface FieldCache { /** Placeholder indicating creation of this cache is currently in-progress. */ public static final class CreationPlaceholder implements Accountable { Accountable value; @Override public long ramBytesUsed() { // don't call on the in-progress value, might make things angry. return RamUsageEstimator.NUM_BYTES_OBJECT_REF; } } /** interface to all parsers. It is used to parse different numeric types. */ public interface Parser { /** * Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers to * filter the actual TermsEnum before the field cache is filled. * * @param terms the {@link Terms} instance to create the {@link TermsEnum} from. * @return a possibly filtered {@link TermsEnum} instance, this method must not return * null. * @throws IOException if an {@link IOException} occurs * @deprecated index with Points instead */ @Deprecated public TermsEnum termsEnum(Terms terms) throws IOException; /** Parse's this field's value */ public long parseValue(BytesRef term); } /** * Base class for points parsers. These parsers do not use the inverted index, but instead * uninvert point data. * *

This abstraction can be cleaned up when Parser.termsEnum is removed. */ public abstract class PointParser implements Parser { @Override public final TermsEnum termsEnum(Terms terms) throws IOException { throw new UnsupportedOperationException("makes no sense for parsing points"); } } /** Expert: The cache used internally by sorting and range query classes. */ public static FieldCache DEFAULT = new FieldCacheImpl(); /** * A parser instance for int values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. * when indexed via {@link org.apache.lucene.document.IntPoint}. */ public static final Parser INT_POINT_PARSER = new PointParser() { @Override public long parseValue(BytesRef point) { return NumericUtils.sortableBytesToInt(point.bytes, point.offset); } @Override public String toString() { return FieldCache.class.getName() + ".INT_POINT_PARSER"; } }; /** * A parser instance for long values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. * when indexed via {@link org.apache.lucene.document.LongPoint}. */ public static final Parser LONG_POINT_PARSER = new PointParser() { @Override public long parseValue(BytesRef point) { return NumericUtils.sortableBytesToLong(point.bytes, point.offset); } @Override public String toString() { return FieldCache.class.getName() + ".LONG_POINT_PARSER"; } }; /** * A parser instance for float values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. * when indexed via {@link org.apache.lucene.document.FloatPoint}. */ public static final Parser FLOAT_POINT_PARSER = new PointParser() { @Override public long parseValue(BytesRef point) { return NumericUtils.sortableFloatBits( NumericUtils.sortableBytesToInt(point.bytes, point.offset)); } @Override public String toString() { return FieldCache.class.getName() + ".FLOAT_POINT_PARSER"; } }; /** * A parser instance for double values encoded by {@link org.apache.lucene.util.NumericUtils}, * e.g. when indexed via {@link org.apache.lucene.document.DoublePoint}. */ public static final Parser DOUBLE_POINT_PARSER = new PointParser() { @Override public long parseValue(BytesRef point) { return NumericUtils.sortableDoubleBits( NumericUtils.sortableBytesToLong(point.bytes, point.offset)); } @Override public String toString() { return FieldCache.class.getName() + ".DOUBLE_POINT_PARSER"; } }; /** * A parser instance for int values encoded by {@link org.apache.solr.legacy.LegacyNumericUtils}, * e.g. when indexed via {@link org.apache.solr.legacy.LegacyIntField}/{@link * org.apache.solr.legacy.LegacyNumericTokenStream}. * * @deprecated Index with points and use {@link #INT_POINT_PARSER} instead. */ @Deprecated public static final Parser LEGACY_INT_PARSER = new Parser() { @Override public long parseValue(BytesRef term) { return LegacyNumericUtils.prefixCodedToInt(term); } @Override public TermsEnum termsEnum(Terms terms) throws IOException { return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator()); } @Override public String toString() { return FieldCache.class.getName() + ".LEGACY_INT_PARSER"; } }; /** * A parser instance for float values encoded with {@link * org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed via {@link * org.apache.solr.legacy.LegacyFloatField}/{@link * org.apache.solr.legacy.LegacyNumericTokenStream}. * * @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead. */ @Deprecated public static final Parser LEGACY_FLOAT_PARSER = new Parser() { @Override public long parseValue(BytesRef term) { int val = LegacyNumericUtils.prefixCodedToInt(term); if (val < 0) val ^= 0x7fffffff; return val; } @Override public String toString() { return FieldCache.class.getName() + ".LEGACY_FLOAT_PARSER"; } @Override public TermsEnum termsEnum(Terms terms) throws IOException { return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator()); } }; /** * A parser instance for long values encoded by {@link org.apache.solr.legacy.LegacyNumericUtils}, * e.g. when indexed via {@link org.apache.solr.legacy.LegacyLongField}/{@link * org.apache.solr.legacy.LegacyNumericTokenStream}. * * @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead. */ @Deprecated public static final Parser LEGACY_LONG_PARSER = new Parser() { @Override public long parseValue(BytesRef term) { return LegacyNumericUtils.prefixCodedToLong(term); } @Override public String toString() { return FieldCache.class.getName() + ".LEGACY_LONG_PARSER"; } @Override public TermsEnum termsEnum(Terms terms) throws IOException { return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator()); } }; /** * A parser instance for double values encoded with {@link * org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed via {@link * org.apache.solr.legacy.LegacyDoubleField}/{@link * org.apache.solr.legacy.LegacyNumericTokenStream}. * * @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead. */ @Deprecated public static final Parser LEGACY_DOUBLE_PARSER = new Parser() { @Override public long parseValue(BytesRef term) { long val = LegacyNumericUtils.prefixCodedToLong(term); if (val < 0) val ^= 0x7fffffffffffffffL; return val; } @Override public String toString() { return FieldCache.class.getName() + ".LEGACY_DOUBLE_PARSER"; } @Override public TermsEnum termsEnum(Terms terms) throws IOException { return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator()); } }; /** * Checks the internal cache for an appropriate entry, and if none is found, reads the * terms/points in field and returns a bit set at the size of reader.maxDoc() * , with turned on bits for each docid that does have a value for this field. * * @param parser May be {@code null} if coming from the inverted index, otherwise can be a {@link * PointParser} to compute from point values. */ public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException; /** * Returns a {@link NumericDocValues} over the values found in documents in the given field. If * the field was indexed as {@link NumericDocValuesField}, it simply uses {@link * org.apache.lucene.index.LeafReader#getNumericDocValues(String)} to read the values. Otherwise, * it checks the internal cache for an appropriate entry, and if none is found, reads the * terms/points in field as longs and returns an array of size reader.maxDoc() * of the value each document has in the given field. * * @param reader Used to get field values. * @param field Which field contains the longs. * @param parser Computes long for string values. May be {@code null} if the requested field was * indexed as {@link NumericDocValuesField} or {@link org.apache.solr.legacy.LegacyLongField}. * @return The values in the given field for each document. * @throws IOException If any error occurs. */ public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser) throws IOException; /** * Checks the internal cache for an appropriate entry, and if none is found, reads the term values * in field and returns a {@link BinaryDocValues} instance, providing a method to * retrieve the term (as a BytesRef) per document. * * @param reader Used to get field values. * @param field Which field contains the strings. * @return The values in the given field for each document. * @throws IOException If any error occurs. */ public BinaryDocValues getTerms(LeafReader reader, String field) throws IOException; /** * Expert: just like {@link #getTerms(org.apache.lucene.index.LeafReader,String)}, but you can * specify whether more RAM should be consumed in exchange for faster lookups (default is "true"). * Note that the first call for a given reader and field "wins", subsequent calls will share the * same cache entry. */ public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException; /** * Checks the internal cache for an appropriate entry, and if none is found, reads the term values * in field and returns a {@link SortedDocValues} instance, providing methods to * retrieve sort ordinals and terms (as a ByteRef) per document. * * @param reader Used to get field values. * @param field Which field contains the strings. * @return The values in the given field for each document. * @throws IOException If any error occurs. */ public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException; /** * Expert: just like {@link #getTermsIndex(org.apache.lucene.index.LeafReader,String)}, but you * can specify whether more RAM should be consumed in exchange for faster lookups (default is * "true"). Note that the first call for a given reader and field "wins", subsequent calls will * share the same cache entry. */ public SortedDocValues getTermsIndex( LeafReader reader, String field, float acceptableOverheadRatio) throws IOException; /** Can be passed to {@link #getDocTermOrds} to filter for 32-bit numeric terms */ public static final BytesRef INT32_TERM_PREFIX = new BytesRef(new byte[] {LegacyNumericUtils.SHIFT_START_INT}); /** Can be passed to {@link #getDocTermOrds} to filter for 64-bit numeric terms */ public static final BytesRef INT64_TERM_PREFIX = new BytesRef(new byte[] {LegacyNumericUtils.SHIFT_START_LONG}); /** * Checks the internal cache for an appropriate entry, and if none is found, reads the term values * in field and returns a {@link DocTermOrds} instance, providing a method to * retrieve the terms (as ords) per document. * * @param reader Used to build a {@link DocTermOrds} instance * @param field Which field contains the strings. * @param prefix prefix for a subset of the terms which should be uninverted. Can be null or * {@link #INT32_TERM_PREFIX} or {@link #INT64_TERM_PREFIX} * @return a {@link DocTermOrds} instance * @throws IOException If any error occurs. */ public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException; /** * EXPERT: A unique Identifier/Description for each item in the FieldCache. Can be useful for * logging/debugging. * * @lucene.experimental */ public final class CacheEntry { private final Object readerKey; private final String fieldName; private final Class cacheType; private final Object custom; private final Accountable value; public CacheEntry( IndexReader.CacheKey readerKey, String fieldName, Class cacheType, Object custom, Accountable value) { this.readerKey = readerKey; this.fieldName = fieldName; this.cacheType = cacheType; this.custom = custom; this.value = value; } public Object getReaderKey() { return readerKey; } public String getFieldName() { return fieldName; } public Class getCacheType() { return cacheType; } public Object getCustom() { return custom; } public Accountable getValue() { return value; } /** The most recently estimated size of the value, null unless estimateSize has been called. */ public String getEstimatedSize() { long bytesUsed = value == null ? 0L : value.ramBytesUsed(); return RamUsageEstimator.humanReadableUnits(bytesUsed); } @Override public String toString() { StringBuilder b = new StringBuilder(100); b.append("segment='").append(getReaderKey().toString()).append("', "); b.append("field='").append(getFieldName()).append("', "); String s = getEstimatedSize(); b.append("size =~ ").append(s); return b.toString(); } } /** * EXPERT: Generates an array of CacheEntry objects representing all items currently in the * FieldCache. * *

NOTE: These CacheEntry objects maintain a strong reference to the Cached Values. Maintaining * references to a CacheEntry the AtomicIndexReader associated with it has garbage collected will * prevent the Value itself from being garbage collected when the Cache drops the WeakReference. * * @lucene.experimental */ public CacheEntry[] getCacheEntries(); /** * EXPERT: Instructs the FieldCache to forcibly expunge all entries from the underlying caches. * This is intended only to be used for test methods as a way to ensure a known base state of the * Cache (with out needing to rely on GC to free WeakReferences). It should not be relied on for * "Cache maintenance" in general application code. * * @lucene.experimental */ public void purgeAllCaches(); /** * Expert: drops all cache entries associated with this reader {@link * org.apache.lucene.index.IndexReader.CacheHelper#getKey()}. NOTE: this cache key must precisely * match the reader that the cache entry is keyed on. If you pass a top-level reader, it usually * will have no effect as Lucene now caches at the segment reader level. */ public void purgeByCacheKey(IndexReader.CacheKey coreCacheKey); }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy