// org.apache.solr.search.SolrDocumentFetcher Maven / Gradle / Ivy
// Show all versions of solr-core Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import java.util.function.Supplier;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.InvertableType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StoredValue;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.misc.document.LazyDocument;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.response.DocsStreamer;
import org.apache.solr.response.ResultContext;
import org.apache.solr.schema.AbstractEnumField;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.LatLonPointSpatialField;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A helper class of {@link org.apache.solr.search.SolrIndexSearcher} for stored Document related
* matters including DocValue substitutions.
*/
public class SolrDocumentFetcher {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

/** Searcher whose reader, schema and field infos back every lookup done by this class. */
private final SolrIndexSearcher searcher;

/** Number of leaves under the searcher's top reader context, captured at construction. */
private final int nLeaves;

/** Copy of {@code SolrConfig.enableLazyFieldLoading} taken at construction. */
private final boolean enableLazyFieldLoading;

/**
 * Cache of loaded documents keyed by Lucene doc id. {@code null} when caching is disabled or no
 * document cache is configured.
 */
private final SolrCache<Integer, Document> documentCache;

/** Names of all index fields whose schema field is stored. */
private final Set<String> allStored;

/** Names of all index fields accepted by {@code canSubstituteDvForStored}. */
private final Set<String> dvsCanSubstituteStored;

/** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */
private final Set<String> allNonStoredDVs;

/**
 * Contains the names/patterns of all docValues=true,stored=false,useDocValuesAsStored=true fields
 * in the schema.
 */
private final Set<String> nonStoredDVsUsedAsStored;

/**
 * Contains the names/patterns of all docValues=true,stored=false fields, excluding those that are
 * copyField targets in the schema.
 */
private final Set<String> nonStoredDVsWithoutCopyTargets;

/**
 * Internal setting, read from the {@code solr.largeField.cacheThreshold} system property (default
 * 512 KiB): a "large" field value is kept in memory by {@code LargeLazyField} only when its byte
 * length is below this threshold.
 */
private static int largeValueLengthCacheThreshold =
    Integer.getInteger("solr.largeField.cacheThreshold", 512 * 1024); // internal setting

/** Names of all stored fields that the schema marks as large. */
private final Set<String> largeFields;

private Collection<String> storedHighlightFieldNames; // lazy populated; use getter
private Collection<String> indexedFieldNames; // lazy populated; use getter
/**
 * Builds a fetcher bound to the given searcher and classifies every field known to the index
 * into the name sets kept by this class.
 *
 * @param searcher the searcher whose reader, schema and field infos are used
 * @param solrConfig supplies the lazy-field-loading flag and the document cache configuration
 * @param cachingEnabled when {@code false} no document cache is created, whatever the config says
 */
@SuppressWarnings({"unchecked"})
SolrDocumentFetcher(SolrIndexSearcher searcher, SolrConfig solrConfig, boolean cachingEnabled) {
  this.searcher = searcher;
  this.nLeaves = searcher.getTopReaderContext().leaves().size();
  this.enableLazyFieldLoading = solrConfig.enableLazyFieldLoading;

  // A cache exists only when caching is requested AND a document cache is configured.
  if (cachingEnabled && solrConfig.documentCacheConfig != null) {
    this.documentCache = solrConfig.documentCacheConfig.newInstance();
  } else {
    this.documentCache = null;
  }

  final Set<String> storedNames = new HashSet<>();
  final Set<String> storedLargeNames = new HashSet<>();
  final Set<String> dvSubstitutableNames = new HashSet<>();
  final Set<String> dvOnlyNames = new HashSet<>();
  final Set<String> dvOnlyUsedAsStoredNames = new HashSet<>();
  final Set<String> dvOnlyNonCopyTargetNames = new HashSet<>();

  // Walking the index's FieldInfos (rather than the Solr IndexSchema) also finds materialized
  // dynamic fields.
  for (FieldInfo fieldInfo : searcher.getFieldInfos()) {
    final String fieldName = fieldInfo.name;
    final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
    if (schemaField == null) {
      continue; // present in the index but unknown to the schema
    }

    if (canSubstituteDvForStored(fieldInfo, schemaField)) {
      dvSubstitutableNames.add(fieldName);
    }

    if (schemaField.stored()) {
      storedNames.add(fieldName);
      if (schemaField.isLarge()) {
        storedLargeNames.add(schemaField.getName());
      }
    } else if (schemaField.hasDocValues()) {
      dvOnlyNames.add(fieldName);
      if (schemaField.useDocValuesAsStored()) {
        dvOnlyUsedAsStoredNames.add(fieldName);
      }
      if (!searcher.getSchema().isCopyFieldTarget(schemaField)) {
        dvOnlyNonCopyTargetNames.add(fieldName);
      }
    }
  }

  // Publish read-only views; the sets are never modified after construction.
  this.allStored = Collections.unmodifiableSet(storedNames);
  this.largeFields = Collections.unmodifiableSet(storedLargeNames);
  this.dvsCanSubstituteStored = Collections.unmodifiableSet(dvSubstitutableNames);
  this.allNonStoredDVs = Collections.unmodifiableSet(dvOnlyNames);
  this.nonStoredDVsUsedAsStored = Collections.unmodifiableSet(dvOnlyUsedAsStoredNames);
  this.nonStoredDVsWithoutCopyTargets = Collections.unmodifiableSet(dvOnlyNonCopyTargetNames);
}
/**
 * Decides whether a field's value may be read from docValues instead of its stored value.
 *
 * <p>The field qualifies only if it is stored, has docValues and is single-valued. A field whose
 * docValues are numeric is additionally rejected when its type reports no {@link NumberType},
 * because such a value cannot be decoded.
 *
 * @param fieldInfo the index-level description of the field, used for its docValues type
 * @param schemaField the schema-level description of the field
 * @return {@code true} if docValues can stand in for the stored value
 */
private boolean canSubstituteDvForStored(FieldInfo fieldInfo, SchemaField schemaField) {
  final boolean storedWithDocValues = schemaField.hasDocValues() && schemaField.stored();
  if (!storedWithDocValues || schemaField.multiValued()) {
    return false;
  }

  final DocValuesType dvType = fieldInfo.getDocValuesType();
  final NumberType numberType = schemaField.getType().getNumberType();
  final boolean numericDocValues =
      dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC;

  // a numeric can not be decoded without knowing its numberType
  return !(numericDocValues && numberType == null);
}
/** Returns the lazy-field-loading flag captured from the {@code SolrConfig} at construction. */
public boolean isLazyFieldLoadingEnabled() {
  return this.enableLazyFieldLoading;
}
/**
 * Returns the document cache, or {@code null} when caching was disabled or no document cache is
 * configured.
 */
public SolrCache getDocumentCache() {
  return this.documentCache;
}
/**
 * Returns the names of all fields known to the index reader that are stored and whose schema type
 * is a Solr {@code TextField} or {@code StrField}, i.e. the stored fields that can be highlighted.
 *
 * <p>The collection is computed on first call and then reused; computation and access happen
 * while holding this object's monitor. Index fields that the schema cannot resolve are logged at
 * WARN level and skipped.
 */
public Collection getStoredHighlightFieldNames() {
  synchronized (this) {
    if (storedHighlightFieldNames != null) {
      return storedHighlightFieldNames;
    }

    storedHighlightFieldNames = new ArrayList<>();
    for (FieldInfo fieldInfo : searcher.getFieldInfos()) {
      final String fieldName = fieldInfo.name;
      try {
        final SchemaField field = searcher.getSchema().getField(fieldName);
        if (!field.stored()) {
          continue;
        }
        final boolean isText = field.getType() instanceof org.apache.solr.schema.TextField;
        final boolean isStr = field.getType() instanceof org.apache.solr.schema.StrField;
        if (isText || isStr) {
          storedHighlightFieldNames.add(fieldName);
        }
      } catch (RuntimeException e) {
        // getField() throws a SolrException, but it arrives as a RuntimeException
        log.warn("Field [{}] found in index, but not defined in schema.", fieldName);
      }
    }
    return storedHighlightFieldNames;
  }
}
/**
 * Returns the names of all fields known to the index reader whose index options are not {@link
 * IndexOptions#NONE}.
 *
 * <p>The collection is computed on first call and then reused; computation and access happen
 * while holding this object's monitor.
 */
public Collection getIndexedFieldNames() {
  synchronized (this) {
    if (indexedFieldNames != null) {
      return indexedFieldNames;
    }

    indexedFieldNames = new ArrayList<>();
    for (FieldInfo fieldInfo : searcher.getFieldInfos()) {
      final boolean indexed = fieldInfo.getIndexOptions() != IndexOptions.NONE;
      if (indexed) {
        indexedFieldNames.add(fieldInfo.name);
      }
    }
    return indexedFieldNames;
  }
}
/**
 * Retrieves the document with the given id without any field filter.
 *
 * @see SolrIndexSearcher#doc(int)
 */
public Document doc(int docId) throws IOException {
  // A typed null selects the (int, Set) overload rather than the (int, StoredFieldVisitor) one.
  final Set noFieldFilter = null;
  return doc(docId, noFieldFilter);
}
/**
 * Retrieve the {@link Document} instance corresponding to the document id.
 *
 * <p><b>NOTE</b>: the document will have all fields accessible, but if a field filter is
 * provided, only the provided fields will be loaded (the remainder will be available lazily).
 *
 * <p>When a document cache exists the document is obtained through it. In that case the field
 * filter is honoured only if lazy field loading is enabled; otherwise the cached document is
 * loaded without a filter.
 *
 * @param i the Lucene document id
 * @param fields the fields to load, or {@code null} for no filter
 * @see SolrIndexSearcher#doc(int, Set)
 */
public Document doc(int i, Set fields) throws IOException {
  if (documentCache == null) {
    return docNC(i, fields);
  }

  final Set getFields = enableLazyFieldLoading ? fields : null;
  final Document cached = documentCache.computeIfAbsent(i, docId -> docNC(docId, getFields));
  if (cached != null) {
    return cached;
  }
  // failed to retrieve due to an earlier exception, try again?
  return docNC(i, fields);
}
/**
 * Loads a document straight from the index reader, bypassing the document cache.
 *
 * @param i the Lucene document id
 * @param fields the fields to load, or {@code null} for no filter
 * @return the document assembled by a {@link SolrDocumentStoredFieldVisitor}
 */
private Document docNC(int i, Set fields) throws IOException {
  final DirectoryReader indexReader = searcher.getIndexReader();
  final SolrDocumentStoredFieldVisitor collector =
      new SolrDocumentStoredFieldVisitor(fields, indexReader, i);
  indexReader.document(i, collector);
  return collector.getDocument();
}
/**
 * This is an optimized version for populating a SolrDocument that:
 *
 * <ol>
 *   <li>fetches all fields from docValues if possible. If no decompression of the stored data is
 *       necessary, we can avoid a disk seek and decompression cycle. This step is only used if
 *       all requested fields are {@code docValues=true stored=false multiValued=false}. The last
 *       restriction exists because multiValued docValues fields do not faithfully reflect the
 *       input order in all cases.
 *   <li>if 1 is impossible, tries to fetch all requested fields from the stored values. If the
 *       stored data has to be decompressed anyway, it's more efficient to just get all field
 *       values from the stored values. If we got all the requested fields, return.
 *   <li>adds fields where docValues=true stored=false and thus could not be fetched in step 2.
 * </ol>
 *
 * <p>This method is designed to be as simple as possible to use, just call it. e.g. {@code
 * SolrDocument sdoc = docFetcher.solrDoc(id, solrReturnFields);} then process the resulting
 * SolrDocument as usual. Subsequent calls with the same solrReturnFields will re-use the
 * optimizer created the first time.
 *
 * <p>NOTE: DO NOT re-use the same SolrReturnFields object if the fields requested change.
 *
 * @param luceneDocId The Lucene doc ID
 * @param solrReturnFields the structure holding the fields to be returned. The first time this
 *     method is called for a particular document list, it will be modified by adding a
 *     RetrieveFieldsOptimizer for use in future calls.
 * @return The SolrDocument with values requested.
 */
public SolrDocument solrDoc(int luceneDocId, SolrReturnFields solrReturnFields) {
  // The factory is handed to getFetchOptimizer, which returns the optimizer to fetch with.
  return solrReturnFields
      .getFetchOptimizer(() -> new RetrieveFieldsOptimizer(solrReturnFields))
      .getSolrDoc(luceneDocId);
}
/**
 * {@link StoredFieldVisitor} which loads the specified fields eagerly (or all if null). If {@code
 * enableLazyFieldLoading} is set then the rest get special lazy field entries. Designated "large"
 * fields get a special {@link LargeLazyField} entry whenever a document cache is in use.
 */
private class SolrDocumentStoredFieldVisitor extends DocumentStoredFieldVisitor {
  /** The document being assembled; the same instance the superclass exposes via getDocument(). */
  private final Document doc;

  /**
   * Source of lazy field entries (arguably a better name than LazyDocument; at least how we use it
   * here). {@code null} when there is no field filter or lazy loading is disabled.
   */
  private final LazyDocument lazyFieldProducer;

  private final int docId;

  /** True when a document cache exists and the schema declares at least one large field. */
  private final boolean addLargeFieldsLazily;

  SolrDocumentStoredFieldVisitor(Set toLoad, IndexReader reader, int docId) {
    super(toLoad);
    this.docId = docId;
    this.doc = getDocument();
    if (toLoad != null && enableLazyFieldLoading) {
      this.lazyFieldProducer = new LazyDocument(reader, docId);
    } else {
      this.lazyFieldProducer = null;
    }
    this.addLargeFieldsLazily = documentCache != null && !largeFields.isEmpty();
    // TODO can we return Status.STOP after a val is loaded and we know there are no other fields
    // of interest?
    // When: toLoad is one single-valued field, no lazyFieldProducer
  }

  /**
   * Adds a string value to the document. When the {@code ResultContext.READASBYTES} predicate is
   * set and accepts the field name, the value is added as a {@link StoredField} whose type is
   * derived from {@code TextField.TYPE_STORED} with term vectors, norms and index options copied
   * from the field info; otherwise the superclass handles it.
   */
  @Override
  public void stringField(FieldInfo fieldInfo, String value) throws IOException {
    final Predicate readAsBytes = ResultContext.READASBYTES.get();
    final boolean useCustomType = readAsBytes != null && readAsBytes.test(fieldInfo.name);
    if (!useCustomType) {
      super.stringField(fieldInfo, value);
      return;
    }

    final FieldType storedType = new FieldType(TextField.TYPE_STORED);
    storedType.setStoreTermVectors(fieldInfo.hasVectors());
    storedType.setOmitNorms(fieldInfo.omitsNorms());
    storedType.setIndexOptions(fieldInfo.getIndexOptions());
    Objects.requireNonNull(value, "String value should not be null");
    doc.add(new StoredField(fieldInfo.name, value, storedType));
  }

  /**
   * Decides whether the field is loaded now. Large fields are never loaded here: they are
   * represented by a {@link LargeLazyField} (when requested, or when lazy entries are being
   * produced) and reported as {@code Status.NO}. Other fields the superclass rejects get a lazy
   * entry if a lazy field producer exists.
   */
  @Override
  public Status needsField(FieldInfo fieldInfo) throws IOException {
    final Status verdict = super.needsField(fieldInfo);
    assert verdict != Status.STOP : "Status.STOP not supported or expected";

    // load "large" fields using this lazy mechanism
    final boolean largeField = addLargeFieldsLazily && largeFields.contains(fieldInfo.name);
    if (largeField) {
      final boolean wanted = verdict == Status.YES || lazyFieldProducer != null;
      if (wanted) {
        doc.add(new LargeLazyField(fieldInfo.name, docId));
      }
      return Status.NO;
    }

    if (lazyFieldProducer != null && verdict == Status.NO) { // lazy
      doc.add(lazyFieldProducer.getField(fieldInfo));
    }
    return verdict;
  }
}
/**
 * Feeds the stored fields of a document to the given visitor. Without a document cache the
 * visitor is passed straight to the index reader; with one, the (possibly cached) document is
 * replayed to the visitor.
 *
 * @see SolrIndexSearcher#doc(int, StoredFieldVisitor)
 */
public void doc(int docId, StoredFieldVisitor visitor) throws IOException {
  if (documentCache == null) {
    searcher.getIndexReader().document(docId, visitor);
    return;
  }
  // get cached document or retrieve it including all fields (and cache it)
  final Document fromCache = doc(docId);
  visitFromCached(fromCache, visitor);
}
/**
 * Executes a stored field visitor against a hit from the document cache.
 *
 * <p>Each field of the document is offered to the visitor via {@code needsField}; {@code STOP}
 * ends the replay, {@code NO} skips the field. Accepted fields are dispatched by value kind:
 * binary first, then numeric (by boxed type), and otherwise as a string.
 */
private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
  for (IndexableField field : document) {
    final FieldInfo info = searcher.getFieldInfos().fieldInfo(field.name());
    final StoredFieldVisitor.Status verdict = visitor.needsField(info);
    if (verdict == StoredFieldVisitor.Status.STOP) {
      return;
    }
    if (verdict == StoredFieldVisitor.Status.NO) {
      continue;
    }

    final BytesRef bytes = field.binaryValue();
    if (bytes != null) {
      visitor.binaryField(info, toByteArrayUnwrapIfPossible(bytes));
      continue;
    }

    final Number number = field.numericValue();
    if (number == null) {
      // neither binary nor numeric: must be String
      final String text;
      if (field instanceof LargeLazyField) {
        // optimization to avoid premature string conversion
        text = toStringUnwrapIfPossible(((LargeLazyField) field).readBytes());
      } else {
        text = field.stringValue();
      }
      visitor.stringField(info, text);
      continue;
    }

    if (number instanceof Integer) {
      visitor.intField(info, number.intValue());
    } else if (number instanceof Long) {
      visitor.longField(info, number.longValue());
    } else if (number instanceof Float) {
      visitor.floatField(info, number.floatValue());
    } else if (number instanceof Double) {
      visitor.doubleField(info, number.doubleValue());
    } else {
      throw new AssertionError();
    }
  }
}
/**
 * Returns the bytes referenced by {@code bytesRef} as an array. When the reference spans its
 * whole backing array, that array itself is returned (no copy); otherwise the referenced range
 * is copied into a new array.
 */
private byte[] toByteArrayUnwrapIfPossible(BytesRef bytesRef) {
  final byte[] backing = bytesRef.bytes;
  final boolean spansWholeArray = bytesRef.offset == 0 && bytesRef.length == backing.length;
  if (spansWholeArray) {
    return backing;
  }
  final int from = bytesRef.offset;
  final int to = from + bytesRef.length;
  return Arrays.copyOfRange(backing, from, to);
}
/**
 * Decodes the bytes referenced by {@code bytesRef} as a UTF-8 string.
 *
 * <p>Only the {@code length} bytes starting at {@code offset} are decoded. When the reference
 * spans its whole backing array this is the same as decoding the entire array.
 *
 * @param bytesRef the byte range to decode
 * @return the decoded string
 */
private String toStringUnwrapIfPossible(BytesRef bytesRef) {
  // String(byte[], int offset, int length, Charset) takes a LENGTH as its third argument, not an
  // end index. Passing offset + length would decode too many bytes, or throw
  // IndexOutOfBoundsException, whenever offset > 0.
  return new String(bytesRef.bytes, bytesRef.offset, bytesRef.length, StandardCharsets.UTF_8);
}
/**
 * Lazily loaded stand-in for a "large" stored string field. Unlike LazyDocument.LazyField, we (a)
 * don't cache large values, and (b) provide access to the byte[].
 */
class LargeLazyField implements IndexableField {
  final String name;
  final int docId;

  /**
   * UTF-8 bytes of the value, kept only when the loaded value is shorter than {@code
   * largeValueLengthCacheThreshold}; {@code null} otherwise. Synchronize on 'this' to access.
   */
  BytesRef cachedBytes;

  private LargeLazyField(String name, int docId) {
    this.name = name;
    this.docId = docId;
  }

  @Override
  public String toString() {
    // mimic Field.java
    final String type = fieldType().toString();
    return type + "<" + name() + ">";
  }

  @Override
  public String name() {
    return name;
  }

  /** Looks the field type up in the searcher's schema by name. */
  @Override
  public IndexableFieldType fieldType() {
    return searcher.getSchema().getField(name());
  }

  @Override
  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
    // or we could throw unsupported exception?
    final String text = stringValue();
    return analyzer.tokenStream(name(), text);
  }

  /** (for tests) */
  synchronized boolean hasBeenLoaded() {
    return cachedBytes != null;
  }

  /** Returns the value as a string; an {@link IOException} while reading is rethrown unchecked. */
  @Override
  public synchronized String stringValue() {
    final BytesRef bytes;
    try {
      bytes = readBytes();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    return bytes.utf8ToString();
  }

  /**
   * Returns the UTF-8 bytes of this field's value, reading them from the index unless a cached
   * copy exists. The result is cached only when it is shorter than {@code
   * largeValueLengthCacheThreshold}.
   *
   * @throws UnsupportedOperationException if the field turns out to hold a binary value
   */
  synchronized BytesRef readBytes() throws IOException {
    if (cachedBytes != null) {
      return cachedBytes;
    }

    final BytesRef loaded = new BytesRef();
    final StoredFieldVisitor singleFieldReader =
        new StoredFieldVisitor() {
          // set once the value has been captured, so remaining fields are not visited
          boolean found = false;

          @Override
          public Status needsField(FieldInfo fieldInfo) throws IOException {
            if (found) {
              return Status.STOP;
            }
            if (fieldInfo.name.equals(name())) {
              return Status.YES;
            }
            return Status.NO;
          }

          @Override
          public void stringField(FieldInfo fieldInfo, String value) throws IOException {
            Objects.requireNonNull(value, "String value should not be null");
            final byte[] utf8 = value.getBytes(StandardCharsets.UTF_8);
            loaded.bytes = utf8;
            loaded.length = utf8.length;
            found = true;
          }

          @Override
          public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
            throw new UnsupportedOperationException(
                "'large' binary fields are not (yet) supported");
          }
        };
    searcher.getIndexReader().document(docId, singleFieldReader);

    if (loaded.length < largeValueLengthCacheThreshold) {
      cachedBytes = loaded;
    }
    return loaded;
  }

  @Override
  public BytesRef binaryValue() {
    return null;
  }

  @Override
  public Reader readerValue() {
    return null;
  }

  @Override
  public Number numericValue() {
    return null;
  }

  @Override
  public StoredValue storedValue() {
    final String text = stringValue();
    return new StoredValue(text);
  }

  @Override
  public InvertableType invertableType() {
    return null;
  }
}
/**
 * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
 *
 * @param doc A SolrDocument or SolrInputDocument instance where docValues will be added.
 *     NOTE(review): declared raw because the type parameters of {@code SolrDocumentBase} are not
 *     visible from here.
 * @param docid The lucene docid of the document to be populated
 * @param fields The fields with docValues to populate the document with. DocValues fields which
 *     do not exist or not decodable will be ignored.
 * @param reuseDvIters provides the per-field docValues supplier for each requested field; a
 *     field for which it returns {@code null} is skipped
 */
public void decorateDocValueFields(
    SolrDocumentBase doc,
    int docid,
    Set<String> fields,
    DocValuesIteratorCache reuseDvIters)
    throws IOException {
  // Resolve the top-level doc id to its leaf and to the doc id local to that leaf.
  final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
  final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
  final LeafReaderContext leaf = leafContexts.get(subIndex);
  final int localId = docid - leaf.docBase;
  final LeafReader leafReader = leaf.reader();

  for (String fieldName : fields) {
    final DocValuesIteratorCache.FieldDocValuesSupplier supplier =
        reuseDvIters.getSupplier(fieldName);
    if (supplier == null) {
      continue;
    }
    // A null value means the docValues field does not exist or could not be decoded.
    final Object fieldValue = decodeDVField(localId, leafReader, subIndex, supplier);
    if (fieldValue != null) {
      doc.setField(fieldName, fieldValue);
    }
  }
}
/**
* Decode value from DV field for a document
*
* @return null if DV field is not exist or can not decodable
*/
private Object decodeDVField(
int localId,
LeafReader leafReader,
int readerOrd,
DocValuesIteratorCache.FieldDocValuesSupplier e)
throws IOException {
final DocValuesType dvType = e.type;
switch (dvType) {
case NUMERIC:
final NumericDocValues ndv = e.getNumericDocValues(localId, leafReader, readerOrd);
if (ndv == null) {
return null;
}
long val = ndv.longValue();
return decodeNumberFromDV(e.schemaField, val, false);
case BINARY:
BinaryDocValues bdv = e.getBinaryDocValues(localId, leafReader, readerOrd);
if (bdv != null) {
return BytesRef.deepCopyOf(bdv.binaryValue());
}
return null;
case SORTED:
SortedDocValues sdv = e.getSortedDocValues(localId, leafReader, readerOrd);
if (sdv != null) {
final BytesRef bRef = sdv.lookupOrd(sdv.ordValue());
// Special handling for Boolean fields since they're stored as 'T' and 'F'.
if (e.schemaField.getType() instanceof BoolField) {
return e.schemaField.getType().toObject(e.schemaField, bRef);
} else {
return bRef.utf8ToString();
}
}
return null;
case SORTED_NUMERIC:
final SortedNumericDocValues numericDv =
e.getSortedNumericDocValues(localId, leafReader, readerOrd);
if (numericDv != null) {
final int docValueCount = numericDv.docValueCount();
final List