All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.solr.index.SlowCompositeReaderWrapper Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.index;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.packed.PackedInts;

/**
 * This class forces a composite reader (eg a {@link MultiReader} or {@link DirectoryReader}) to
 * emulate a {@link LeafReader}. This requires implementing the postings APIs on-the-fly, using the
 * static methods in {@link MultiTerms}, {@link MultiDocValues}, by stepping through the sub-readers
 * to merge fields/terms, appending docs, etc.
 *
 * 

NOTE: this class almost always results in a performance hit. If this is important to * your use case, you'll get better performance by gathering the sub readers using {@link * IndexReader#getContext()} to get the leaves and then operate per-LeafReader, instead of using * this class. */ public final class SlowCompositeReaderWrapper extends LeafReader { private final CompositeReader in; private final LeafMetaData metaData; // Cached copy of FieldInfos to prevent it from being re-created on each // getFieldInfos call. Most (if not all) other LeafReader implementations // also have a cached FieldInfos instance so this is consistent. SOLR-12878 private final FieldInfos fieldInfos; // TODO: this could really be a weak map somewhere else on the coreCacheKey, // but do we really need to optimize slow-wrapper any more? final Map cachedOrdMaps = new ConcurrentHashMap<>(); /** * This method is sugar for getting an {@link LeafReader} from an {@link IndexReader} of any kind. * If the reader is already atomic, it is returned unchanged, otherwise wrapped by this class. 
*/ public static LeafReader wrap(IndexReader reader) throws IOException { if (reader instanceof CompositeReader) { return new SlowCompositeReaderWrapper((CompositeReader) reader); } else { assert reader instanceof LeafReader; return (LeafReader) reader; } } SlowCompositeReaderWrapper(CompositeReader reader) throws IOException { in = reader; in.registerParentReader(this); if (reader.leaves().isEmpty()) { metaData = new LeafMetaData(Version.LATEST.major, Version.LATEST, null, false); } else { Version minVersion = Version.LATEST; for (LeafReaderContext leafReaderContext : reader.leaves()) { Version leafVersion = leafReaderContext.reader().getMetaData().getMinVersion(); if (leafVersion == null) { minVersion = null; break; } else if (minVersion.onOrAfter(leafVersion)) { minVersion = leafVersion; } } LeafMetaData leafMetaData = reader.leaves().get(0).reader().getMetaData(); metaData = new LeafMetaData( leafMetaData.getCreatedVersionMajor(), minVersion, leafMetaData.getSort(), leafMetaData.hasBlocks()); } fieldInfos = FieldInfos.getMergedFieldInfos(in); } @Override public String toString() { return "SlowCompositeReaderWrapper(" + in + ")"; } @Override public CacheHelper getReaderCacheHelper() { return in.getReaderCacheHelper(); } @Override public CacheHelper getCoreCacheHelper() { // TODO: this is trappy as the expectation is that core keys live for a long // time, but here we need to bound it to the lifetime of the wrapped // composite reader? Unfortunately some features seem to rely on this... return in.getReaderCacheHelper(); } @Override public Terms terms(String field) throws IOException { ensureOpen(); return MultiTerms.getTerms(in, field); } @Override public NumericDocValues getNumericDocValues(String field) throws IOException { ensureOpen(); return MultiDocValues.getNumericValues(in, field); // TODO cache? 
} @Override public BinaryDocValues getBinaryDocValues(String field) throws IOException { ensureOpen(); return MultiDocValues.getBinaryValues(in, field); // TODO cache? } @Override public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException { ensureOpen(); return MultiDocValues.getSortedNumericValues(in, field); // TODO cache? } @Override public SortedDocValues getSortedDocValues(String field) throws IOException { ensureOpen(); // Integration of what was previously in MultiDocValues.getSortedValues: // The purpose of this integration is to be able to construct a value producer which can always // produce a value that is actually needed. The reason for the producer is to avoid getAndSet // pitfalls in this multithreaded context. // So all cases that do not lead to a cacheable value are handled upfront. // We kept the semantics of MultiDocValues.getSortedValues. final List leaves = in.leaves(); final int size = leaves.size(); if (size == 0) { return null; } else if (size == 1) { return leaves.get(0).reader().getSortedDocValues(field); } boolean anyReal = false; final SortedDocValues[] values = new SortedDocValues[size]; final int[] starts = new int[size + 1]; long totalCost = 0; for (int i = 0; i < size; i++) { LeafReaderContext context = in.leaves().get(i); final LeafReader reader = context.reader(); final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) { return null; } SortedDocValues v = reader.getSortedDocValues(field); if (v == null) { v = DocValues.emptySorted(); } else { anyReal = true; } totalCost += v.cost(); values[i] = v; starts[i] = context.docBase; } starts[size] = maxDoc(); if (anyReal == false) { return null; } // at this point in time we are able to formulate the producer OrdinalMap map = null; CacheHelper cacheHelper = getReaderCacheHelper(); Function producer = (notUsed) -> { try { OrdinalMap mapping = OrdinalMap.build( 
cacheHelper == null ? null : cacheHelper.getKey(), values, PackedInts.DEFAULT); return mapping; } catch (IOException e) { throw new RuntimeException(e); } }; // either we use a cached result that gets produced eventually during caching, // or we produce directly without caching if (cacheHelper != null) { map = cachedOrdMaps.computeIfAbsent(field + cacheHelper.getKey(), producer); } else { map = producer.apply("notUsed"); } return new MultiSortedDocValues(values, starts, map, totalCost); } @Override public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { ensureOpen(); // Integration of what was previously in MultiDocValues.getSortedSetValues: // The purpose of this integration is to be able to construct a value producer which can always // produce a value that is actually needed. The reason for the producer is to avoid getAndSet // pitfalls in this multithreaded context. // So all cases that do not lead to a cacheable value are handled upfront. // We kept the semantics of MultiDocValues.getSortedSetValues. 
final List leaves = in.leaves(); final int size = leaves.size(); if (size == 0) { return null; } else if (size == 1) { return leaves.get(0).reader().getSortedSetDocValues(field); } boolean anyReal = false; final SortedSetDocValues[] values = new SortedSetDocValues[size]; final int[] starts = new int[size + 1]; long totalCost = 0; for (int i = 0; i < size; i++) { LeafReaderContext context = in.leaves().get(i); final LeafReader reader = context.reader(); final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) { return null; } SortedSetDocValues v = reader.getSortedSetDocValues(field); if (v == null) { v = DocValues.emptySortedSet(); } else { anyReal = true; } totalCost += v.cost(); values[i] = v; starts[i] = context.docBase; } starts[size] = maxDoc(); if (anyReal == false) { return null; } // at this point in time we are able to formulate the producer OrdinalMap map = null; CacheHelper cacheHelper = getReaderCacheHelper(); Function producer = (notUsed) -> { try { OrdinalMap mapping = OrdinalMap.build( cacheHelper == null ? null : cacheHelper.getKey(), values, PackedInts.DEFAULT); return mapping; } catch (IOException e) { throw new RuntimeException(e); } }; // either we use a cached result that gets produced eventually during caching, // or we produce directly without caching if (cacheHelper != null) { map = cachedOrdMaps.computeIfAbsent(field + cacheHelper.getKey(), producer); } else { map = producer.apply("notUsed"); } return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, totalCost); } @Override public NumericDocValues getNormValues(String field) throws IOException { ensureOpen(); return MultiDocValues.getNormValues(in, field); // TODO cache? 
} @Override public Fields getTermVectors(int docID) throws IOException { return in.getTermVectors(docID); } @Override public TermVectors termVectors() throws IOException { ensureOpen(); return in.termVectors(); } @Override public StoredFields storedFields() throws IOException { ensureOpen(); return in.storedFields(); } @Override public int numDocs() { // Don't call ensureOpen() here (it could affect performance) return in.numDocs(); } @Override public int maxDoc() { // Don't call ensureOpen() here (it could affect performance) return in.maxDoc(); } @Override public void document(int docID, StoredFieldVisitor visitor) throws IOException { ensureOpen(); in.document(docID, visitor); } @Override public Bits getLiveDocs() { ensureOpen(); return MultiBits.getLiveDocs(in); // TODO cache? } @Override public PointValues getPointValues(String field) { ensureOpen(); throw new UnsupportedOperationException(); } @Override public FloatVectorValues getFloatVectorValues(String field) { ensureOpen(); throw new UnsupportedOperationException(); } @Override public ByteVectorValues getByteVectorValues(String field) { ensureOpen(); throw new UnsupportedOperationException(); } @Override public void searchNearestVectors( String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { throw new UnsupportedOperationException(); } @Override public void searchNearestVectors( String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { throw new UnsupportedOperationException(); } @Override public FieldInfos getFieldInfos() { return fieldInfos; } @Override protected void doClose() throws IOException { // TODO: as this is a wrapper, should we really close the delegate? in.close(); } @Override public void checkIntegrity() throws IOException { ensureOpen(); for (LeafReaderContext ctx : in.leaves()) { ctx.reader().checkIntegrity(); } } @Override public LeafMetaData getMetaData() { return metaData; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy