org.apache.lucene.facet.taxonomy.OrdinalMappingLeafReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-facet Show documentation
Show all versions of lucene-facet Show documentation
Apache Lucene (module: facet)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.taxonomy;
import com.carrotsearch.hppc.IntArrayList;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.FacetsConfig.DimConfig;
import org.apache.lucene.facet.taxonomy.OrdinalsReader.OrdinalsSegmentReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FilterBinaryDocValues;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.FilterSortedNumericDocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/**
* A {@link org.apache.lucene.index.FilterLeafReader} for updating facets ordinal references, based
* on an ordinal map. You should use this code in conjunction with merging taxonomies - after you
* merge taxonomies, you receive an {@link OrdinalMap} which maps the 'old' ordinals to the 'new'
* ones. You can use that map to re-map the doc values which contain the facets information
* (ordinals) either before or while merging the indexes.
*
* For re-mapping the ordinals during index merge, do the following:
*
*
* // merge the old taxonomy with the new one.
* OrdinalMap map = new MemoryOrdinalMap();
* DirectoryTaxonomyWriter.addTaxonomy(srcTaxoDir, map);
* int[] ordmap = map.getMap();
*
* // Add the index and re-map ordinals on the go
* DirectoryReader reader = DirectoryReader.open(oldDir);
* IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
* IndexWriter writer = new IndexWriter(newDir, conf);
* List<LeafReaderContext> leaves = reader.leaves();
* LeafReader wrappedLeaves[] = new LeafReader[leaves.size()];
* for (int i = 0; i < leaves.size(); i++) {
* wrappedLeaves[i] = new OrdinalMappingLeafReader(leaves.get(i).reader(), ordmap);
* }
* writer.addIndexes(new MultiReader(wrappedLeaves));
* writer.commit();
*
*
* @lucene.experimental
*/
public class OrdinalMappingLeafReader extends FilterLeafReader {
// silly way, but we need to use dedupAndEncode and it's protected on FacetsConfig.
private static class InnerFacetsConfig extends FacetsConfig {
InnerFacetsConfig() {}
@Override
public BytesRef dedupAndEncode(IntsRef ordinals) {
return super.dedupAndEncode(ordinals);
}
}
private class OrdinalMappingBinaryDocValues extends FilterBinaryDocValues {
private final IntsRef ordinals = new IntsRef(32);
private final OrdinalsSegmentReader ordsReader;
OrdinalMappingBinaryDocValues(OrdinalsSegmentReader ordsReader, BinaryDocValues in)
throws IOException {
super(in);
this.ordsReader = ordsReader;
}
@SuppressWarnings("synthetic-access")
@Override
public BytesRef binaryValue() {
try {
// NOTE: this isn't quite koscher, because in general
// multiple threads can call BinaryDV.get which would
// then conflict on the single ordinals instance, but
// because this impl is only used for merging, we know
// only 1 thread calls us:
ordsReader.get(docID(), ordinals);
// map the ordinals
for (int i = 0; i < ordinals.length; i++) {
ordinals.ints[i] = ordinalMap[ordinals.ints[i]];
}
return encode(ordinals);
} catch (IOException e) {
throw new RuntimeException("error reading category ordinals for doc " + docID(), e);
}
}
}
private class OrdinalMappingSortedNumericDocValues extends FilterSortedNumericDocValues {
private final IntArrayList currentValues;
private int currIndex;
OrdinalMappingSortedNumericDocValues(SortedNumericDocValues in) {
super(in);
currentValues = new IntArrayList(32);
}
@Override
public boolean advanceExact(int target) throws IOException {
boolean result = in.advanceExact(target);
if (result) {
reloadValues();
}
return result;
}
@Override
public int advance(int target) throws IOException {
int result = in.advance(target);
if (result != DocIdSetIterator.NO_MORE_DOCS) {
reloadValues();
}
return result;
}
@Override
public int nextDoc() throws IOException {
int result = in.nextDoc();
if (result != DocIdSetIterator.NO_MORE_DOCS) {
reloadValues();
}
return result;
}
@Override
public int docValueCount() {
return currentValues.elementsCount;
}
private void reloadValues() throws IOException {
currIndex = 0;
currentValues.clear();
for (int i = 0; i < in.docValueCount(); i++) {
int originalOrd = Math.toIntExact(in.nextValue());
currentValues.add(ordinalMap[originalOrd]);
}
Arrays.sort(currentValues.buffer, 0, currentValues.elementsCount);
}
@Override
public long nextValue() {
assert currIndex < currentValues.size();
int actual = currentValues.get(currIndex);
currIndex++;
return actual;
}
}
private final int[] ordinalMap;
private final InnerFacetsConfig facetsConfig;
private final Set facetFields;
/**
* Wraps an LeafReader, mapping ordinals according to the ordinalMap, using the provided {@link
* FacetsConfig} which was used to build the wrapped reader.
*/
public OrdinalMappingLeafReader(LeafReader in, int[] ordinalMap, FacetsConfig srcConfig) {
super(in);
this.ordinalMap = ordinalMap;
facetsConfig = new InnerFacetsConfig();
facetFields = new HashSet<>();
for (DimConfig dc : srcConfig.getDimConfigs().values()) {
facetFields.add(dc.indexFieldName);
}
// always add the default indexFieldName. This is because FacetsConfig does
// not explicitly record dimensions that were indexed under the default
// DimConfig, unless they have a custom DimConfig.
facetFields.add(FacetsConfig.DEFAULT_DIM_CONFIG.indexFieldName);
}
/**
* Expert: encodes category ordinals into a BytesRef. Override in case you use custom encoding,
* other than the default done by FacetsConfig.
*
* @deprecated Custom binary formats are no longer directly supported for taxonomy faceting
* starting in Lucene 9
*/
@Deprecated
protected BytesRef encode(IntsRef ordinals) {
return facetsConfig.dedupAndEncode(ordinals);
}
/**
* Expert: override in case you used custom encoding for the categories under this field.
*
* @deprecated Custom binary formats are no longer directly supported for taxonomy faceting
* starting in Lucene 9
*/
@Deprecated
protected OrdinalsReader getOrdinalsReader(String field) {
return new DocValuesOrdinalsReader(field);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
BinaryDocValues original = in.getBinaryDocValues(field);
if (original != null && facetFields.contains(field)) {
// The requested field is a facet ordinals field _and_ it's non-null, so move forward with
// mapping:
final OrdinalsReader ordsReader = getOrdinalsReader(field);
return new OrdinalMappingBinaryDocValues(ordsReader.getReader(in.getContext()), original);
} else {
// The requested field either isn't present (null) or isn't a facet ordinals field. Either
// way, just return the original:
return original;
}
}
@Override
public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
SortedNumericDocValues original = in.getSortedNumericDocValues(field);
if (original != null && facetFields.contains(field)) {
// The requested field is a facet ordinals field _and_ it's non-null, so move forward with
// mapping:
return new OrdinalMappingSortedNumericDocValues(original);
} else {
// The requested field either isn't present (null) or isn't a facet ordinals field. Either
// way, just return the original:
return original;
}
}
@Override
public CacheHelper getCoreCacheHelper() {
return null;
}
@Override
public CacheHelper getReaderCacheHelper() {
return null;
}
}