// All downloads are free. Search and download functionality uses the official Maven repository.

// org.apache.lucene.codecs.DocValuesConsumer (Maven / Gradle / Ivy)

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocIDMerger;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

/** 
 * Abstract API that consumes numeric, binary and
 * sorted docvalues.  Concrete implementations of this
 * actually do "something" with the docvalues (write it into
 * the index in a specific format).
 * 

* The lifecycle is: *

    *
  1. DocValuesConsumer is created by * {@link NormsFormat#normsConsumer(SegmentWriteState)}. *
  2. {@link #addNumericField}, {@link #addBinaryField}, * {@link #addSortedField}, {@link #addSortedSetField}, * or {@link #addSortedNumericField} are called for each Numeric, * Binary, Sorted, SortedSet, or SortedNumeric docvalues field. * The API is a "pull" rather than "push", and the implementation * is free to iterate over the values multiple times * ({@link Iterable#iterator()}). *
  3. After all fields are added, the consumer is {@link #close}d. *
* * @lucene.experimental */ public abstract class DocValuesConsumer implements Closeable { /** Sole constructor. (For invocation by subclass * constructors, typically implicit.) */ protected DocValuesConsumer() {} /** * Writes numeric docvalues for a field. * @param field field information * @param valuesProducer Numeric values to write. * @throws IOException if an I/O error occurred. */ public abstract void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; /** * Writes binary docvalues for a field. * @param field field information * @param valuesProducer Binary values to write. * @throws IOException if an I/O error occurred. */ public abstract void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; /** * Writes pre-sorted binary docvalues for a field. * @param field field information * @param valuesProducer produces the values and ordinals to write * @throws IOException if an I/O error occurred. */ public abstract void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; /** * Writes pre-sorted numeric docvalues for a field * @param field field information * @param valuesProducer produces the values to write * @throws IOException if an I/O error occurred. */ public abstract void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; /** * Writes pre-sorted set docvalues for a field * @param field field information * @param valuesProducer produces the values to write * @throws IOException if an I/O error occurred. */ public abstract void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; /** Merges in the fields from the readers in * mergeState. The default implementation * calls {@link #mergeNumericField}, {@link #mergeBinaryField}, * {@link #mergeSortedField}, {@link #mergeSortedSetField}, * or {@link #mergeSortedNumericField} for each field, * depending on its type. 
* Implementations can override this method * for more sophisticated merging (bulk-byte copying, etc). */ public void merge(MergeState mergeState) throws IOException { for(DocValuesProducer docValuesProducer : mergeState.docValuesProducers) { if (docValuesProducer != null) { docValuesProducer.checkIntegrity(); } } for (FieldInfo mergeFieldInfo : mergeState.mergeFieldInfos) { DocValuesType type = mergeFieldInfo.getDocValuesType(); if (type != DocValuesType.NONE) { if (type == DocValuesType.NUMERIC) { mergeNumericField(mergeFieldInfo, mergeState); } else if (type == DocValuesType.BINARY) { mergeBinaryField(mergeFieldInfo, mergeState); } else if (type == DocValuesType.SORTED) { mergeSortedField(mergeFieldInfo, mergeState); } else if (type == DocValuesType.SORTED_SET) { mergeSortedSetField(mergeFieldInfo, mergeState); } else if (type == DocValuesType.SORTED_NUMERIC) { mergeSortedNumericField(mergeFieldInfo, mergeState); } else { throw new AssertionError("type=" + type); } } } } /** Tracks state of one numeric sub-reader that we are merging */ private static class NumericDocValuesSub extends DocIDMerger.Sub { final NumericDocValues values; public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values) { super(docMap); this.values = values; assert values.docID() == -1; } @Override public int nextDoc() throws IOException { return values.nextDoc(); } } /** * Merges the numeric docvalues from MergeState. *

* The default implementation calls {@link #addNumericField}, passing * a DocValuesProducer that merges and filters deleted documents on the fly. */ public void mergeNumericField(final FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException { addNumericField(mergeFieldInfo, new EmptyDocValuesProducer() { @Override public NumericDocValues getNumeric(FieldInfo fieldInfo) throws IOException { if (fieldInfo != mergeFieldInfo) { throw new IllegalArgumentException("wrong fieldInfo"); } List subs = new ArrayList<>(); assert mergeState.docMaps.length == mergeState.docValuesProducers.length; long cost = 0; for (int i=0;i docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); final long finalCost = cost; return new NumericDocValues() { private int docID = -1; private NumericDocValuesSub current; @Override public int docID() { return docID; } @Override public int nextDoc() throws IOException { current = docIDMerger.next(); if (current == null) { docID = NO_MORE_DOCS; } else { docID = current.mappedDocID; } return docID; } @Override public int advance(int target) throws IOException { throw new UnsupportedOperationException(); } @Override public boolean advanceExact(int target) throws IOException { throw new UnsupportedOperationException(); } @Override public long cost() { return finalCost; } @Override public long longValue() throws IOException { return current.values.longValue(); } }; } }); } /** Tracks state of one binary sub-reader that we are merging */ private static class BinaryDocValuesSub extends DocIDMerger.Sub { final BinaryDocValues values; public BinaryDocValuesSub(MergeState.DocMap docMap, BinaryDocValues values) { super(docMap); this.values = values; assert values.docID() == -1; } @Override public int nextDoc() throws IOException { return values.nextDoc(); } } /** * Merges the binary docvalues from MergeState. *

* The default implementation calls {@link #addBinaryField}, passing * a DocValuesProducer that merges and filters deleted documents on the fly. */ public void mergeBinaryField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException { addBinaryField(mergeFieldInfo, new EmptyDocValuesProducer() { @Override public BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException { if (fieldInfo != mergeFieldInfo) { throw new IllegalArgumentException("wrong fieldInfo"); } List subs = new ArrayList<>(); long cost = 0; for (int i=0;i docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); final long finalCost = cost; return new BinaryDocValues() { private BinaryDocValuesSub current; private int docID = -1; @Override public int docID() { return docID; } @Override public int nextDoc() throws IOException { current = docIDMerger.next(); if (current == null) { docID = NO_MORE_DOCS; } else { docID = current.mappedDocID; } return docID; } @Override public int advance(int target) throws IOException { throw new UnsupportedOperationException(); } @Override public boolean advanceExact(int target) throws IOException { throw new UnsupportedOperationException(); } @Override public long cost() { return finalCost; } @Override public BytesRef binaryValue() throws IOException { return current.values.binaryValue(); } }; } }); } /** Tracks state of one sorted numeric sub-reader that we are merging */ private static class SortedNumericDocValuesSub extends DocIDMerger.Sub { final SortedNumericDocValues values; public SortedNumericDocValuesSub(MergeState.DocMap docMap, SortedNumericDocValues values) { super(docMap); this.values = values; assert values.docID() == -1; } @Override public int nextDoc() throws IOException { return values.nextDoc(); } } /** * Merges the sorted docvalues from toMerge. *

* The default implementation calls {@link #addSortedNumericField}, passing * iterables that filter deleted documents. */ public void mergeSortedNumericField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException { addSortedNumericField(mergeFieldInfo, new EmptyDocValuesProducer() { @Override public SortedNumericDocValues getSortedNumeric(FieldInfo fieldInfo) throws IOException { if (fieldInfo != mergeFieldInfo) { throw new IllegalArgumentException("wrong FieldInfo"); } // We must make new iterators + DocIDMerger for each iterator: List subs = new ArrayList<>(); long cost = 0; for (int i=0;i docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); return new SortedNumericDocValues() { private int docID = -1; private SortedNumericDocValuesSub currentSub; @Override public int docID() { return docID; } @Override public int nextDoc() throws IOException { currentSub = docIDMerger.next(); if (currentSub == null) { docID = NO_MORE_DOCS; } else { docID = currentSub.mappedDocID; } return docID; } @Override public int advance(int target) throws IOException { throw new UnsupportedOperationException(); } @Override public boolean advanceExact(int target) throws IOException { throw new UnsupportedOperationException(); } @Override public int docValueCount() { return currentSub.values.docValueCount(); } @Override public long cost() { return finalCost; } @Override public long nextValue() throws IOException { return currentSub.values.nextValue(); } }; } }); } /** * A merged {@link TermsEnum}. This helps avoid relying on the default terms enum, * which calls {@link SortedDocValues#lookupOrd(int)} or * {@link SortedSetDocValues#lookupOrd(long)} on every call to {@link TermsEnum#next()}. 
*/ private static class MergedTermsEnum extends TermsEnum { private final TermsEnum[] subs; private final OrdinalMap ordinalMap; private final long valueCount; private long ord = -1; private BytesRef term; MergedTermsEnum(OrdinalMap ordinalMap, TermsEnum[] subs) { this.ordinalMap = ordinalMap; this.subs = subs; this.valueCount = ordinalMap.getValueCount(); } @Override public BytesRef term() throws IOException { return term; } @Override public long ord() throws IOException { return ord; } @Override public BytesRef next() throws IOException { if (++ord >= valueCount) { return null; } final int subNum = ordinalMap.getFirstSegmentNumber(ord); final TermsEnum sub = subs[subNum]; final long subOrd = ordinalMap.getFirstSegmentOrd(ord); do { term = sub.next(); } while (sub.ord() < subOrd); assert sub.ord() == subOrd; return term; } @Override public AttributeSource attributes() { throw new UnsupportedOperationException(); } @Override public boolean seekExact(BytesRef text) throws IOException { throw new UnsupportedOperationException(); } @Override public SeekStatus seekCeil(BytesRef text) throws IOException { throw new UnsupportedOperationException(); } @Override public void seekExact(long ord) throws IOException { throw new UnsupportedOperationException(); } @Override public void seekExact(BytesRef term, TermState state) throws IOException { throw new UnsupportedOperationException(); } @Override public int docFreq() throws IOException { throw new UnsupportedOperationException(); } @Override public long totalTermFreq() throws IOException { throw new UnsupportedOperationException(); } @Override public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } @Override public ImpactsEnum impacts(int flags) throws IOException { throw new UnsupportedOperationException(); } @Override public TermState termState() throws IOException { throw new UnsupportedOperationException(); } } /** Tracks state of one sorted 
sub-reader that we are merging */ private static class SortedDocValuesSub extends DocIDMerger.Sub { final SortedDocValues values; final LongValues map; public SortedDocValuesSub(MergeState.DocMap docMap, SortedDocValues values, LongValues map) { super(docMap); this.values = values; this.map = map; assert values.docID() == -1; } @Override public int nextDoc() throws IOException { return values.nextDoc(); } } /** * Merges the sorted docvalues from toMerge. *

* The default implementation calls {@link #addSortedField}, passing * an Iterable that merges ordinals and values and filters deleted documents . */ public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState) throws IOException { List toMerge = new ArrayList<>(); for (int i=0;i= 0) { bitset.set(ord); } } } liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset); weights[sub] = bitset.cardinality(); } } // step 2: create ordinal map (this conceptually does the "merging") final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT); // step 3: add field addSortedField(fieldInfo, new EmptyDocValuesProducer() { @Override public SortedDocValues getSorted(FieldInfo fieldInfoIn) throws IOException { if (fieldInfoIn != fieldInfo) { throw new IllegalArgumentException("wrong FieldInfo"); } // We must make new iterators + DocIDMerger for each iterator: List subs = new ArrayList<>(); long cost = 0; for (int i=0;i docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); return new SortedDocValues() { private int docID = -1; private int ord; @Override public int docID() { return docID; } @Override public int nextDoc() throws IOException { SortedDocValuesSub sub = docIDMerger.next(); if (sub == null) { return docID = NO_MORE_DOCS; } int subOrd = sub.values.ordValue(); assert subOrd != -1; ord = (int) sub.map.get(subOrd); docID = sub.mappedDocID; return docID; } @Override public int ordValue() { return ord; } @Override public int advance(int target) { throw new UnsupportedOperationException(); } @Override public boolean advanceExact(int target) throws IOException { throw new UnsupportedOperationException(); } @Override public long cost() { return finalCost; } @Override public int getValueCount() { return (int) map.getValueCount(); } @Override public BytesRef lookupOrd(int ord) throws IOException { int segmentNumber = map.getFirstSegmentNumber(ord); int segmentOrd = (int) map.getFirstSegmentOrd(ord); return 
dvs[segmentNumber].lookupOrd(segmentOrd); } @Override public TermsEnum termsEnum() throws IOException { TermsEnum[] subs = new TermsEnum[toMerge.size()]; for (int sub = 0; sub < subs.length; ++sub) { subs[sub] = toMerge.get(sub).termsEnum(); } return new MergedTermsEnum(map, subs); } }; } }); } /** Tracks state of one sorted set sub-reader that we are merging */ private static class SortedSetDocValuesSub extends DocIDMerger.Sub { final SortedSetDocValues values; final LongValues map; public SortedSetDocValuesSub(MergeState.DocMap docMap, SortedSetDocValues values, LongValues map) { super(docMap); this.values = values; this.map = map; assert values.docID() == -1; } @Override public int nextDoc() throws IOException { return values.nextDoc(); } @Override public String toString() { return "SortedSetDocValuesSub(mappedDocID=" + mappedDocID + " values=" + values + ")"; } } /** * Merges the sortedset docvalues from toMerge. *

* The default implementation calls {@link #addSortedSetField}, passing * an Iterable that merges ordinals and values and filters deleted documents . */ public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException { List toMerge = new ArrayList<>(); for (int i=0;i subs = new ArrayList<>(); long cost = 0; for (int i=0;i docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); final long finalCost = cost; return new SortedSetDocValues() { private int docID = -1; private SortedSetDocValuesSub currentSub; @Override public int docID() { return docID; } @Override public int nextDoc() throws IOException { currentSub = docIDMerger.next(); if (currentSub == null) { docID = NO_MORE_DOCS; } else { docID = currentSub.mappedDocID; } return docID; } @Override public int advance(int target) throws IOException { throw new UnsupportedOperationException(); } @Override public boolean advanceExact(int target) throws IOException { throw new UnsupportedOperationException(); } @Override public long nextOrd() throws IOException { long subOrd = currentSub.values.nextOrd(); if (subOrd == NO_MORE_ORDS) { return NO_MORE_ORDS; } return currentSub.map.get(subOrd); } @Override public long cost() { return finalCost; } @Override public BytesRef lookupOrd(long ord) throws IOException { int segmentNumber = map.getFirstSegmentNumber(ord); long segmentOrd = map.getFirstSegmentOrd(ord); return toMerge.get(segmentNumber).lookupOrd(segmentOrd); } @Override public long getValueCount() { return map.getValueCount(); } @Override public TermsEnum termsEnum() throws IOException { TermsEnum[] subs = new TermsEnum[toMerge.size()]; for (int sub = 0; sub < subs.length; ++sub) { subs[sub] = toMerge.get(sub).termsEnum(); } return new MergedTermsEnum(map, subs); } }; } }); } // TODO: seek-by-ord to nextSetBit static class BitsFilteredTermsEnum extends FilteredTermsEnum { final LongBitSet liveTerms; BitsFilteredTermsEnum(TermsEnum in, LongBitSet liveTerms) { super(in, 
false); // <-- not passing false here wasted about 3 hours of my time!!!!!!!!!!!!! assert liveTerms != null; this.liveTerms = liveTerms; } @Override protected AcceptStatus accept(BytesRef term) throws IOException { if (liveTerms.get(ord())) { return AcceptStatus.YES; } else { return AcceptStatus.NO; } } } /** Helper: returns true if the given docToValue count contains only at most one value */ public static boolean isSingleValued(Iterable docToValueCount) { for (Number count : docToValueCount) { if (count.longValue() > 1) { return false; } } return true; } /** Helper: returns single-valued view, using {@code missingValue} when count is zero */ public static Iterable singletonView(final Iterable docToValueCount, final Iterable values, final Number missingValue) { assert isSingleValued(docToValueCount); return new Iterable() { @Override public Iterator iterator() { final Iterator countIterator = docToValueCount.iterator(); final Iterator valuesIterator = values.iterator(); return new Iterator() { @Override public boolean hasNext() { return countIterator.hasNext(); } @Override public Number next() { int count = countIterator.next().intValue(); if (count == 0) { return missingValue; } else { return valuesIterator.next(); } } @Override public void remove() { throw new UnsupportedOperationException(); } }; } }; } }





// © 2015 - 2025 Weber Informatics LLC | Privacy Policy