/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MappedMultiFields;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.ReaderSlice;
/**
* Abstract API that consumes terms, doc, freq, prox, offset and
* payloads postings. Concrete implementations of this API
* actually do "something" with the postings (e.g. write them into
* the index in a specific format).
*
* @lucene.experimental
*/
public abstract class FieldsConsumer implements Closeable {
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected FieldsConsumer() {
}
// TODO: can we somehow compute stats for you...?
// TODO: maybe we should factor out "limited" (only
// iterables, no counts/stats) base classes from
// Fields/Terms/Docs/AndPositions?
/** Write all fields, terms and postings. This is the "pull"
* API, allowing you to iterate more than once over the
* postings, somewhat analogous to using a DOM API to
* traverse an XML tree.
*
* <p><b>Notes:</b>
*
* <ul>
*   <li> You must compute index statistics,
*        including each Term's docFreq and totalTermFreq,
*        as well as the summary sumTotalTermFreq,
*        sumTotalDocFreq and docCount.
*
*   <li> You must skip terms that have no docs and
*        fields that have no terms, even though the provided
*        Fields API will expose them; this typically
*        requires lazily writing the field or term until
*        you've actually seen the first term or
*        document.
*
*   <li> The provided Fields instance is limited: you
*        cannot call any methods that return
*        statistics/counts; you cannot pass a non-null
*        live docs when pulling docs/positions enums.
* </ul>
*/
public abstract void write(Fields fields) throws IOException;
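// Illustrative sketch only (not part of this class): a concrete
// implementation typically pulls postings roughly as below, computing
// docFreq/totalTermFreq itself and lazily starting each field/term as
// required by the notes above. The TermsEnum.postings(PostingsEnum, int)
// signature assumed here is the one without a live-docs argument; some
// older releases also take a Bits liveDocs parameter.
//
//   for (String field : fields) {               // Fields is Iterable<String>
//     Terms terms = fields.terms(field);
//     if (terms == null) {
//       continue;                               // skip fields with no terms
//     }
//     TermsEnum termsEnum = terms.iterator();
//     BytesRef term;
//     while ((term = termsEnum.next()) != null) {
//       PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
//       int doc;
//       while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
//         // accumulate docFreq/totalTermFreq and write the posting for doc
//       }
//     }
//   }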
/** Merges in the fields from the readers in
* <code>mergeState</code>. The default implementation skips
* and maps around deleted documents, and calls {@link #write(Fields)}.
* Implementations can override this method for more sophisticated
* merging (bulk-byte copying, etc). */
public void merge(MergeState mergeState) throws IOException {
  final List<Fields> fields = new ArrayList<>();
  final List<ReaderSlice> slices = new ArrayList<>();

  int docBase = 0;

  for (int readerIndex = 0; readerIndex < mergeState.fieldsProducers.length; readerIndex++) {
    final FieldsProducer f = mergeState.fieldsProducers[readerIndex];

    final int maxDoc = mergeState.maxDocs[readerIndex];
    // Verify the incoming segment's postings before merging them.
    f.checkIntegrity();
    slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
    fields.add(f);
    docBase += maxDoc;
  }

  // Expose the per-reader postings as a single Fields view that remaps doc IDs
  // around deletions, then write the merged view through the normal "pull" API.
  Fields mergedFields = new MappedMultiFields(mergeState,
                                              new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                                                              slices.toArray(ReaderSlice.EMPTY_ARRAY)));
  write(mergedFields);
}
}
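// Usage sketch only: a FieldsConsumer is normally obtained from a
// PostingsFormat when a segment's postings are written, and is closed by
// the code that opened it. The "Lucene50" format name and the writeState
// and fields variables below are illustrative assumptions, not part of
// this file.
//
//   PostingsFormat format = PostingsFormat.forName("Lucene50");
//   try (FieldsConsumer consumer = format.fieldsConsumer(writeState)) {
//     consumer.write(fields);
//   }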