// org.apache.lucene.index.MultiFields (source listing: Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MergedIterator;
/**
* Exposes flex API, merged from flex API of sub-segments.
* This is useful when you're interacting with an {@link
* IndexReader} implementation that consists of sequential
* sub-readers (eg {@link DirectoryReader} or {@link
* MultiReader}).
*
* NOTE: for composite readers, you'll get better
* performance by gathering the sub readers using
* {@link IndexReader#getContext()} to get the
* atomic leaves and then operate per-LeafReader,
* instead of using this class.
*
* @lucene.experimental
*/
public final class MultiFields extends Fields {
/** Per-sub-reader {@link Fields}; {@link #subSlices} holds the entry for the same index. */
private final Fields[] subs;
/** Slice for each entry of {@link #subs}, as handed to the constructor. */
private final ReaderSlice[] subSlices;
/** Lazily filled cache of merged {@link Terms}, keyed by field name. */
private final Map<String,Terms> terms = new ConcurrentHashMap<>();
/**
 * Returns a single {@link Fields} instance for this reader, merging
 * fields/terms/docs/positions on the fly.
 *
 * <p>When the reader has exactly one leaf, that leaf's own {@link Fields} is
 * returned as-is. Otherwise the {@link Fields} of every leaf is collected,
 * together with a {@link ReaderSlice} built from the leaf's doc base and max
 * doc, and wrapped in a {@link MultiFields}.
 *
 * <p><b>NOTE</b>: this is a slow way to access postings. It's better to get
 * the sub-readers and iterate through them yourself.
 *
 * <p>NOTE(review): earlier documentation said this method returns null when
 * the reader has no postings. Nothing in this method produces null by itself,
 * so that can only happen if a leaf's {@code fields()} returns null -- confirm.
 *
 * @param reader the reader whose leaves are merged
 * @return the single leaf's {@link Fields}, or a merged view over all leaves
 * @throws IOException if a leaf reader fails while exposing its fields
 */
public static Fields getFields(IndexReader reader) throws IOException {
  final List<LeafReaderContext> leaves = reader.leaves();
  switch (leaves.size()) {
    case 1:
      // already an atomic reader / reader with one leaf
      return leaves.get(0).reader().fields();
    default:
      final List<Fields> fields = new ArrayList<>(leaves.size());
      final List<ReaderSlice> slices = new ArrayList<>(leaves.size());
      for (final LeafReaderContext ctx : leaves) {
        final LeafReader r = ctx.reader();
        final Fields f = r.fields();
        fields.add(f);
        // The slice's last argument is the index of f inside the fields list.
        slices.add(new ReaderSlice(ctx.docBase, r.maxDoc(), fields.size()-1));
      }
      if (fields.size() == 1) {
        return fields.get(0);
      } else {
        return new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
                               slices.toArray(ReaderSlice.EMPTY_ARRAY));
      }
  }
}
/**
 * Returns a single {@link Bits} instance for this reader, merging live
 * documents on the fly. Returns null if the reader has no deletions.
 *
 * <p>With a single leaf, that leaf's live docs are returned directly.
 * Otherwise the per-leaf live docs (which may individually be null) and the
 * per-leaf doc bases are handed to a {@link MultiBits}.
 *
 * <p><b>NOTE</b>: this is a very slow way to access live docs. For example,
 * each Bits access will require a binary search. It's better to get the
 * sub-readers and iterate through them yourself.
 *
 * @param reader the reader whose live docs are merged
 * @return merged live docs, or null when {@code reader.hasDeletions()} is false
 */
public static Bits getLiveDocs(IndexReader reader) {
  if (!reader.hasDeletions()) {
    return null;
  }
  final List<LeafReaderContext> leaves = reader.leaves();
  final int size = leaves.size();
  assert size > 0 : "A reader with deletions must have at least one leave";
  if (size == 1) {
    return leaves.get(0).reader().getLiveDocs();
  }
  final Bits[] liveDocs = new Bits[size];
  // One extra slot: starts[size] is the reader's maxDoc, closing the last range.
  final int[] starts = new int[size + 1];
  for (int i = 0; i < size; i++) {
    // record all liveDocs, even if they are null
    final LeafReaderContext ctx = leaves.get(i);
    liveDocs[i] = ctx.reader().getLiveDocs();
    starts[i] = ctx.docBase;
  }
  starts[size] = reader.maxDoc();
  return new MultiBits(liveDocs, starts, true);
}
/**
 * Returns the merged {@link Terms} of {@code field} for the given reader.
 *
 * <p>This method may return null if the field does not exist. It also returns
 * null, instead of failing with a {@link NullPointerException}, when
 * {@link #getFields(IndexReader)} yields no {@link Fields} at all.
 *
 * @param r the reader to look the field up in
 * @param field the field name
 * @return the field's terms, or null
 * @throws IOException if the underlying readers fail
 */
public static Terms getTerms(IndexReader r, String field) throws IOException {
  final Fields fields = getFields(r);
  if (fields == null) {
    return null;
  }
  return fields.terms(field);
}
/**
 * Looks up the {@link PostingsEnum} of {@code term} in {@code field},
 * requesting {@link PostingsEnum#FREQS}. The result is null when the field
 * or the term does not exist.
 *
 * @see #getTermDocsEnum(IndexReader, String, BytesRef, int)
 */
public static PostingsEnum getTermDocsEnum(IndexReader r, String field, BytesRef term) throws IOException {
  final int defaultFlags = PostingsEnum.FREQS;
  return getTermDocsEnum(r, field, term, defaultFlags);
}
/**
 * Looks up the {@link PostingsEnum} of {@code term} in {@code field}, letting
 * the caller choose via {@code flags} which features (e.g. freqs) are needed.
 * Some codecs may be able to optimize their implementation when freqs are not
 * required. The result is null when the field or the term does not exist.
 * See {@link TermsEnum#postings(PostingsEnum,int)}.
 */
public static PostingsEnum getTermDocsEnum(IndexReader r, String field, BytesRef term, int flags) throws IOException {
  assert field != null;
  assert term != null;
  final Terms fieldTerms = getTerms(r, field);
  if (fieldTerms == null) {
    // unknown field
    return null;
  }
  final TermsEnum cursor = fieldTerms.iterator();
  if (!cursor.seekExact(term)) {
    // unknown term
    return null;
  }
  return cursor.postings(null, flags);
}
/**
 * Looks up the {@link PostingsEnum} of {@code term} in {@code field},
 * requesting {@link PostingsEnum#ALL}. The result is null when the field or
 * the term does not exist.
 *
 * @see #getTermPositionsEnum(IndexReader, String, BytesRef, int)
 */
public static PostingsEnum getTermPositionsEnum(IndexReader r, String field, BytesRef term) throws IOException {
  final int defaultFlags = PostingsEnum.ALL;
  return getTermPositionsEnum(r, field, term, defaultFlags);
}
/**
 * Looks up the {@link PostingsEnum} of {@code term} in {@code field}, letting
 * the caller choose via {@code flags} whether offsets and payloads are needed.
 * Some codecs may be able to optimize their implementation when offsets
 * and/or payloads are not required. The result is null when the field or the
 * term does not exist. See {@link TermsEnum#postings(PostingsEnum,int)}.
 */
public static PostingsEnum getTermPositionsEnum(IndexReader r, String field, BytesRef term, int flags) throws IOException {
  assert field != null;
  assert term != null;
  final Terms fieldTerms = getTerms(r, field);
  // No Terms means the field is unknown; only then is the enum skipped.
  final TermsEnum cursor = (fieldTerms == null) ? null : fieldTerms.iterator();
  final boolean found = cursor != null && cursor.seekExact(term);
  return found ? cursor.postings(null, flags) : null;
}
/**
 * Expert: builds a MultiFields straight from the given per-sub-reader
 * {@link Fields} and their {@link ReaderSlice}s. Both arrays are stored by
 * reference, not copied.
 * @lucene.internal
 */
// TODO: why is this public?
public MultiFields(Fields[] subs, ReaderSlice[] subSlices) {
  this.subSlices = subSlices;
  this.subs = subs;
}
@SuppressWarnings({"unchecked","rawtypes"})
@Override
public Iterator iterator() {
Iterator subIterators[] = new Iterator[subs.length];
for(int i=0;i(subIterators);
}
@Override
public Terms terms(String field) throws IOException {
Terms result = terms.get(field);
if (result != null)
return result;
// Lazy init: first time this field is requested, we
// create & add to terms:
final List subs2 = new ArrayList<>();
final List slices2 = new ArrayList<>();
// Gather all sub-readers that share this field
for(int i=0;i
* NOTE: the returned field numbers will likely not
* correspond to the actual field numbers in the underlying
* readers, and codec metadata ({@link FieldInfo#getAttribute(String)}
* will be unavailable.
*/
public static FieldInfos getMergedFieldInfos(IndexReader reader) {
final FieldInfos.Builder builder = new FieldInfos.Builder();
for(final LeafReaderContext ctx : reader.leaves()) {
builder.add(ctx.reader().getFieldInfos());
}
return builder.finish();
}
/**
 * Returns the names of the indexed fields of a composite reader.
 *
 * <p>The merged {@link FieldInfos} from
 * {@link #getMergedFieldInfos(IndexReader)} is scanned and the name of every
 * field whose index options are not {@link IndexOptions#NONE} is collected.
 *
 * @param reader the reader whose leaves are inspected
 * @return a new, mutable collection of distinct field names, in no particular order
 */
public static Collection<String> getIndexedFields(IndexReader reader) {
  final Collection<String> fields = new HashSet<>();
  for (final FieldInfo fieldInfo : getMergedFieldInfos(reader)) {
    if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
      fields.add(fieldInfo.name);
    }
  }
  return fields;
}
}