All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.index.MultiFields Maven / Gradle / Ivy

There is a newer version: 9.10.0
Show newest version
package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MergedIterator;

/**
 * Exposes flex API, merged from flex API of sub-segments.
 * This is useful when you're interacting with an {@link
 * IndexReader} implementation that consists of sequential
 * sub-readers (eg {@link DirectoryReader} or {@link
 * MultiReader}).
 *
 * 

NOTE: for composite readers, you'll get better * performance by gathering the sub readers using * {@link IndexReader#getContext()} to get the * atomic leaves and then operate per-LeafReader, * instead of using this class. * * @lucene.experimental */ public final class MultiFields extends Fields { private final Fields[] subs; private final ReaderSlice[] subSlices; private final Map terms = new ConcurrentHashMap<>(); /** Returns a single {@link Fields} instance for this * reader, merging fields/terms/docs/positions on the * fly. This method will return null if the reader * has no postings. * *

NOTE: this is a slow way to access postings. * It's better to get the sub-readers and iterate through them * yourself. */ public static Fields getFields(IndexReader reader) throws IOException { final List leaves = reader.leaves(); switch (leaves.size()) { case 1: // already an atomic reader / reader with one leave return leaves.get(0).reader().fields(); default: final List fields = new ArrayList<>(leaves.size()); final List slices = new ArrayList<>(leaves.size()); for (final LeafReaderContext ctx : leaves) { final LeafReader r = ctx.reader(); final Fields f = r.fields(); fields.add(f); slices.add(new ReaderSlice(ctx.docBase, r.maxDoc(), fields.size()-1)); } if (fields.size() == 1) { return fields.get(0); } else { return new MultiFields(fields.toArray(Fields.EMPTY_ARRAY), slices.toArray(ReaderSlice.EMPTY_ARRAY)); } } } /** Returns a single {@link Bits} instance for this * reader, merging live Documents on the * fly. This method will return null if the reader * has no deletions. * *

NOTE: this is a very slow way to access live docs. * For example, each Bits access will require a binary search. * It's better to get the sub-readers and iterate through them * yourself. */ public static Bits getLiveDocs(IndexReader reader) { if (reader.hasDeletions()) { final List leaves = reader.leaves(); final int size = leaves.size(); assert size > 0 : "A reader with deletions must have at least one leave"; if (size == 1) { return leaves.get(0).reader().getLiveDocs(); } final Bits[] liveDocs = new Bits[size]; final int[] starts = new int[size + 1]; for (int i = 0; i < size; i++) { // record all liveDocs, even if they are null final LeafReaderContext ctx = leaves.get(i); liveDocs[i] = ctx.reader().getLiveDocs(); starts[i] = ctx.docBase; } starts[size] = reader.maxDoc(); return new MultiBits(liveDocs, starts, true); } else { return null; } } /** This method may return null if the field does not exist.*/ public static Terms getTerms(IndexReader r, String field) throws IOException { return getFields(r).terms(field); } /** Returns {@link PostingsEnum} for the specified field and * term. This will return null if the field or term does * not exist. */ public static PostingsEnum getTermDocsEnum(IndexReader r, String field, BytesRef term) throws IOException { return getTermDocsEnum(r, field, term, PostingsEnum.FREQS); } /** Returns {@link PostingsEnum} for the specified field and * term, with control over whether freqs are required. * Some codecs may be able to optimize their * implementation when freqs are not required. This will * return null if the field or term does not exist. See {@link * TermsEnum#postings(PostingsEnum,int)}.*/ public static PostingsEnum getTermDocsEnum(IndexReader r, String field, BytesRef term, int flags) throws IOException { assert field != null; assert term != null; final Terms terms = getTerms(r, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); if (termsEnum.seekExact(term)) { return termsEnum.postings(null, flags); } } return null; } /** Returns {@link PostingsEnum} for the specified * field and term. This will return null if the field or * term does not exist or positions were not indexed. * @see #getTermPositionsEnum(IndexReader, String, BytesRef, int) */ public static PostingsEnum getTermPositionsEnum(IndexReader r, String field, BytesRef term) throws IOException { return getTermPositionsEnum(r, field, term, PostingsEnum.ALL); } /** Returns {@link PostingsEnum} for the specified * field and term, with control over whether offsets and payloads are * required. Some codecs may be able to optimize * their implementation when offsets and/or payloads are not * required. This will return null if the field or term does not * exist. See {@link TermsEnum#postings(PostingsEnum,int)}. */ public static PostingsEnum getTermPositionsEnum(IndexReader r, String field, BytesRef term, int flags) throws IOException { assert field != null; assert term != null; final Terms terms = getTerms(r, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); if (termsEnum.seekExact(term)) { return termsEnum.postings(null, flags); } } return null; } /** * Expert: construct a new MultiFields instance directly. * @lucene.internal */ // TODO: why is this public? public MultiFields(Fields[] subs, ReaderSlice[] subSlices) { this.subs = subs; this.subSlices = subSlices; } @SuppressWarnings({"unchecked","rawtypes"}) @Override public Iterator iterator() { Iterator subIterators[] = new Iterator[subs.length]; for(int i=0;i(subIterators); } @Override public Terms terms(String field) throws IOException { Terms result = terms.get(field); if (result != null) return result; // Lazy init: first time this field is requested, we // create & add to terms: final List subs2 = new ArrayList<>(); final List slices2 = new ArrayList<>(); // Gather all sub-readers that share this field for(int i=0;i * NOTE: the returned field numbers will likely not * correspond to the actual field numbers in the underlying * readers, and codec metadata ({@link FieldInfo#getAttribute(String)} * will be unavailable. */ public static FieldInfos getMergedFieldInfos(IndexReader reader) { final FieldInfos.Builder builder = new FieldInfos.Builder(); for(final LeafReaderContext ctx : reader.leaves()) { builder.add(ctx.reader().getFieldInfos()); } return builder.finish(); } /** Call this to get the (merged) FieldInfos representing the * set of indexed fields only for a composite reader. *

* NOTE: the returned field numbers will likely not * correspond to the actual field numbers in the underlying * readers, and codec metadata ({@link FieldInfo#getAttribute(String)} * will be unavailable. */ public static Collection getIndexedFields(IndexReader reader) { final Collection fields = new HashSet<>(); for(final FieldInfo fieldInfo : getMergedFieldInfos(reader)) { if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { fields.add(fieldInfo.name); } } return fields; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy