All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.index.TermStates Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;
import java.util.Arrays;

/**
 * Maintains a {@link TermState} view over the {@link IndexReader}
 * instances that contain a single term. {@link TermStates} tracks
 * neither whether the given {@link TermState} objects are valid nor
 * whether the {@link TermState} instances refer to the same term in
 * the associated readers.
 * 
 * @lucene.experimental
 */
public final class TermStates {

  private static final TermState EMPTY_TERMSTATE = new TermState() {
    @Override
    public void copyFrom(TermState other) {

    }
  };

  // Important: do NOT keep hard references to index readers
  private final Object topReaderContextIdentity;
  private final TermState[] states;
  private final Term term;  // null if stats are to be used
  private int docFreq;
  private long totalTermFreq;

  //public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;

  private TermStates(Term term, IndexReaderContext context) {
    assert context != null && context.isTopLevel;
    topReaderContextIdentity = context.identity;
    docFreq = 0;
    totalTermFreq = 0;
    final int len;
    if (context.leaves() == null) {
      len = 1;
    } else {
      len = context.leaves().size();
    }
    states = new TermState[len];
    this.term = term;
  }

  /**
   * Creates an empty {@link TermStates} from a {@link IndexReaderContext}
   */
  public TermStates(IndexReaderContext context) {
    this(null, context);
  }

  /**
   * Expert: Return whether this {@link TermStates} was built for the given
   * {@link IndexReaderContext}. This is typically used for assertions.
   * @lucene.internal
   */
  public boolean wasBuiltFor(IndexReaderContext context) {
    return topReaderContextIdentity == context.identity;
  }

  /**
   * Creates a {@link TermStates} with an initial {@link TermState},
   * {@link IndexReader} pair.
   */
  public TermStates(IndexReaderContext context, TermState state, int ord, int docFreq, long totalTermFreq) {
    this(null, context);
    register(state, ord, docFreq, totalTermFreq);
  }

  /**
   * Creates a {@link TermStates} from a top-level {@link IndexReaderContext} and the
   * given {@link Term}. This method will lookup the given term in all context's leaf readers 
   * and register each of the readers containing the term in the returned {@link TermStates}
   * using the leaf reader's ordinal.
   * 

* Note: the given context must be a top-level context. * * @param needsStats if {@code true} then all leaf contexts will be visited up-front to * collect term statistics. Otherwise, the {@link TermState} objects * will be built only when requested */ public static TermStates build(IndexReaderContext context, Term term, boolean needsStats) throws IOException { assert context != null && context.isTopLevel; final TermStates perReaderTermState = new TermStates(needsStats ? null : term, context); if (needsStats) { for (final LeafReaderContext ctx : context.leaves()) { //if (DEBUG) System.out.println(" r=" + leaves[i].reader); TermsEnum termsEnum = loadTermsEnum(ctx, term); if (termsEnum != null) { final TermState termState = termsEnum.termState(); //if (DEBUG) System.out.println(" found"); perReaderTermState.register(termState, ctx.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); } } } return perReaderTermState; } private static TermsEnum loadTermsEnum(LeafReaderContext ctx, Term term) throws IOException { final Terms terms = ctx.reader().terms(term.field()); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); if (termsEnum.seekExact(term.bytes())) { return termsEnum; } } return null; } /** * Clears the {@link TermStates} internal state and removes all * registered {@link TermState}s */ public void clear() { docFreq = 0; totalTermFreq = 0; Arrays.fill(states, null); } /** * Registers and associates a {@link TermState} with an leaf ordinal. The leaf ordinal * should be derived from a {@link IndexReaderContext}'s leaf ord. */ public void register(TermState state, final int ord, final int docFreq, final long totalTermFreq) { register(state, ord); accumulateStatistics(docFreq, totalTermFreq); } /** * Expert: Registers and associates a {@link TermState} with an leaf ordinal. The * leaf ordinal should be derived from a {@link IndexReaderContext}'s leaf ord. 
* On the contrary to {@link #register(TermState, int, int, long)} this method * does NOT update term statistics. */ public void register(TermState state, final int ord) { assert state != null : "state must not be null"; assert ord >= 0 && ord < states.length; assert states[ord] == null : "state for ord: " + ord + " already registered"; states[ord] = state; } /** Expert: Accumulate term statistics. */ public void accumulateStatistics(final int docFreq, final long totalTermFreq) { assert docFreq >= 0; assert totalTermFreq >= 0; assert docFreq <= totalTermFreq; this.docFreq += docFreq; this.totalTermFreq += totalTermFreq; } /** * Returns the {@link TermState} for a leaf reader context or null if no * {@link TermState} for the context was registered. * * @param ctx * the {@link LeafReaderContext} to get the {@link TermState} for. * @return the {@link TermState} for the given readers ord or null if no * {@link TermState} for the reader was registered */ public TermState get(LeafReaderContext ctx) throws IOException { assert ctx.ord >= 0 && ctx.ord < states.length; if (term == null) return states[ctx.ord]; if (this.states[ctx.ord] == null) { TermsEnum te = loadTermsEnum(ctx, term); this.states[ctx.ord] = te == null ? EMPTY_TERMSTATE : te.termState(); } if (this.states[ctx.ord] == EMPTY_TERMSTATE) return null; return this.states[ctx.ord]; } /** * Returns the accumulated document frequency of all {@link TermState} * instances passed to {@link #register(TermState, int, int, long)}. * @return the accumulated document frequency of all {@link TermState} * instances passed to {@link #register(TermState, int, int, long)}. */ public int docFreq() { if (term != null) { throw new IllegalStateException("Cannot call docFreq() when needsStats=false"); } return docFreq; } /** * Returns the accumulated term frequency of all {@link TermState} * instances passed to {@link #register(TermState, int, int, long)}. 
* @return the accumulated term frequency of all {@link TermState} * instances passed to {@link #register(TermState, int, int, long)}. */ public long totalTermFreq() { if (term != null) { throw new IllegalStateException("Cannot call totalTermFreq() when needsStats=false"); } return totalTermFreq; } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("TermStates\n"); for(TermState termState : states) { sb.append(" state="); sb.append(termState); sb.append('\n'); } return sb.toString(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy