All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.queries.function.IndexReaderFunctions Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.queries.function;

import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;

/**
 * Class exposing static helper methods for generating DoubleValuesSource instances over some
 * IndexReader statistics
 */
public final class IndexReaderFunctions {

  // non-instantiable class
  private IndexReaderFunctions() {}

  /**
   * Creates a constant value source returning the docFreq of a given term
   *
   * @see IndexReader#docFreq(Term)
   */
  public static DoubleValuesSource docFreq(Term term) {
    return new IndexReaderDoubleValuesSource(
        r -> (double) r.docFreq(term), "docFreq(" + term.toString() + ")");
  }

  /**
   * Creates a constant value source returning the index's maxDoc
   *
   * @see IndexReader#maxDoc()
   */
  public static DoubleValuesSource maxDoc() {
    return new IndexReaderDoubleValuesSource(IndexReader::maxDoc, "maxDoc()");
  }

  /**
   * Creates a constant value source returning the index's numDocs
   *
   * @see IndexReader#numDocs()
   */
  public static DoubleValuesSource numDocs() {
    return new IndexReaderDoubleValuesSource(IndexReader::numDocs, "numDocs()");
  }

  /**
   * Creates a constant value source returning the number of deleted docs in the index
   *
   * @see IndexReader#numDeletedDocs()
   */
  public static DoubleValuesSource numDeletedDocs() {
    return new IndexReaderDoubleValuesSource(IndexReader::numDeletedDocs, "numDeletedDocs()");
  }

  /**
   * Creates a constant value source returning the sumTotalTermFreq for a field
   *
   * @see IndexReader#getSumTotalTermFreq(String)
   */
  public static LongValuesSource sumTotalTermFreq(String field) {
    return new SumTotalTermFreqValuesSource(field);
  }

  private static class SumTotalTermFreqValuesSource extends LongValuesSource {

    private final String field;

    private SumTotalTermFreqValuesSource(String field) {
      this.field = field;
    }

    @Override
    public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
      throw new UnsupportedOperationException("IndexReaderFunction must be rewritten before use");
    }

    @Override
    public boolean needsScores() {
      return false;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) return true;
      if (o == null || getClass() != o.getClass()) return false;
      SumTotalTermFreqValuesSource that = (SumTotalTermFreqValuesSource) o;
      return Objects.equals(field, that.field);
    }

    @Override
    public int hashCode() {
      return Objects.hash(field);
    }

    @Override
    public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
      return new NoCacheConstantLongValuesSource(
          searcher.getIndexReader().getSumTotalTermFreq(field), this);
    }

    @Override
    public String toString() {
      return "sumTotalTermFreq(" + field + ")";
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return false;
    }
  }

  private static class NoCacheConstantLongValuesSource extends LongValuesSource {

    final long value;
    final LongValuesSource parent;

    private NoCacheConstantLongValuesSource(long value, LongValuesSource parent) {
      this.value = value;
      this.parent = parent;
    }

    @Override
    public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
      return new LongValues() {
        @Override
        public long longValue() throws IOException {
          return value;
        }

        @Override
        public boolean advanceExact(int doc) throws IOException {
          return true;
        }
      };
    }

    @Override
    public boolean needsScores() {
      return false;
    }

    @Override
    public LongValuesSource rewrite(IndexSearcher reader) throws IOException {
      return this;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) return true;
      if (!(o instanceof NoCacheConstantLongValuesSource)) return false;
      NoCacheConstantLongValuesSource that = (NoCacheConstantLongValuesSource) o;
      return value == that.value && Objects.equals(parent, that.parent);
    }

    @Override
    public int hashCode() {
      return Objects.hash(value, parent);
    }

    @Override
    public String toString() {
      return parent.toString();
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return false;
    }
  }

  /**
   * Creates a value source that returns the term freq of a given term for each document
   *
   * @see PostingsEnum#freq()
   */
  public static DoubleValuesSource termFreq(Term term) {
    return new TermFreqDoubleValuesSource(term);
  }

  private static class TermFreqDoubleValuesSource extends DoubleValuesSource {

    private final Term term;

    private TermFreqDoubleValuesSource(Term term) {
      this.term = term;
    }

    @Override
    public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
      Terms terms = Terms.getTerms(ctx.reader(), term.field());
      TermsEnum te = terms.iterator();

      if (te.seekExact(term.bytes()) == false) {
        return DoubleValues.EMPTY;
      }

      final PostingsEnum pe = te.postings(null);
      assert pe != null;

      return new DoubleValues() {
        @Override
        public double doubleValue() throws IOException {
          return pe.freq();
        }

        @Override
        public boolean advanceExact(int doc) throws IOException {
          if (pe.docID() > doc) {
            return false;
          }
          return pe.docID() == doc || pe.advance(doc) == doc;
        }
      };
    }

    @Override
    public boolean needsScores() {
      return false;
    }

    @Override
    public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
      return this;
    }

    @Override
    public String toString() {
      return "termFreq(" + term.toString() + ")";
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) return true;
      if (o == null || getClass() != o.getClass()) return false;
      TermFreqDoubleValuesSource that = (TermFreqDoubleValuesSource) o;
      return Objects.equals(term, that.term);
    }

    @Override
    public int hashCode() {
      return Objects.hash(term);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }
  }

  /**
   * Creates a constant value source returning the totalTermFreq for a given term
   *
   * @see IndexReader#totalTermFreq(Term)
   */
  public static DoubleValuesSource totalTermFreq(Term term) {
    return new IndexReaderDoubleValuesSource(
        r -> r.totalTermFreq(term), "totalTermFreq(" + term.toString() + ")");
  }

  /**
   * Creates a constant value source returning the sumDocFreq for a given field
   *
   * @see IndexReader#getSumDocFreq(String)
   */
  public static DoubleValuesSource sumDocFreq(String field) {
    return new IndexReaderDoubleValuesSource(
        r -> r.getSumDocFreq(field), "sumDocFreq(" + field + ")");
  }

  /**
   * Creates a constant value source returning the docCount for a given field
   *
   * @see IndexReader#getDocCount(String)
   */
  public static DoubleValuesSource docCount(String field) {
    return new IndexReaderDoubleValuesSource(r -> r.getDocCount(field), "docCount(" + field + ")");
  }

  @FunctionalInterface
  private interface ReaderFunction {
    double apply(IndexReader reader) throws IOException;
  }

  private static class IndexReaderDoubleValuesSource extends DoubleValuesSource {

    private final ReaderFunction func;
    private final String description;

    private IndexReaderDoubleValuesSource(ReaderFunction func, String description) {
      this.func = func;
      this.description = description;
    }

    @Override
    public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
      throw new UnsupportedOperationException("IndexReaderFunction must be rewritten before use");
    }

    @Override
    public boolean needsScores() {
      return false;
    }

    @Override
    public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
      return new NoCacheConstantDoubleValuesSource(func.apply(searcher.getIndexReader()), this);
    }

    @Override
    public String toString() {
      return description;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) return true;
      if (o == null || getClass() != o.getClass()) return false;
      IndexReaderDoubleValuesSource that = (IndexReaderDoubleValuesSource) o;
      return Objects.equals(description, that.description) && Objects.equals(func, that.func);
    }

    @Override
    public int hashCode() {
      return Objects.hash(description, func);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return false;
    }
  }

  private static class NoCacheConstantDoubleValuesSource extends DoubleValuesSource {

    final double value;
    final DoubleValuesSource parent;

    private NoCacheConstantDoubleValuesSource(double value, DoubleValuesSource parent) {
      this.value = value;
      this.parent = parent;
    }

    @Override
    public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
      return new DoubleValues() {
        @Override
        public double doubleValue() throws IOException {
          return value;
        }

        @Override
        public boolean advanceExact(int doc) throws IOException {
          return true;
        }
      };
    }

    @Override
    public boolean needsScores() {
      return false;
    }

    @Override
    public DoubleValuesSource rewrite(IndexSearcher reader) throws IOException {
      return this;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) return true;
      if (!(o instanceof NoCacheConstantDoubleValuesSource)) return false;
      NoCacheConstantDoubleValuesSource that = (NoCacheConstantDoubleValuesSource) o;
      return Double.compare(that.value, value) == 0 && Objects.equals(parent, that.parent);
    }

    @Override
    public int hashCode() {
      return Objects.hash(value, parent);
    }

    @Override
    public String toString() {
      return parent.toString();
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return false;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy