All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.queries.spans.SpanWeight Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.queries.spans;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafSimScorer;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;

/** Expert-only. Public for use by other weight implementations */
public abstract class SpanWeight extends Weight {

  /**
   * Enumeration defining what postings information should be retrieved from the index for a given
   * Spans. The constants are declared in increasing order of required detail, which {@link
   * #atLeast(Postings)} relies on.
   */
  public enum Postings {
    /** Requests {@link PostingsEnum#POSITIONS}. */
    POSITIONS(PostingsEnum.POSITIONS),

    /** Requests {@link PostingsEnum#PAYLOADS}. */
    PAYLOADS(PostingsEnum.PAYLOADS),

    /** Requests both {@link PostingsEnum#PAYLOADS} and {@link PostingsEnum#OFFSETS}. */
    OFFSETS(PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS);

    /** The {@link PostingsEnum} flag set associated with this constant. */
    private final int requiredPostings;

    Postings(int requiredPostings) {
      this.requiredPostings = requiredPostings;
    }

    /**
     * Returns the {@link PostingsEnum} flags to request for this level.
     *
     * @return the flag bits associated with this constant
     */
    public int getRequiredPostings() {
      return requiredPostings;
    }

    /**
     * Returns whichever of this constant and {@code postings} is declared later in the enum.
     *
     * @param postings the other level to compare against
     * @return {@code postings} if it is declared after this constant, otherwise this constant
     */
    public Postings atLeast(Postings postings) {
      return postings.compareTo(this) > 0 ? postings : this;
    }
  }

  protected final Similarity similarity;
  protected final Similarity.SimScorer simScorer;
  protected final String field;

  /**
   * Create a new SpanWeight
   *
   * @param query the parent query
   * @param searcher the IndexSearcher to query against
   * @param termStates a map of terms to {@link TermStates} for use in building the similarity. May
   *     be null if scores are not required
   * @param boost the boost handed to the similarity when the scorer is built
   * @throws IOException on error
   */
  public SpanWeight(
      SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost)
      throws IOException {
    super(query);
    this.field = query.getField();
    this.similarity = searcher.getSimilarity();
    // Null when termStates is null/empty, the query has no field, or no term has a positive docFreq.
    this.simScorer = buildSimWeight(query, searcher, termStates, boost);
  }

  /**
   * Builds the similarity scorer for this weight from the statistics of the query's terms.
   *
   * @param query the span query being weighted; its field selects the collection statistics
   * @param searcher the searcher that supplies term and collection statistics
   * @param termStates a map of terms to their {@link TermStates}; may be null or empty
   * @param boost the boost passed to {@link Similarity#scorer}
   * @return the scorer, or null when {@code termStates} is null or empty, the query has no field,
   *     or none of the terms has a positive document frequency
   * @throws IOException on error
   */
  private Similarity.SimScorer buildSimWeight(
      SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost)
      throws IOException {
    if (termStates == null || termStates.isEmpty() || query.getField() == null) {
      return null;
    }
    TermStatistics[] termStats = new TermStatistics[termStates.size()];
    int termUpTo = 0;
    for (Map.Entry<Term, TermStates> entry : termStates.entrySet()) {
      TermStates ts = entry.getValue();
      // Only terms with a positive document frequency contribute statistics.
      if (ts.docFreq() > 0) {
        termStats[termUpTo++] =
            searcher.termStatistics(entry.getKey(), ts.docFreq(), ts.totalTermFreq());
      }
    }
    CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
    if (termUpTo > 0) {
      // Trim the array to the entries that were actually filled in.
      return similarity.scorer(
          boost, collectionStats, ArrayUtil.copyOfSubArray(termStats, 0, termUpTo));
    } else {
      return null; // no terms at all exist, we won't use similarity
    }
  }

  /**
   * Collect all TermStates used by this Weight
   *
   * @param contexts a map, keyed by term, to add the TermStates to
   */
  public abstract void extractTermStates(Map<Term, TermStates> contexts);

  /**
   * Expert: Return a Spans object iterating over matches from this Weight
   *
   * @param ctx a LeafReaderContext for this Spans
   * @param requiredPostings the postings information the returned Spans should retrieve from the
   *     index
   * @return a Spans; callers in this class treat a null result as "no matches in this segment"
   * @throws IOException on error
   */
  public abstract Spans getSpans(LeafReaderContext ctx, Postings requiredPostings)
      throws IOException;

  /**
   * Creates a scorer over the spans of this weight in the given segment, requesting positions
   * only.
   *
   * @param context the segment to score
   * @return a {@link SpanScorer}, or null when {@link #getSpans} yields no spans for the segment
   * @throws IOException on error
   */
  @Override
  public SpanScorer scorer(LeafReaderContext context) throws IOException {
    final Spans segmentSpans = getSpans(context, Postings.POSITIONS);
    return segmentSpans == null
        ? null
        : new SpanScorer(this, segmentSpans, getSimScorer(context));
  }

  /**
   * Return a LeafSimScorer for this context
   *
   * @param context the LeafReaderContext
   * @return a LeafSimScorer over the context's reader and this weight's field, or null if this
   *     weight has no similarity scorer
   * @throws IOException on error
   */
  public LeafSimScorer getSimScorer(LeafReaderContext context) throws IOException {
    if (simScorer == null) {
      return null;
    }
    return new LeafSimScorer(simScorer, context.reader(), field, true);
  }

  /**
   * Explains the score this weight assigns to a document.
   *
   * @param context the segment containing the document
   * @param doc the segment-relative document id
   * @return a no-match explanation if there is no scorer for the segment or the scorer does not
   *     land on {@code doc}; a zero-valued match if this weight has no similarity scorer;
   *     otherwise a match wrapping the similarity's explanation of the span frequency
   * @throws IOException on error
   */
  @Override
  public Explanation explain(LeafReaderContext context, int doc) throws IOException {
    final SpanScorer spanScorer = scorer(context);
    if (spanScorer == null || spanScorer.iterator().advance(doc) != doc) {
      return Explanation.noMatch("no matching term");
    }

    if (simScorer == null) {
      // simScorer won't be set when scoring isn't needed
      return Explanation.match(
          0f, String.format(Locale.ROOT, "match %s in %s without score", getQuery(), doc));
    }

    final float freq = spanScorer.sloppyFreq();
    final LeafSimScorer leafScorer = new LeafSimScorer(simScorer, context.reader(), field, true);
    final Explanation scoreExplanation =
        leafScorer.explain(doc, Explanation.match(freq, "phraseFreq=" + freq));
    final String description =
        "weight("
            + getQuery()
            + " in "
            + doc
            + ") ["
            + similarity.getClass().getSimpleName()
            + "], result of:";
    return Explanation.match(scoreExplanation.getValue(), description, scoreExplanation);
  }

  /** Mutable record of one leaf term collected from a span: where it matched and which term. */
  private static class TermMatch {
    /** Position reported for the term when it was collected. */
    int position;

    /** Start offset read from the postings when the term was collected. */
    int startOffset;

    /** End offset read from the postings when the term was collected. */
    int endOffset;

    /** The term that was collected. */
    Term term;
  }

  /**
   * Returns the matches of this weight's query in a document, built via {@link
   * MatchesUtils#forField} for this weight's field.
   *
   * <p>The supplied iterator walks the spans of the document (requested with {@link
   * Postings#OFFSETS}). Offsets and sub-matches are derived lazily from the leaf terms collected
   * for the current span, sorted by position.
   *
   * @param context the segment containing the document
   * @param doc the segment-relative document id
   * @return the result of {@link MatchesUtils#forField}; the iterator supplier yields null when
   *     there are no spans for the segment or the spans do not land on {@code doc}
   * @throws IOException on error
   */
  @Override
  public Matches matches(LeafReaderContext context, int doc) throws IOException {
    return MatchesUtils.forField(
        field,
        () -> {
          Spans spans = getSpans(context, Postings.OFFSETS);
          if (spans == null || spans.advance(doc) != doc) {
            return null;
          }
          return new MatchesIterator() {

            // Number of valid entries in innerTerms for the current span; 0 means the terms have
            // not been collected yet (or were invalidated by next()).
            int innerTermCount = 0;
            // Reusable buffer of collected terms. Slots at or beyond innerTermCount are either
            // null (never used) or hold stale data from an earlier span.
            TermMatch[] innerTerms = new TermMatch[0];

            SpanCollector termCollector =
                new SpanCollector() {
                  @Override
                  public void collectLeaf(PostingsEnum postings, int position, Term term)
                      throws IOException {
                    final int slot = innerTermCount++;
                    if (slot >= innerTerms.length) {
                      // Grow geometrically so collecting n leaves copies O(n) references overall
                      // instead of reallocating on every single leaf.
                      innerTerms = Arrays.copyOf(innerTerms, Math.max(4, innerTerms.length * 2));
                    }
                    TermMatch match = innerTerms[slot];
                    if (match == null) {
                      // Slots are created on first use and recycled for later spans.
                      match = new TermMatch();
                      innerTerms[slot] = match;
                    }
                    match.term = term;
                    match.position = position;
                    match.startOffset = postings.startOffset();
                    match.endOffset = postings.endOffset();
                  }

                  @Override
                  public void reset() {
                    innerTermCount = 0;
                  }
                };

            @Override
            public boolean next() throws IOException {
              // Moving to another span invalidates the previously collected terms.
              innerTermCount = 0;
              return spans.nextStartPosition() != Spans.NO_MORE_POSITIONS;
            }

            @Override
            public int startPosition() {
              return spans.startPosition();
            }

            @Override
            public int endPosition() {
              // Reports one less than the span's end position.
              return spans.endPosition() - 1;
            }

            @Override
            public int startOffset() throws IOException {
              if (innerTermCount == 0) {
                collectInnerTerms();
              }
              // Terms are sorted by position, so the first one carries the start offset.
              return innerTerms[0].startOffset;
            }

            @Override
            public int endOffset() throws IOException {
              if (innerTermCount == 0) {
                collectInnerTerms();
              }
              // Terms are sorted by position, so the last one carries the end offset.
              return innerTerms[innerTermCount - 1].endOffset;
            }

            @Override
            public MatchesIterator getSubMatches() throws IOException {
              if (innerTermCount == 0) {
                collectInnerTerms();
              }
              // The sub-iterator reads the shared buffer directly, so it is only meaningful
              // until the enclosing iterator advances to another span.
              return new MatchesIterator() {

                int upto = -1;

                @Override
                public boolean next() throws IOException {
                  upto++;
                  return upto < innerTermCount;
                }

                @Override
                public int startPosition() {
                  return innerTerms[upto].position;
                }

                @Override
                public int endPosition() {
                  return innerTerms[upto].position;
                }

                @Override
                public int startOffset() throws IOException {
                  return innerTerms[upto].startOffset;
                }

                @Override
                public int endOffset() throws IOException {
                  return innerTerms[upto].endOffset;
                }

                @Override
                public MatchesIterator getSubMatches() throws IOException {
                  return null;
                }

                @Override
                public Query getQuery() {
                  return new TermQuery(innerTerms[upto].term);
                }
              };
            }

            @Override
            public Query getQuery() {
              return SpanWeight.this.getQuery();
            }

            /** Collects the leaf terms of the current span and orders them by position. */
            void collectInnerTerms() throws IOException {
              termCollector.reset();
              spans.collect(termCollector);
              // comparingInt avoids boxing the int position on every comparison.
              Arrays.sort(innerTerms, 0, innerTermCount, Comparator.comparingInt(a -> a.position));
            }
          };
        });
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy