// org.apache.lucene.queries.spans.SpanTermQuery Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queries.spans;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
/**
* Matches spans containing a term. This should not be used for terms that are indexed at position
* Integer.MAX_VALUE.
*/
public class SpanTermQuery extends SpanQuery {
protected final Term term;
protected final TermStates termStates;
/** Construct a SpanTermQuery matching the named term's spans. */
public SpanTermQuery(Term term) {
this.term = Objects.requireNonNull(term);
this.termStates = null;
}
/**
* Expert: Construct a SpanTermQuery matching the named term's spans, using the provided
* TermStates
*/
public SpanTermQuery(Term term, TermStates termStates) {
this.term = Objects.requireNonNull(term);
this.termStates = termStates;
}
/** Return the term whose spans are matched. */
public Term getTerm() {
return term;
}
/**
* Returns the {@link TermStates} passed to the constructor, or null if it was not passed.
*
* @lucene.experimental
*/
public TermStates getTermStates() {
return termStates;
}
@Override
public String getField() {
return term.field();
}
@Override
public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
final TermStates context;
final IndexReaderContext topContext = searcher.getTopReaderContext();
if (termStates == null || termStates.wasBuiltFor(topContext) == false) {
context = TermStates.build(searcher, term, scoreMode.needsScores());
} else {
context = termStates;
}
return new SpanTermWeight(
context,
searcher,
scoreMode.needsScores() ? Collections.singletonMap(term, context) : null,
boost);
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(term.field())) {
visitor.consumeTerms(this, term);
}
}
/**
* Creates SpanTermQuery scorer instances
*
* @lucene.internal
*/
public class SpanTermWeight extends SpanWeight {
final TermStates termStates;
public SpanTermWeight(
TermStates termStates, IndexSearcher searcher, Map terms, float boost)
throws IOException {
super(SpanTermQuery.this, searcher, terms, boost);
this.termStates = termStates;
assert termStates != null : "TermStates must not be null";
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
}
@Override
public void extractTermStates(Map contexts) {
contexts.put(term, termStates);
}
@Override
public Spans getSpans(final LeafReaderContext context, Postings requiredPostings)
throws IOException {
assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context))
: "The top-reader used to create Weight is not the same as the current reader's top-reader ("
+ ReaderUtil.getTopLevelContext(context);
final TermState state = termStates.get(context);
if (state == null) { // term is not present in that reader
assert context.reader().docFreq(term) == 0
: "no termstate found but term exists in reader term=" + term;
return null;
}
final Terms terms = context.reader().terms(term.field());
if (terms == null) return null;
if (terms.hasPositions() == false)
throw new IllegalStateException(
"field \""
+ term.field()
+ "\" was indexed without position data; cannot run SpanTermQuery (term="
+ term.text()
+ ")");
final TermsEnum termsEnum = terms.iterator();
termsEnum.seekExact(term.bytes(), state);
final PostingsEnum postings =
termsEnum.postings(null, requiredPostings.getRequiredPostings());
float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
return new TermSpans(getSimScorer(context), postings, term, positionsCost);
}
}
/**
* A guess of the relative cost of dealing with the term positions when using a SpanNearQuery
* instead of a PhraseQuery.
*/
private static final float PHRASE_TO_SPAN_TERM_POSITIONS_COST = 4.0f;
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
private static final int TERM_OPS_PER_POS = 7;
/**
* Returns an expected cost in simple operations of processing the occurrences of a term in a
* document that contains the term.
*
* @param termsEnum The term is the term at which this TermsEnum is positioned.
* This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
* TODO: keep only a single copy of this method and the constants used in it when
* SpanTermQuery moves to the o.a.l.search package.
*/
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
int docFreq = termsEnum.docFreq();
assert docFreq > 0;
long totalTermFreq = termsEnum.totalTermFreq();
assert totalTermFreq > 0;
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
}
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
if (term.field().equals(field)) buffer.append(term.text());
else buffer.append(term.toString());
return buffer.toString();
}
@Override
public int hashCode() {
return classHash() ^ term.hashCode();
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) && term.equals(((SpanTermQuery) other).term);
}
}