Maven / Gradle / Ivy
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.*;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.ToStringUtils;
/**
 * A generalized version of {@link PhraseQuery}, with an added
 * method {@link #add(Term[])} for adding more than one term at the same position;
 * such terms are treated as a disjunction (OR).
 * To use this class to search for the phrase "Microsoft app*", first use
 * {@link #add(Term)} on the term "microsoft" (assuming lowercase analysis), then
 * find all terms that have "app" as prefix using {@link LeafReader#terms(String)},
 * seeking to "app" then iterating and collecting terms until there is no longer
 * that prefix, and finally use {@link #add(Term[])} to add them to the query.
 */
public class MultiPhraseQuery extends Query {
// Field shared by every term of the phrase; becomes non-null on the first add() and is
// unmodified afterwards.
private String field;
// One entry per phrase position: the alternative terms accepted at that position.
private final ArrayList<Term[]> termArrays = new ArrayList<>();
// Relative position of each entry of termArrays (same index in both lists).
private final ArrayList<Integer> positions = new ArrayList<>();
// Phrase slop; 0 selects the exact phrase scorer, anything else the sloppy one.
private int slop = 0;
/** Sets the phrase slop for this query.
* @see PhraseQuery#getSlop()
public void setSlop(int s) {
if (s < 0) {
throw new IllegalArgumentException("slop value cannot be negative");
slop = s;
/** Sets the phrase slop for this query.
* @see PhraseQuery#getSlop()
public int getSlop() { return slop; }
/** Add a single term at the next position in the phrase.
public void add(Term term) { add(new Term[]{term}); }
/** Add multiple terms at the next position in the phrase. Any of the terms
* may match (a disjunction).
* The array is not copied or mutated, the caller should consider it
* immutable subsequent to calling this method.
public void add(Term[] terms) {
int position = 0;
if (positions.size() > 0)
position = positions.get(positions.size() - 1) + 1;
add(terms, position);
* Allows to specify the relative position of terms within the phrase.
* The array is not copied or mutated, the caller should consider it
* immutable subsequent to calling this method.
public void add(Term[] terms, int position) {
Objects.requireNonNull(terms, "Term array must not be null");
if (termArrays.size() == 0)
field = terms[0].field();
for (Term term : terms) {
if (!term.field().equals(field)) {
throw new IllegalArgumentException(
"All phrase terms must be in the same field (" + field + "): " + term);
* Returns a List of the terms in the multi-phrase.
* Do not modify the List or its contents.
public List getTermArrays() {
return Collections.unmodifiableList(termArrays);
* Returns the relative positions of terms in this phrase.
public int[] getPositions() {
int[] result = new int[positions.size()];
for (int i = 0; i < positions.size(); i++)
result[i] = positions.get(i);
return result;
private class MultiPhraseWeight extends Weight {
private final Similarity similarity;
private final Similarity.SimWeight stats;
private final Map termContexts = new HashMap<>();
private final boolean needsScores;
public MultiPhraseWeight(IndexSearcher searcher, boolean needsScores)
throws IOException {
this.needsScores = needsScores;
this.similarity = searcher.getSimilarity(needsScores);
final IndexReaderContext context = searcher.getTopReaderContext();
// compute idf
ArrayList allTermStats = new ArrayList<>();
for(final Term[] terms: termArrays) {
for (Term term: terms) {
TermContext termContext = termContexts.get(term);
if (termContext == null) {
termContext =, term);
termContexts.put(term, termContext);
allTermStats.add(searcher.termStatistics(term, termContext));
stats = similarity.computeWeight(
allTermStats.toArray(new TermStatistics[allTermStats.size()]));
public void extractTerms(Set terms) {
for (final Term[] arr : termArrays) {
Collections.addAll(terms, arr);
public float getValueForNormalization() {
return stats.getValueForNormalization();
public void normalize(float queryNorm, float boost) {
stats.normalize(queryNorm, boost);
public Scorer scorer(LeafReaderContext context) throws IOException {
assert !termArrays.isEmpty();
final LeafReader reader = context.reader();
PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];
final Terms fieldTerms = reader.terms(field);
if (fieldTerms == null) {
return null;
// TODO: move this check to createWeight to happen earlier to the user?
if (fieldTerms.hasPositions() == false) {
throw new IllegalStateException("field \"" + field + "\" was indexed without position data;" +
" cannot run MultiPhraseQuery (phrase=" + getQuery() + ")");
// Reuse single TermsEnum below:
final TermsEnum termsEnum = fieldTerms.iterator();
float totalMatchCost = 0;
for (int pos=0; pos postings = new ArrayList<>();
for (Term term : terms) {
TermState termState = termContexts.get(term).get(context.ord);
if (termState != null) {
termsEnum.seekExact(term.bytes(), termState);
postings.add(termsEnum.postings(null, PostingsEnum.POSITIONS));
totalMatchCost += PhraseQuery.termPositionsCost(termsEnum);
if (postings.isEmpty()) {
return null;
final PostingsEnum postingsEnum;
if (postings.size() == 1) {
postingsEnum = postings.get(0);
} else {
postingsEnum = new UnionPostingsEnum(postings);
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, positions.get(pos).intValue(), terms);
// sort by increasing docFreq order
if (slop == 0) {
if (slop == 0) {
return new ExactPhraseScorer(this, postingsFreqs,
similarity.simScorer(stats, context),
needsScores, totalMatchCost);
} else {
return new SloppyPhraseScorer(this, postingsFreqs, slop,
similarity.simScorer(stats, context),
needsScores, totalMatchCost);
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);
if (scorer != null) {
int newDoc = scorer.iterator().advance(doc);
if (newDoc == doc) {
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq();
SimScorer docScorer = similarity.simScorer(stats, context);
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
return Explanation.match(
"weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:",
return Explanation.noMatch("no matching term");
public Query rewrite(IndexReader reader) throws IOException {
if (getBoost() != 1f) {
return super.rewrite(reader);
if (termArrays.isEmpty()) {
return new MatchNoDocsQuery();
} else if (termArrays.size() == 1) { // optimize one-term case
Term[] terms = termArrays.get(0);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (Term term : terms) {
builder.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
} else {
return super.rewrite(reader);
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new MultiPhraseWeight(searcher, needsScores);
/** Prints a user-readable version of this query. */
public final String toString(String f) {
StringBuilder buffer = new StringBuilder();
if (field == null || !field.equals(f)) {
int k = 0;
Iterator i = termArrays.iterator();
int lastPos = -1;
boolean first = true;
while (i.hasNext()) {
Term[] terms =;
int position = positions.get(k);
if (first) {
first = false;
} else {
buffer.append(" ");
for (int j=1; j<(position-lastPos); j++) {
buffer.append("? ");
if (terms.length > 1) {
for (int j = 0; j < terms.length; j++) {
if (j < terms.length-1)
buffer.append(" ");
} else {
lastPos = position;
if (slop != 0) {
return buffer.toString();
/** Returns true if o
is equal to this. */
public boolean equals(Object o) {
if (!(o instanceof MultiPhraseQuery)) return false;
MultiPhraseQuery other = (MultiPhraseQuery)o;
return super.equals(o)
&& this.slop == other.slop
&& termArraysEquals(this.termArrays, other.termArrays)
&& this.positions.equals(other.positions);
/** Returns a hash code value for this object.*/
public int hashCode() {
return super.hashCode()
^ slop
^ termArraysHashCode()
^ positions.hashCode();
// Breakout calculation of the termArrays hashcode
private int termArraysHashCode() {
int hashCode = 1;
for (final Term[] termArray: termArrays) {
hashCode = 31 * hashCode
+ (termArray == null ? 0 : Arrays.hashCode(termArray));
return hashCode;
// Breakout calculation of the termArrays equals
private boolean termArraysEquals(List termArrays1, List termArrays2) {
if (termArrays1.size() != termArrays2.size()) {
return false;
ListIterator iterator1 = termArrays1.listIterator();
ListIterator iterator2 = termArrays2.listIterator();
while (iterator1.hasNext()) {
Term[] termArray1 =;
Term[] termArray2 =;
if (!(termArray1 == null ? termArray2 == null : Arrays.equals(termArray1,
termArray2))) {
return false;
return true;
* Takes the logical union of multiple PostingsEnum iterators.
* Note: positions are merged during freq()
static class UnionPostingsEnum extends PostingsEnum {
/** queue ordered by docid */
final DocsQueue docsQueue;
/** cost of this enum: sum of its subs */
final long cost;
/** queue ordered by position for current doc */
final PositionsQueue posQueue = new PositionsQueue();
/** current doc posQueue is working */
int posQueueDoc = -2;
/** list of subs (unordered) */
final PostingsEnum[] subs;
UnionPostingsEnum(Collection subs) {
docsQueue = new DocsQueue(subs.size());
long cost = 0;
for (PostingsEnum sub : subs) {
cost += sub.cost();
this.cost = cost;
this.subs = subs.toArray(new PostingsEnum[subs.size()]);
public int freq() throws IOException {
int doc = docID();
if (doc != posQueueDoc) {
for (PostingsEnum sub : subs) {
if (sub.docID() == doc) {
int freq = sub.freq();
for (int i = 0; i < freq; i++) {
posQueueDoc = doc;
return posQueue.size();
public int nextPosition() throws IOException {
public int docID() {
public int nextDoc() throws IOException {
PostingsEnum top =;
int doc = top.docID();
do {
top = docsQueue.updateTop();
} while (top.docID() == doc);
return top.docID();
public int advance(int target) throws IOException {
PostingsEnum top =;
do {
top = docsQueue.updateTop();
} while (top.docID() < target);
return top.docID();
public long cost() {
return cost;
public int startOffset() throws IOException {
return -1; // offsets are unsupported
public int endOffset() throws IOException {
return -1; // offsets are unsupported
public BytesRef getPayload() throws IOException {
return null; // payloads are unsupported
* disjunction of postings ordered by docid.
static class DocsQueue extends PriorityQueue {
DocsQueue(int size) {
public final boolean lessThan(PostingsEnum a, PostingsEnum b) {
return a.docID() < b.docID();
* queue of terms for a single document. its a sorted array of
* all the positions from all the postings
static class PositionsQueue {
private int arraySize = 16;
private int index = 0;
private int size = 0;
private int[] array = new int[arraySize];
void add(int i) {
if (size == arraySize)
array[size++] = i;
int next() {
return array[index++];
void sort() {
Arrays.sort(array, index, size);
void clear() {
index = 0;
size = 0;
int size() {
return size;
private void growArray() {
int[] newArray = new int[arraySize * 2];
System.arraycopy(array, 0, newArray, 0, arraySize);
array = newArray;
arraySize *= 2;