All downloads are free. Search and download functionality uses the official Maven repository.

org.wikimedia.highlighter.experimental.lucene.QueryFlattener Maven / Gradle / Ivy

package org.wikimedia.highlighter.experimental.lucene;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

/**
 * Flattens {@link Query}s similarly to Lucene's FieldQuery: walks a query tree
 * and reports every {@link Term} it reaches, together with the boost
 * accumulated along the path to that term, to a {@link Callback}.
 *
 * <p>
 * Query types without a dedicated {@code flattenQuery} overload are first
 * offered to {@link #flattenUnknown} and, if that declines, rewritten once
 * against the reader and flattened again.
 */
public class QueryFlattener {
    /**
     * Size handed to {@link MultiTermQuery.TopTermsScoringBooleanQueryRewrite}
     * when a {@link MultiTermQuery} has to be rewritten.
     */
    private final int maxMultiTermQueryTerms;

    /**
     * @param maxMultiTermQueryTerms size passed to the top-terms rewrite method
     *            that is installed on a copy of any {@link MultiTermQuery}
     *            before it is rewritten
     */
    public QueryFlattener(int maxMultiTermQueryTerms) {
        this.maxMultiTermQueryTerms = maxMultiTermQueryTerms;
    }

    /**
     * Receives the terms found while flattening.
     */
    public interface Callback {
        /**
         * Called once per term occurrence found in the query tree.
         *
         * @param term the term that was found
         * @param boost product of the boosts of the queries on the path to the
         *            term, including the query that directly holds the term
         * @param rewritten the query whose rewrite produced the sub-tree
         *            containing the term, or {@code null} if no rewrite was
         *            involved
         */
        void flattened(Term term, float boost, Query rewritten);
    }

    /**
     * Flattens {@code query}, starting with a path boost of 1 and no rewritten
     * query.
     *
     * @param query query to flatten
     * @param reader reader used if part of the query has to be rewritten
     * @param callback receiver of the flattened terms
     */
    public void flatten(Query query, IndexReader reader, Callback callback) {
        flatten(query, 1f, null, reader, callback);
    }

    /**
     * Dispatches {@code query} to the matching {@code flattenQuery} overload.
     * Queries that no overload, {@link #flattenSpan} or {@link #flattenUnknown}
     * handles are rewritten once and the result is flattened again.
     *
     * @param query query to flatten
     * @param pathBoost product of the boosts of all enclosing queries
     * @param rewritten query whose rewrite produced {@code query}, or
     *            {@code null}
     * @param reader reader used for rewriting
     * @param callback receiver of the flattened terms
     * @throws WrappedExceptionFromLucene if rewriting throws an
     *             {@link IOException}
     */
    protected void flatten(Query query, float pathBoost, Query rewritten, IndexReader reader,
            Callback callback) {
        if (query instanceof TermQuery) {
            flattenQuery((TermQuery) query, pathBoost, rewritten, reader, callback);
        } else if (query instanceof PhraseQuery) {
            flattenQuery((PhraseQuery) query, pathBoost, rewritten, reader, callback);
        } else if (query instanceof BooleanQuery) {
            flattenQuery((BooleanQuery) query, pathBoost, rewritten, reader, callback);
        } else if (query instanceof DisjunctionMaxQuery) {
            flattenQuery((DisjunctionMaxQuery) query, pathBoost, rewritten, reader, callback);
        } else if (query instanceof ConstantScoreQuery) {
            flattenQuery((ConstantScoreQuery) query, pathBoost, rewritten, reader, callback);
        } else if (query instanceof FilteredQuery) {
            flattenQuery((FilteredQuery) query, pathBoost, rewritten, reader, callback);
        } else if (query instanceof MultiPhraseQuery) {
            flattenQuery((MultiPhraseQuery) query, pathBoost, rewritten, reader, callback);
        } else if (query instanceof SpanQuery
                && flattenSpan((SpanQuery) query, pathBoost, rewritten, reader, callback)) {
            // Nothing to do here: flattenSpan already did the work as part of
            // evaluating the condition. The empty branch keeps the chain
            // lining up.
        } else if (!flattenUnknown(query, pathBoost, rewritten, reader, callback)) {
            if (query instanceof MultiTermQuery) {
                // Work on a copy so the caller's query keeps its own rewrite
                // method; the copy is limited to maxMultiTermQueryTerms terms.
                MultiTermQuery copy = (MultiTermQuery) query.clone();
                copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(
                        maxMultiTermQueryTerms));
                query = copy;
            }
            Query newRewritten;
            try {
                newRewritten = query.rewrite(reader);
            } catch (IOException e) {
                throw new WrappedExceptionFromLucene(e);
            }
            if (newRewritten != query) {
                // only rewrite once and then flatten again - the rewritten
                // query could have a special treatment
                flatten(newRewritten, pathBoost, query, reader, callback);
            }
        }
    }

    /**
     * Dispatches a span query to the matching {@code flattenQuery} overload.
     *
     * @return {@code true} if the span query type was recognized and
     *         flattened, {@code false} if nothing was done
     */
    protected boolean flattenSpan(SpanQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        if (query instanceof SpanTermQuery) {
            flattenQuery((SpanTermQuery) query, pathBoost, rewritten, reader, callback);
            return true;
        } else if (query instanceof SpanPositionCheckQuery) {
            flattenQuery((SpanPositionCheckQuery) query, pathBoost, rewritten, reader, callback);
            return true;
        } else if (query instanceof SpanNearQuery) {
            flattenQuery((SpanNearQuery) query, pathBoost, rewritten, reader, callback);
            return true;
        } else if (query instanceof SpanNotQuery) {
            flattenQuery((SpanNotQuery) query, pathBoost, rewritten, reader, callback);
            return true;
        } else if (query instanceof SpanOrQuery) {
            flattenQuery((SpanOrQuery) query, pathBoost, rewritten, reader, callback);
            return true;
        }
        return false;
    }

    /**
     * Extension point for query types this class has no overload for. This
     * implementation handles nothing.
     *
     * @return {@code true} if the query was handled; {@code false} to let
     *         {@link #flatten(Query, float, Query, IndexReader, Callback)}
     *         fall back to rewriting it
     */
    protected boolean flattenUnknown(Query query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        return false;
    }

    /**
     * Reports the single term of a {@link TermQuery}.
     */
    protected void flattenQuery(TermQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        callback.flattened(query.getTerm(), pathBoost * query.getBoost(), rewritten);
    }

    /**
     * Reports every term of a {@link PhraseQuery} individually, all with the
     * same boost.
     */
    protected void flattenQuery(PhraseQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        float boost = pathBoost * query.getBoost();
        for (Term term : query.getTerms()) {
            callback.flattened(term, boost, rewritten);
        }
    }

    /**
     * Flattens every clause of a {@link BooleanQuery} that is not prohibited.
     */
    protected void flattenQuery(BooleanQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        // The boost does not depend on the clause, so compute it once.
        float boost = pathBoost * query.getBoost();
        for (BooleanClause clause : query) {
            if (!clause.isProhibited()) {
                flatten(clause.getQuery(), boost, rewritten, reader, callback);
            }
        }
    }

    /**
     * Flattens every disjunct of a {@link DisjunctionMaxQuery}.
     */
    protected void flattenQuery(DisjunctionMaxQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        float boost = pathBoost * query.getBoost();
        for (Query clause : query) {
            flatten(clause, boost, rewritten, reader, callback);
        }
    }

    /**
     * Flattens the query wrapped by a {@link ConstantScoreQuery}, if there is
     * one. A wrapped filter is ignored.
     */
    protected void flattenQuery(ConstantScoreQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        if (query.getQuery() != null) {
            flatten(query.getQuery(), pathBoost * query.getBoost(), rewritten, reader, callback);
        }
        // TODO maybe flatten filter like Elasticsearch does
    }

    /**
     * Flattens the query part of a {@link FilteredQuery}, if there is one. The
     * filter part is ignored.
     */
    protected void flattenQuery(FilteredQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        if (query.getQuery() != null) {
            flatten(query.getQuery(), pathBoost * query.getBoost(), rewritten, reader, callback);
        }
        // TODO maybe flatten filter like Elasticsearch does
    }

    /**
     * Reports every term at every position of a {@link MultiPhraseQuery}
     * individually, all with the same boost.
     */
    protected void flattenQuery(MultiPhraseQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        // Elasticsearch uses a more complicated method to preserve the phrase
        // queries. We can't use them so we go with something simpler.
        float boost = pathBoost * query.getBoost();
        for (Term[] terms : query.getTermArrays()) {
            for (Term term : terms) {
                callback.flattened(term, boost, rewritten);
            }
        }
    }

    /**
     * Reports the single term of a {@link SpanTermQuery}.
     */
    protected void flattenQuery(SpanTermQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        callback.flattened(query.getTerm(), query.getBoost() * pathBoost, rewritten);
    }

    /**
     * Flattens the query matched by a {@link SpanPositionCheckQuery}.
     */
    protected void flattenQuery(SpanPositionCheckQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        // Go through flatten rather than flattenSpan so that a span type
        // flattenSpan does not recognize still gets the flattenUnknown /
        // rewrite fallback instead of being silently dropped.
        flatten(query.getMatch(), pathBoost * query.getBoost(), rewritten, reader, callback);
    }

    /**
     * Flattens every clause of a {@link SpanNearQuery}.
     */
    protected void flattenQuery(SpanNearQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        float boost = pathBoost * query.getBoost();
        for (SpanQuery clause : query.getClauses()) {
            // flatten, not flattenSpan: unrecognized span clauses must fall
            // back to flattenUnknown / rewrite rather than vanish.
            flatten(clause, boost, rewritten, reader, callback);
        }
    }

    /**
     * Flattens the include part of a {@link SpanNotQuery}. The exclude part is
     * not flattened.
     */
    protected void flattenQuery(SpanNotQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        // flatten, not flattenSpan: see the SpanPositionCheckQuery overload.
        flatten(query.getInclude(), query.getBoost() * pathBoost, rewritten, reader, callback);
    }

    /**
     * Flattens every clause of a {@link SpanOrQuery}.
     */
    protected void flattenQuery(SpanOrQuery query, float pathBoost, Query rewritten,
            IndexReader reader, Callback callback) {
        float boost = pathBoost * query.getBoost();
        for (SpanQuery clause : query.getClauses()) {
            // flatten, not flattenSpan: unrecognized span clauses must fall
            // back to flattenUnknown / rewrite rather than vanish.
            flatten(clause, boost, rewritten, reader, callback);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy