All Downloads are FREE. The search and download functionality uses the official Maven repository.

querqy.lucene.rewrite.FieldBoostTermQueryBuilder Maven / Gradle / Ivy

There is a newer version: 5.8.lucene961.1
Show newest version
package querqy.lucene.rewrite;

import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;

import java.io.IOException;
import java.util.Optional;
import java.util.Set;

public class FieldBoostTermQueryBuilder implements TermQueryBuilder {


    @Override
    public Optional getDocumentFrequencyCorrection() {
        return Optional.empty();
    }

    @Override
    public FieldBoostTermQuery createTermQuery(final Term term, final FieldBoost boost) {
        return new FieldBoostTermQuery(term, boost);
    }

    /**
     * A term query that scores by query queryBoost and {@link FieldBoost} but not by
     * {@link org.apache.lucene.search.similarities.Similarity} or {@link DocumentFrequencyCorrection}.
     *
     * Created by rene on 11/09/2016.
     */
    public static class FieldBoostTermQuery extends TermQuery {

        protected final Term term;
        protected final FieldBoost fieldBoost;

        public FieldBoostTermQuery(final Term term, final FieldBoost fieldBoost) {

            super(term);

            this.term = term;

            if (fieldBoost == null) {
                throw new IllegalArgumentException("FieldBoost must not be null");
            }
            this.fieldBoost = fieldBoost;

        }

        @Override
        public Weight createWeight(final IndexSearcher searcher, final ScoreMode scoreMode, final float boost)
                throws IOException {
            final IndexReaderContext context = searcher.getTopReaderContext();
            final TermStates termState = TermStates.build(context, term, scoreMode.needsScores());
            // TODO: set boosts to 1f if needsScores is false?
            return new FieldBoostWeight(termState, boost, fieldBoost.getBoost(term.field(), searcher.getIndexReader()));
        }



        class FieldBoostWeight extends Weight {
            private final TermStates termStates;
            private float score;
            private float queryBoost;
            private final float fieldBoost;


            public FieldBoostWeight(final TermStates termStates, final float queryBoost, final float fieldBoost) {
                super(FieldBoostTermQuery.this);
                assert termStates != null : "TermContext must not be null";
                this.termStates = termStates;

                this.queryBoost = queryBoost;
                this.fieldBoost = fieldBoost;
                this.score = queryBoost * fieldBoost;
            }

            float getScore() {
                return score;
            }

            @Override
            public String toString() {
                return "weight(" + FieldBoostTermQuery.this + ")";
            }



            @Override
            public Scorer scorer(final LeafReaderContext context) throws IOException {
                assert termStates != null && termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context))
                        : "The top-reader used to create Weight is not the same as the current reader's top-reader: " + ReaderUtil.getTopLevelContext(context);
                final TermsEnum termsEnum = getTermsEnum(context);
                if (termsEnum == null) {
                    return null;
                }
                PostingsEnum docs = termsEnum.postings(null, PostingsEnum.NONE);
                assert docs != null;
                return new TermBoostScorer(this, docs, score);
            }

            /**
             * Returns a {@link TermsEnum} positioned at this weights Term or null if
             * the term does not exist in the given context
             */
            private TermsEnum getTermsEnum(final LeafReaderContext context) throws IOException {
                final TermState state = termStates.get(context);
                if (state == null) { // term is not present in that reader
                    assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
                    return null;
                }
                // System.out.println("LD=" + reader.getLiveDocs() + " set?=" +
                // (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
                final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
                termsEnum.seekExact(term.bytes());
                return termsEnum;
            }

            private boolean termNotInReader(final LeafReader reader, final Term term) throws IOException {
                // only called from assert
                // System.out.println("TQ.termNotInReader reader=" + reader + " term=" +
                // field + ":" + bytes.utf8ToString());
                return reader.docFreq(term) == 0;
            }

            @Override
            public Explanation explain(final LeafReaderContext context, final int doc) throws IOException {

                Scorer scorer = scorer(context);
                if (scorer != null) {
                    int newDoc = scorer.iterator().advance(doc);
                    if (newDoc == doc) {

                        Explanation scoreExplanation = Explanation.match(score, "product of:",
                                Explanation.match(queryBoost, "queryBoost"),
                                Explanation.match(fieldBoost, "fieldBoost")
                        );

                        Explanation result = Explanation.match(scorer.score(),
                                "weight(" + getQuery() + " in " + doc + ") ["
                                        + FieldBoostTermQuery.this.fieldBoost.getClass().getSimpleName() + "], result of:",
                                scoreExplanation

                        );



                        return result;
                    }
                }
                return Explanation.noMatch("no matching term");
            }

            public float getFieldBoost() {
                return fieldBoost;
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }
        }

        class TermBoostScorer extends Scorer {
            private final PostingsEnum postingsEnum;
            private final float score;

            /**
             * Construct a TermScorer.
             *
             * @param weight
             *          The weight of the Term in the query.
             * @param td
             *          An iterator over the documents matching the Term.
             * @param score
             *          The score
             */
            TermBoostScorer(final Weight weight, final PostingsEnum td, final float score) {
                super(weight);
                this.score = score;
                this.postingsEnum = td;
            }

            @Override
            public int docID() {
                return postingsEnum.docID();
            }

            @Override
            public DocIdSetIterator iterator() { return postingsEnum; }

            @Override
            public float getMaxScore(int upTo) throws IOException {
                return score;
            }


            @Override
            public float score() throws IOException {
                assert docID() != DocIdSetIterator.NO_MORE_DOCS;
                return score;
            }

            /** Returns a string representation of this TermScorer. */
            @Override
            public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; }
        }



        @Override
        public String toString(final String field) {
            StringBuilder buffer = new StringBuilder();
            if (!term.field().equals(field)) {
                buffer.append(term.field());
                buffer.append(":");
            }
            buffer.append(term.text());
            buffer.append(fieldBoost.toString(term.field()));
            return buffer.toString();
        }

        @Override
        public boolean equals(final Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            if (!super.equals(o)) return false;

            FieldBoostTermQuery that = (FieldBoostTermQuery) o;

            if (!term.equals(that.term)) return false;
            return fieldBoost.equals(that.fieldBoost);

        }

        @Override
        public int hashCode() {
            int result = super.hashCode();
            result = 31 * result + term.hashCode();
            result = 31 * result + fieldBoost.hashCode();
            return result;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy