org.wikimedia.highlighter.cirrus.lucene.hit.TokenStreamHitEnum

package org.wikimedia.highlighter.cirrus.lucene.hit;

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.BytesRef;
import org.wikimedia.highlighter.cirrus.lucene.WrappedExceptionFromLucene;
import org.wikimedia.search.highlighter.cirrus.hit.AbstractHitEnum;
import org.wikimedia.search.highlighter.cirrus.hit.TermSourceFinder;
import org.wikimedia.search.highlighter.cirrus.hit.TermWeigher;
import org.wikimedia.search.highlighter.cirrus.hit.WeightFilteredHitEnumWrapper;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

/**
 * Enumerate hits by pumping a TokenStream, similar to how the "plain"
 * highlighter works in Lucene. Note that this enumerates all hits, even those
 * with 0 weight, so it should normally be wrapped in a
 * {@link WeightFilteredHitEnumWrapper} to filter out 0-weight hits.
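 *
 * <p>A minimal usage sketch, not part of the original file: the stream,
 * weigher, and finder variables are placeholders, and the {@code cutoff}
 * value passed to {@link WeightFilteredHitEnumWrapper} is assumed to be its
 * second constructor argument.
 * <pre>{@code
 * HitEnum e = new WeightFilteredHitEnumWrapper(
 *         new TokenStreamHitEnum(stream, queryWeigher, corpusWeigher, sourceFinder),
 *         0f); // assumed cutoff: hits weighing <= 0 are skipped
 * while (e.next()) {
 *     // each hit exposes position(), startOffset()/endOffset(),
 *     // queryWeight(), corpusWeight(), and source()
 * }
 * }</pre>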
 */
public class TokenStreamHitEnum extends AbstractHitEnum {
    private final TokenStream tokenStream;
    private final TermWeigher<BytesRef> queryWeigher;
    private final TermWeigher<BytesRef> corpusWeigher;
    private final TermSourceFinder<BytesRef> sourceFinder;
    private final PositionIncrementAttribute positionIncr;
    private final OffsetAttribute offsets;
    private final TermToBytesRefAttribute termRef;
    private int position = -1;
    private float queryWeight;
    private float corpusWeight;
    private int source;

    /**
     * Build a HitEnum for a TokenStream.
     *
     * @param tokenStream stream of tokens to enumerate as hits
     * @param queryWeigher assigns each term its weight in the query
     * @param corpusWeigher assigns each term its weight in the corpus
     * @param sourceFinder finds the source of each term
     */
    public TokenStreamHitEnum(TokenStream tokenStream, TermWeigher<BytesRef> queryWeigher,
            TermWeigher<BytesRef> corpusWeigher, TermSourceFinder<BytesRef> sourceFinder) {
        this.tokenStream = tokenStream;
        this.queryWeigher = queryWeigher;
        this.corpusWeigher = corpusWeigher;
        this.sourceFinder = sourceFinder;
        positionIncr = tokenStream.addAttribute(PositionIncrementAttribute.class);
        offsets = tokenStream.addAttribute(OffsetAttribute.class);
        termRef = tokenStream.addAttribute(TermToBytesRefAttribute.class);

        try {
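            // Lucene's TokenStream contract requires reset() before the first
            // call to incrementToken().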
            tokenStream.reset();
        } catch (IOException e) {
            throw new WrappedExceptionFromLucene(e);
        }
    }

    @Override
    @SuppressFBWarnings(
            value = "EXS_EXCEPTION_SOFTENING_NO_CHECKED",
            justification = "The contract of AbstractHitEnum makes sense without exposing IOException")
    public boolean next() {
        try {
            if (!tokenStream.incrementToken()) {
                return false;
            }
            BytesRef term = termRef.getBytesRef();
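            // A position increment of 0 stacks this token at the same position
            // as the previous one (e.g. synonyms); increments greater than 1
            // skip over positions left by removed tokens such as stopwords.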
            position += positionIncr.getPositionIncrement();
            queryWeight = queryWeigher.weigh(term);
            corpusWeight = corpusWeigher.weigh(term);
            source = sourceFinder.source(term);
            return true;
        } catch (IOException e) {
            throw new WrappedExceptionFromLucene(e);
        }
    }

    @Override
    public int position() {
        return position;
    }

    @Override
    public int startOffset() {
        return offsets.startOffset();
    }

    @Override
    public int endOffset() {
        return offsets.endOffset();
    }

    @Override
    public float queryWeight() {
        return queryWeight;
    }

    @Override
    public float corpusWeight() {
        return corpusWeight;
    }

    @Override
    public int source() {
        return source;
    }
}