All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.uhighlight.Passage Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.uhighlight;


import java.util.Arrays;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Represents a passage (typically a sentence of the document).
 *
 * <p>A passage contains {@link #getNumMatches} highlights from the query,
 * and the offsets and query terms that correspond with each match.
 *
 * @lucene.experimental
 */
public class Passage {
  private int startOffset = -1;
  private int endOffset = -1;
  private float score = 0.0f;

  // Four parallel arrays, sized together; only the first numMatches entries are valid.
  private int[] matchStarts = new int[8];
  private int[] matchEnds = new int[8];
  private BytesRef[] matchTerms = new BytesRef[8];
  private int[] matchTermFreqInDoc = new int[8];
  private int numMatches = 0;

  /**
   * Records one term match inside this passage.
   *
   * @param startOffset absolute start offset of the match (must lie within the passage)
   * @param endOffset absolute end offset of the match
   * @param term the matched term's bytes
   * @param termFreqInDoc frequency of the term in the whole document
   * @lucene.internal
   */
  public void addMatch(int startOffset, int endOffset, BytesRef term, int termFreqInDoc) {
    assert startOffset >= this.startOffset && startOffset <= this.endOffset;
    if (numMatches == matchStarts.length) {
      // Grow all four parallel arrays to the same oversized length so they stay in sync.
      int newLength = ArrayUtil.oversize(numMatches + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
      matchStarts = Arrays.copyOf(matchStarts, newLength);
      matchEnds = Arrays.copyOf(matchEnds, newLength);
      matchTerms = Arrays.copyOf(matchTerms, newLength);
      matchTermFreqInDoc = Arrays.copyOf(matchTermFreqInDoc, newLength);
    }
    assert matchStarts.length == matchEnds.length && matchEnds.length == matchTerms.length;
    matchStarts[numMatches] = startOffset;
    matchEnds[numMatches] = endOffset;
    matchTerms[numMatches] = term;
    matchTermFreqInDoc[numMatches] = termFreqInDoc;
    numMatches++;
  }

  /**
   * Clears this passage for reuse: resets offsets, score, and match count.
   * The backing arrays are retained (their stale contents are masked by numMatches == 0).
   *
   * @lucene.internal
   */
  public void reset() {
    startOffset = endOffset = -1;
    score = 0.0f;
    numMatches = 0;
  }

  /**
   * For debugging.
   * ex: Passage[0-22]{yin[0-3],yang[4-8],yin[10-13]}score=2.4964213
   */
  @Override
  public String toString() {
    StringBuilder buf = new StringBuilder();
    buf.append("Passage[").append(startOffset).append('-').append(endOffset).append(']');
    buf.append('{');
    for (int i = 0; i < numMatches; i++) {
      if (i != 0) {
        buf.append(',');
      }
      buf.append(matchTerms[i].utf8ToString());
      // Match offsets are printed relative to the passage start.
      buf.append('[').append(matchStarts[i] - startOffset)
          .append('-').append(matchEnds[i] - startOffset).append(']');
    }
    buf.append('}');
    buf.append("score=").append(score);
    return buf.toString();
  }

  /**
   * Start offset of this passage.
   *
   * @return start index (inclusive) of the passage in the
   *         original content: always &gt;= 0.
   */
  public int getStartOffset() {
    return startOffset;
  }

  /**
   * End offset of this passage.
   *
   * @return end index (exclusive) of the passage in the
   *         original content: always &gt;= {@link #getStartOffset()}
   */
  public int getEndOffset() {
    return endOffset;
  }

  /** Length of the passage in characters: {@link #getEndOffset()} - {@link #getStartOffset()}. */
  public int getLength() {
    return endOffset - startOffset;
  }

  /** Passage's score. */
  public float getScore() {
    return score;
  }

  /** Sets the passage's score. @lucene.internal */
  public void setScore(float score) {
    this.score = score;
  }

  /**
   * Number of term matches available in
   * {@link #getMatchStarts}, {@link #getMatchEnds},
   * {@link #getMatchTerms}
   */
  public int getNumMatches() {
    return numMatches;
  }

  /**
   * Start offsets of the term matches, in increasing order.
   *
   * <p>Only {@link #getNumMatches} are valid. Note that these
   * offsets are absolute (not relative to {@link #getStartOffset()}).
   */
  public int[] getMatchStarts() {
    return matchStarts;
  }

  /**
   * End offsets of the term matches, corresponding with {@link #getMatchStarts}.
   *
   * <p>Only {@link #getNumMatches} are valid. Note that it's possible that an end offset
   * could exceed beyond the bounds of the passage ({@link #getEndOffset()}), if the
   * Analyzer produced a term which spans a passage boundary.
   */
  public int[] getMatchEnds() {
    return matchEnds;
  }

  /**
   * BytesRef (term text) of the matches, corresponding with {@link #getMatchStarts()}. The primary
   * purpose of this method is to expose the number of unique terms per passage for use in passage
   * scoring. The actual term byte content is not well defined by this highlighter, and thus use of
   * it is more subject to change.
   *
   * <p>The term might be simply the analyzed term at this position.
   * Depending on the highlighter's configuration, the match term may be a phrase (instead of a
   * word), and in such a case might be a series of space-separated analyzed terms.
   * If the match is from a {@link org.apache.lucene.search.MultiTermQuery} then the match term may
   * be the toString() of that query.
   *
   * <p>Only {@link #getNumMatches()} are valid.
   */
  public BytesRef[] getMatchTerms() {
    return matchTerms;
  }

  /** Document-level frequencies of the matched terms, corresponding with {@link #getMatchTerms()}. */
  public int[] getMatchTermFreqsInDoc() {
    return matchTermFreqInDoc;
  }

  /** @lucene.internal */
  public void setStartOffset(int startOffset) {
    this.startOffset = startOffset;
  }

  /** @lucene.internal */
  public void setEndOffset(int endOffset) {
    assert startOffset <= endOffset;
    this.endOffset = endOffset;
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy