All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.matchhighlight.OffsetsFromPositions Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.matchhighlight;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.MatchesIterator;

/**
 * This strategy applies to fields with stored positions but no offsets. We re-analyze the field's
 * value to find out offsets of match positions.
 *
 * 

Note that this may fail if index data (positions stored in the index) is out of sync with the * field values or the analyzer. This strategy assumes it'll never happen. */ public final class OffsetsFromPositions implements OffsetsRetrievalStrategy { private final String field; private final Analyzer analyzer; public OffsetsFromPositions(String field, Analyzer analyzer) { this.field = field; this.analyzer = analyzer; } @Override public List get( MatchesIterator matchesIterator, MatchRegionRetriever.FieldValueProvider doc) throws IOException { ArrayList positionRanges = new ArrayList<>(); while (matchesIterator.next()) { int from = matchesIterator.startPosition(); int to = matchesIterator.endPosition(); if (from < 0 || to < 0) { throw new IOException("Matches API returned negative positions for field: " + field); } positionRanges.add(new OffsetRange(from, to)); } // Convert from positions to offsets. return convertPositionsToOffsets(positionRanges, doc.getValues(field)); } List convertPositionsToOffsets( ArrayList positionRanges, List values) throws IOException { if (positionRanges.isEmpty()) { return positionRanges; } class PositionSpan extends OffsetRange { int leftOffset = Integer.MAX_VALUE; int rightOffset = Integer.MIN_VALUE; PositionSpan(int from, int to) { super(from, to); } @Override public String toString() { return "[from=" + from + ", to=" + to + ", L: " + leftOffset + ", R: " + rightOffset + ']'; } } ArrayList spans = new ArrayList<>(); int minPosition = Integer.MAX_VALUE; int maxPosition = Integer.MIN_VALUE; for (OffsetRange range : positionRanges) { spans.add(new PositionSpan(range.from, range.to)); minPosition = Math.min(minPosition, range.from); maxPosition = Math.max(maxPosition, range.to); } PositionSpan[] spansTable = spans.toArray(PositionSpan[]::new); int spanCount = spansTable.length; int position = -1; int valueOffset = 0; for (int valueIndex = 0, max = values.size(); valueIndex < max; valueIndex++) { final String value = values.get(valueIndex).toString(); final boolean lastValue = valueIndex + 1 == max; TokenStream ts = analyzer.tokenStream(field, value); OffsetAttribute offsetAttr = ts.getAttribute(OffsetAttribute.class); PositionIncrementAttribute posAttr = ts.getAttribute(PositionIncrementAttribute.class); ts.reset(); while (ts.incrementToken()) { position += posAttr.getPositionIncrement(); if (position >= minPosition) { // Correct left and right offsets for each span this position applies to. int startOffset = valueOffset + offsetAttr.startOffset(); int endOffset = valueOffset + offsetAttr.endOffset(); int j = 0; for (int i = 0; i < spanCount; i++) { PositionSpan span = spansTable[j] = spansTable[i]; if (position >= span.from) { if (position <= span.to) { span.leftOffset = Math.min(span.leftOffset, startOffset); span.rightOffset = Math.max(span.rightOffset, endOffset); } else { // this span can't intersect with any following position // so omit it by skipping j++. continue; } } j++; } spanCount = j; // Only short-circuit if we're on the last value (which should be the common // case since most fields would only have a single value anyway). We need // to make sure of this because otherwise offsetAttr would have incorrect value. if (position > maxPosition && lastValue) { break; } } } ts.end(); position += posAttr.getPositionIncrement() + analyzer.getPositionIncrementGap(field); valueOffset += offsetAttr.endOffset() + analyzer.getOffsetGap(field); ts.close(); } ArrayList converted = new ArrayList<>(spans.size()); for (PositionSpan span : spans) { if (span.leftOffset == Integer.MAX_VALUE || span.rightOffset == Integer.MIN_VALUE) { throw new RuntimeException("One of the offsets missing for position range: " + span); } converted.add(new OffsetRange(span.leftOffset, span.rightOffset)); } return converted; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy