All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.matchhighlight.PassageFormatter Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.matchhighlight;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.RandomAccess;
import java.util.function.Function;

/**
 * Formats a collection of {@linkplain Passage passages} over a given string, cleaning up and
 * resolving restrictions concerning overlaps, allowed sub-ranges over the input string and length
 * restrictions.
 *
 * 

Passages are demarcated with constructor-provided ellipsis and start/end marker sequences. */ public class PassageFormatter { private final String ellipsis; private final Function markerStart; private final Function markerEnd; private final ArrayList markerStack = new ArrayList<>(); public PassageFormatter(String ellipsis, String markerStart, String markerEnd) { this(ellipsis, (m) -> markerStart, (m) -> markerEnd); } public PassageFormatter( String ellipsis, Function markerStart, Function markerEnd) { this.ellipsis = ellipsis; this.markerStart = markerStart; this.markerEnd = markerEnd; } public List format(CharSequence value, List passages, List ranges) { assert PassageSelector.sortedAndNonOverlapping(passages); assert PassageSelector.sortedAndNonOverlapping(ranges); assert withinRange(new OffsetRange(0, value.length()), passages); assert ranges instanceof RandomAccess; if (ranges.isEmpty()) { return Collections.emptyList(); } ArrayList result = new ArrayList<>(); StringBuilder buf = new StringBuilder(); int rangeIndex = 0; OffsetRange range = ranges.get(rangeIndex); passageFormatting: for (Passage passage : passages) { // Move to the range of the current passage. while (passage.from >= range.to) { if (++rangeIndex == ranges.size()) { break passageFormatting; } range = ranges.get(rangeIndex); } assert range.from <= passage.from && range.to >= passage.to : range + " ? " + passage; buf.setLength(0); if (range.from < passage.from) { buf.append(ellipsis); } format(buf, value, passage); if (range.to > passage.to) { buf.append(ellipsis); } result.add(buf.toString()); } return result; } private boolean withinRange(OffsetRange limits, List contained) { contained.forEach( r -> { if (r.from < limits.from || r.to > limits.to) { throw new AssertionError( String.format( Locale.ROOT, "Range outside of the permitted limit (limit = %s): %s", limits, r)); } }); return true; } public StringBuilder format(StringBuilder buf, CharSequence value, final Passage passage) { switch (passage.markers.size()) { case 0: // No markers, full passage appended. buf.append(value, passage.from, passage.to); break; case 1: // One marker, trivial and frequent case so it's handled separately. OffsetRange m = passage.markers.iterator().next(); buf.append(value, passage.from, m.from); buf.append(markerStart.apply(m)); buf.append(value, m.from, m.to); buf.append(markerEnd.apply(m)); buf.append(value, m.to, passage.to); break; default: // Multiple markers, possibly overlapping or nested. markerStack.clear(); multipleMarkers(value, passage, buf, markerStack); break; } return buf; } /** Handle multiple markers, possibly overlapping or nested. */ private void multipleMarkers( CharSequence value, final Passage p, StringBuilder b, ArrayList markerStack) { int at = p.from; int max = p.to; SlicePoint[] slicePoints = slicePoints(p); for (SlicePoint slicePoint : slicePoints) { b.append(value, at, slicePoint.offset); OffsetRange currentMarker = slicePoint.marker; switch (slicePoint.type) { case START: markerStack.add(currentMarker); b.append(markerStart.apply(currentMarker)); break; case END: int markerIndex = markerStack.lastIndexOf(currentMarker); for (int k = markerIndex; k < markerStack.size(); k++) { b.append(markerEnd.apply(markerStack.get(k))); } markerStack.remove(markerIndex); for (int k = markerIndex; k < markerStack.size(); k++) { b.append(markerStart.apply(markerStack.get(k))); } break; default: throw new RuntimeException(); } at = slicePoint.offset; } if (at < max) { b.append(value, at, max); } } private static SlicePoint[] slicePoints(Passage p) { SlicePoint[] slicePoints = new SlicePoint[p.markers.size() * 2]; int x = 0; for (OffsetRange m : p.markers) { slicePoints[x++] = new SlicePoint(SlicePoint.Type.START, m.from, m); slicePoints[x++] = new SlicePoint(SlicePoint.Type.END, m.to, m); } // Order slice points by their offset Comparator c = Comparator.comparingInt(pt -> pt.offset) .thenComparingInt(pt -> pt.type.ordering) .thenComparing( (a, b) -> { if (a.type == SlicePoint.Type.START) { // Longer start slice points come first. return Integer.compare(b.marker.to, a.marker.to); } else { // Shorter end slice points come first. return Integer.compare(b.marker.from, a.marker.from); } }); Arrays.sort(slicePoints, c); return slicePoints; } static class SlicePoint { enum Type { START(2), END(1); private final int ordering; Type(int ordering) { this.ordering = ordering; } } public final int offset; public final Type type; public final OffsetRange marker; public SlicePoint(Type t, int offset, OffsetRange m) { this.type = t; this.offset = offset; this.marker = m; } @Override public String toString() { return "(" + type + ", " + marker + ")"; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy