All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.uhighlight.SplittingBreakIterator Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.uhighlight;

import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;

/**
 * Virtually slices the text on both sides of every occurrence of the specified character. If the
 * slice is 0-length which happens for adjacent slice characters or when they are at the beginning
 * or end, that character is reported as a boundary. For every slice between the specified
 * characters, it is further processed with a specified BreakIterator. A consequence is that the
 * enclosed BreakIterator will never "see" the splitting character. 
* Note: {@link #setText(CharacterIterator)} is unsupported. Use the string version. * * @lucene.experimental */ public class SplittingBreakIterator extends BreakIterator { private final BreakIterator baseIter; private final char sliceChar; private String text; private int sliceStartIdx; private int sliceEndIdx; private int current; public SplittingBreakIterator(BreakIterator baseIter, char sliceChar) { this.baseIter = baseIter; this.sliceChar = sliceChar; } @Override public void setText(CharacterIterator newText) { throw new UnsupportedOperationException("unexpected"); } @Override public void setText(String newText) { this.text = newText; first(); } @Override public CharacterIterator getText() { StringCharacterIterator charIter = new StringCharacterIterator(text); // API doesn't say what the state should be but it should probably be at the current index. charIter.setIndex(current()); return charIter; } @Override public int current() { assert current != DONE; return current; // MUST be updated by the other methods when result isn't DONE. } @Override public int first() { sliceStartIdx = 0; sliceEndIdx = text.indexOf(sliceChar); if (sliceEndIdx == -1) { sliceEndIdx = text.length(); } if (sliceStartIdx == sliceEndIdx) { return current = sliceStartIdx; } baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx)); // since setText() sets to first(), just grab current() return current = sliceStartIdx + baseIter.current(); } @Override public int last() { sliceEndIdx = text.length(); sliceStartIdx = text.lastIndexOf(sliceChar); if (sliceStartIdx == -1) { sliceStartIdx = 0; } else { sliceStartIdx++; // past sliceChar } if (sliceEndIdx == sliceStartIdx) { return current = sliceEndIdx; } baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx)); return current = sliceStartIdx + baseIter.last(); } @Override public int next() { int prevCurrent = current; current = sliceStartIdx == sliceEndIdx ? DONE : baseIter.next(); if (current != DONE) { return current = current + sliceStartIdx; } if (sliceEndIdx >= text.length()) { current = prevCurrent; // keep current where it is return DONE; } sliceStartIdx = sliceEndIdx + 1; sliceEndIdx = text.indexOf(sliceChar, sliceStartIdx); if (sliceEndIdx == -1) { sliceEndIdx = text.length(); } if (sliceStartIdx == sliceEndIdx) { return current = sliceStartIdx; } baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx)); return current = sliceStartIdx + baseIter.current(); // use current() since at first() already } @Override public int previous() { // note: closely follows next() but reversed int prevCurrent = current; current = sliceStartIdx == sliceEndIdx ? DONE : baseIter.previous(); if (current != DONE) { return current = current + sliceStartIdx; } if (sliceStartIdx == 0) { current = prevCurrent; // keep current where it is return DONE; } sliceEndIdx = sliceStartIdx - 1; sliceStartIdx = text.lastIndexOf(sliceChar, sliceEndIdx - 1); if (sliceStartIdx == -1) { sliceStartIdx = 0; } else { sliceStartIdx++; // past sliceChar } if (sliceStartIdx == sliceEndIdx) { return current = sliceStartIdx; } baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx)); return current = sliceStartIdx + baseIter.last(); } @Override public int following(int offset) { // if the offset is not in this slice, update the slice if (offset + 1 < sliceStartIdx || offset + 1 > sliceEndIdx) { if (offset == text.length()) { // DONE condition last(); // because https://bugs.openjdk.java.net/browse/JDK-8015110 return DONE; } sliceStartIdx = text.lastIndexOf(sliceChar, offset); // no +1 if (sliceStartIdx == -1) { sliceStartIdx = 0; } else { sliceStartIdx++; // move past separator } sliceEndIdx = text.indexOf(sliceChar, Math.max(offset + 1, sliceStartIdx)); if (sliceEndIdx == -1) { sliceEndIdx = text.length(); } if (sliceStartIdx != sliceEndIdx) { // otherwise, adjacent separator or separator at end baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx)); } } // lookup following() in this slice: if (sliceStartIdx == sliceEndIdx) { return current = offset + 1; } else { // note: following() can never be first() if the first character is a boundary (it usually // is). // So we have to check if we should call first() instead of following(): if (offset == sliceStartIdx - 1) { // the first boundary following this offset is the very first boundary in this slice return current = sliceStartIdx + baseIter.first(); } else { return current = sliceStartIdx + baseIter.following(offset - sliceStartIdx); } } } @Override public int preceding(int offset) { // note: closely follows following() but reversed if (offset - 1 < sliceStartIdx || offset - 1 > sliceEndIdx) { if (offset == 0) { // DONE condition first(); // because https://bugs.openjdk.java.net/browse/JDK-8015110 return DONE; } sliceEndIdx = text.indexOf(sliceChar, offset); // no -1 if (sliceEndIdx == -1) { sliceEndIdx = text.length(); } sliceStartIdx = text.lastIndexOf(sliceChar, offset - 1); if (sliceStartIdx == -1) { sliceStartIdx = 0; } else { sliceStartIdx = Math.min(sliceStartIdx + 1, sliceEndIdx); } if (sliceStartIdx != sliceEndIdx) { // otherwise, adjacent separator or separator at end baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx)); } } // lookup preceding() in this slice: if (sliceStartIdx == sliceEndIdx) { return current = offset - 1; } else { // note: preceding() can never be last() if the last character is a boundary (it usually is). // So we have to check if we should call last() instead of preceding(): if (offset == sliceEndIdx + 1) { // the last boundary preceding this offset is the very last boundary in this slice return current = sliceStartIdx + baseIter.last(); } else { return current = sliceStartIdx + baseIter.preceding(offset - sliceStartIdx); } } } @Override public int next(int n) { if (n < 0) { for (int i = 0; i < -n; i++) { if (previous() == DONE) { return DONE; } } } else { for (int i = 0; i < n; i++) { if (next() == DONE) { return DONE; } } } return current(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy