org.apache.lucene.analysis.ComboTokenStream Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch-analysis-combo Show documentation
Show all versions of elasticsearch-analysis-combo Show documentation
The Combo Analyzer plugin for ElasticSearch provides with a new analyzer type that combines the output of multiple analyzers into one.
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.Attribute;
import java.io.IOException;
import java.util.AbstractQueue;
import java.util.Iterator;
import java.util.PriorityQueue;
/**
* A TokenStream combining the output of multiple sub-TokenStreams.
*
* This class copies the attributes from the last sub-TokenStream that
* was read from. If attributes are not uniform between sub-TokenStreams,
* extraneous attributes will stay untouched.
*/
public class ComboTokenStream extends TokenStream {
/**
* Whether or not to continue with the current TokenStream
* if it has multiple terms at same position, minimizing
* queue moves, or to enforce strict order (position, offsets)
*/
static final boolean KEEP_STREAM_IF_SAME_POSITION = false;
private int lastPosition;
// Position tracked sub-TokenStreams
private final PositionedTokenStream[] positionedTokenStreams;
// Reading queue, using the reading order from PositionedTokenStream
private final AbstractQueue readQueue;
// Flag for lazy initialization and reset
private boolean readQueueResetted;
public ComboTokenStream(TokenStream... tokenStreams) {
// Load the TokenStreams, track their position, and register their attributes
this.positionedTokenStreams = new PositionedTokenStream[tokenStreams.length];
for (int i = tokenStreams.length-1 ; i >= 0 ; --i) {
if (tokenStreams[i] == null) continue;
this.positionedTokenStreams[i] = new PositionedTokenStream(tokenStreams[i]);
// Add each and every token seen in the current sub AttributeSource
Iterator> iterator = this.positionedTokenStreams[i].getAttributeClassesIterator();
while (iterator.hasNext()) {
addAttribute(iterator.next());
}
}
this.lastPosition = 0;
// Create an initially empty queue.
// It will be filled at first incrementToken() call, because
// it needs to call the same function on each sub-TokenStreams.
this.readQueue = new PriorityQueue(tokenStreams.length);
readQueueResetted = false;
}
/*
* TokenStream multiplexed methods
*/
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
// Fill the queue on first call
if (!readQueueResetted) {
readQueueResetted = true;
readQueue.clear();
for (PositionedTokenStream pts : positionedTokenStreams) {
if (pts == null) continue;
// Read first token
pts.clearAttributes();
if (pts.incrementToken()) {
// PositionedTokenStream.incrementToken() initialized internal
// variables to perform proper ordering.
// Therefore we can only add it to the queue now!
readQueue.add(pts);
} // no token left (no token at all)
}
}
// Read from the first token
PositionedTokenStream toRead = readQueue.peek();
if (toRead == null) {
return false; // end of streams
}
// Look position to see if it will be increased, see usage a bit below
int pos = toRead.getPosition();
// Copy the current token attributes from the sub-TokenStream to our AttributeSource
restoreState(toRead.captureState());
// Override the PositionIncrementAttribute
this.getAttribute(PositionIncrementAttribute.class).setPositionIncrement(Math.max(0, pos - lastPosition));
// Prepare next read
// We did not remove the TokenStream from the queue yet,
// because if we have another token available at the same position,
// we can save a queue movement.
toRead.clearAttributes();
if (!toRead.incrementToken()) {
// No more token to read, remove from the queue
readQueue.poll();
} else {
// Check if token position changed
if (readQueue.size() > 1 && (!KEEP_STREAM_IF_SAME_POSITION || toRead.getPosition() != pos)) {
// If yes, re-enter in the priority queue
readQueue.add(readQueue.poll());
} // Otherwise, next call will continue with the same TokenStream (less queue movements)
}
lastPosition = pos;
return true;
}
@Override public void end() throws IOException {
super.end();
lastPosition = 0;
// Apply on each sub-TokenStream
for (PositionedTokenStream pts : positionedTokenStreams) {
if (pts == null) continue;
pts.end();
}
readQueueResetted = false;
readQueue.clear();
}
@Override public void reset() throws IOException {
super.reset();
clearAttributes();
lastPosition = 0;
// Apply on each sub-TokenStream
for (PositionedTokenStream pts : positionedTokenStreams) {
if (pts == null) continue;
pts.reset();
}
readQueueResetted = false;
readQueue.clear();
}
@Override public void close() throws IOException {
super.close();
lastPosition = 0;
// Apply on each sub-TokenStream
for (PositionedTokenStream pts : positionedTokenStreams) {
if (pts == null) continue;
pts.close();
}
readQueueResetted = false;
readQueue.clear();
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy