// org.apache.lucene.analysis.miscellaneous.ConcatenatingTokenStream (Maven / Gradle / Ivy)
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.IOUtils;
/**
 * A TokenStream that takes an array of input TokenStreams as sources, and
 * concatenates them together.
 *
 * <p>Offsets from the second and subsequent sources are incremented to behave
 * as if all the inputs were from a single source. The position increment that
 * each exhausted source reports from its {@code end()} call is carried over
 * and added to the next emitted token (or to the final state in
 * {@link #end()}).
 *
 * <p>All of the input TokenStreams must have the same attribute implementations.
 */
public final class ConcatenatingTokenStream extends TokenStream {

  private final TokenStream[] sources;
  private final OffsetAttribute[] sourceOffsets;
  private final PositionIncrementAttribute[] sourceIncrements;
  private final OffsetAttribute offsetAtt;
  private final PositionIncrementAttribute posIncAtt;

  /** Index into {@link #sources} of the stream currently being consumed. */
  private int currentSource;

  /** Sum of the end offsets of every source exhausted so far. */
  private int offsetIncrement;

  /**
   * Sum of the position increments reported after {@code end()} by exhausted
   * sources that has not yet been applied to an emitted token.
   */
  private int pendingPositionIncrement;

  /**
   * Create a new ConcatenatingTokenStream from a set of inputs
   *
   * @param sources an array of TokenStream inputs to concatenate; must contain at least one stream
   * @throws IllegalArgumentException if no sources are given, or if the sources' attributes
   *     cannot be combined
   */
  public ConcatenatingTokenStream(TokenStream... sources) {
    super(combineSources(sources));
    this.sources = sources;
    this.offsetAtt = addAttribute(OffsetAttribute.class);
    this.posIncAtt = addAttribute(PositionIncrementAttribute.class);
    this.sourceOffsets = new OffsetAttribute[sources.length];
    this.sourceIncrements = new PositionIncrementAttribute[sources.length];
    for (int i = 0; i < sources.length; i++) {
      this.sourceOffsets[i] = sources[i].addAttribute(OffsetAttribute.class);
      this.sourceIncrements[i] = sources[i].addAttribute(PositionIncrementAttribute.class);
    }
  }

  /**
   * Builds the AttributeSource backing this stream: a clone of the first source's attributes,
   * extended with every attribute class declared by the remaining sources.
   *
   * @param sources the streams being concatenated
   * @return the combined attribute source
   * @throws IllegalArgumentException if {@code sources} is empty, or if adding or copying a
   *     later source's attributes into the combined set is rejected
   */
  private static AttributeSource combineSources(TokenStream... sources) {
    if (sources.length == 0) {
      // Fail with a clear message instead of an ArrayIndexOutOfBoundsException on sources[0].
      throw new IllegalArgumentException(
          "ConcatenatingTokenStream requires at least one source TokenStream");
    }
    AttributeSource base = sources[0].cloneAttributes();
    try {
      for (int i = 1; i < sources.length; i++) {
        Iterator<Class<? extends Attribute>> it = sources[i].getAttributeClassesIterator();
        while (it.hasNext()) {
          base.addAttribute(it.next());
        }
        // check attributes can be captured
        sources[i].copyTo(base);
      }
      return base;
    } catch (IllegalArgumentException e) {
      throw new IllegalArgumentException(
          "Attempted to concatenate TokenStreams with different attribute types", e);
    }
  }

  /**
   * Advances to the next token, moving on to the following source whenever the current one is
   * exhausted.
   *
   * @return {@code false} once the last source has no more tokens, {@code true} otherwise
   */
  @Override
  public boolean incrementToken() throws IOException {
    while (sources[currentSource].incrementToken() == false) {
      if (currentSource >= sources.length - 1) {
        // The last source is ended in end(), not here.
        return false;
      }
      sources[currentSource].end();
      // Accumulate rather than overwrite, so that a run of sources that emit no tokens
      // does not discard the increments carried from the sources before them.
      pendingPositionIncrement += sourceIncrements[currentSource].getPositionIncrement();
      offsetIncrement += sourceOffsets[currentSource].endOffset();
      currentSource++;
    }

    clearAttributes();
    sources[currentSource].copyTo(this);
    offsetAtt.setOffset(
        offsetAtt.startOffset() + offsetIncrement, offsetAtt.endOffset() + offsetIncrement);
    if (pendingPositionIncrement != 0) {
      posIncAtt.setPositionIncrement(posIncAtt.getPositionIncrement() + pendingPositionIncrement);
      pendingPositionIncrement = 0;
    }
    return true;
  }

  /**
   * Ends the current source and publishes the final offset and position increment, shifted by
   * whatever has been carried over from the sources exhausted before it.
   */
  @Override
  public void end() throws IOException {
    sources[currentSource].end();
    // Read the source's end state before super.end() touches this stream's attributes.
    int finalOffset = sourceOffsets[currentSource].endOffset() + offsetIncrement;
    int finalPosInc =
        sourceIncrements[currentSource].getPositionIncrement() + pendingPositionIncrement;
    super.end();
    offsetAtt.setOffset(finalOffset, finalOffset);
    posIncAtt.setPositionIncrement(finalPosInc);
  }

  /** Resets every source and rewinds this stream to the first one. */
  @Override
  public void reset() throws IOException {
    for (TokenStream source : sources) {
      source.reset();
    }
    super.reset();
    currentSource = 0;
    offsetIncrement = 0;
    pendingPositionIncrement = 0;
  }

  /** Closes every source, then this stream. */
  @Override
  public void close() throws IOException {
    try {
      IOUtils.close(sources);
    } finally {
      super.close();
    }
  }
}