All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.chenlb.mmseg4j.analysis.TokenUtils Maven / Gradle / Ivy

package com.chenlb.mmseg4j.analysis;

import java.io.IOException;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

/**
 * lucene 3.0 从 TokenStream 得到 Token 比较麻烦。
 *
 * @author chenlb 2010-10-7下午10:07:10
 */
public class TokenUtils {

	/**
	 * @param input
	 * @param reusableToken is null well new one auto.
	 * @return null - if not next token or input is null.
	 * @throws IOException
	 */
	public static Token nextToken(TokenStream input, Token reusableToken) throws IOException {
		if(input == null) {
			return null;
		}
		if(!input.incrementToken()) {
			return null;
		}

		CharTermAttribute termAtt = input.getAttribute(CharTermAttribute.class);
		OffsetAttribute offsetAtt = input.getAttribute(OffsetAttribute.class);
		TypeAttribute typeAtt = input.getAttribute(TypeAttribute.class);

		if(reusableToken == null) {
			reusableToken = new Token();
		}

		reusableToken.clear();
		if(termAtt != null) {
			//lucene 3.0
			//reusableToken.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
			//lucene 3.1
			reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length());
		}
		if(offsetAtt != null) {
			//lucene 3.1
			//reusableToken.setStartOffset(offsetAtt.startOffset());
			//reusableToken.setEndOffset(offsetAtt.endOffset());
			//lucene 4.0
			reusableToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
		}

		if(typeAtt != null) {
			reusableToken.setType(typeAtt.type());
		}

		return reusableToken;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy