org.apache.solr.analysis.TokenizerChain

// original code from Apache Solr - ported to work with Lucene 3.x and reformatted to Search coding style
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.analysis;

import java.io.Reader;

import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

/**
 * An analyzer that uses a tokenizer and a list of token filters to
 * create a TokenStream.
 *
 * @version $Id: TokenizerChain.java 805263 2009-08-18 02:50:49Z yonik $
 */
public class TokenizerChain extends SolrAnalyzer {
	private final CharFilterFactory[] charFilters;
	private final TokenizerFactory tokenizer;
	private final TokenFilterFactory[] filters;

	public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
		this( null, tokenizer, filters );
	}

	public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
		this.charFilters = charFilters;
		this.tokenizer = tokenizer;
		this.filters = filters;
	}

	public CharFilterFactory[] getCharFilterFactories() {
		return charFilters;
	}

	public TokenizerFactory getTokenizerFactory() {
		return tokenizer;
	}

	public TokenFilterFactory[] getTokenFilterFactories() {
		return filters;
	}

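	/**
	 * Wraps the given {@link Reader} with the configured {@link CharFilterFactory}s
	 * (if any), applied in order, so character-level filtering happens before tokenization.
	 */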
	@Override
	public Reader charStream(Reader reader) {
		if ( charFilters != null && charFilters.length > 0 ) {
			CharStream cs = CharReader.get( reader );
			for ( int i = 0; i < charFilters.length; i++ ) {
				cs = charFilters[i].create( cs );
			}
			reader = cs;
		}
		return reader;
	}

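	/**
	 * Builds the analysis chain for a field: creates the {@link Tokenizer} from the
	 * (possibly char-filtered) reader, wraps it with each {@link TokenFilterFactory}
	 * in order, and returns both the source tokenizer and the final {@link TokenStream}.
	 */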
	@Override
	public TokenStreamInfo getStream(String fieldName, Reader reader) {
		Tokenizer tk = ( Tokenizer ) tokenizer.create( charStream( reader ) );
		TokenStream ts = tk;
		for ( int i = 0; i < filters.length; i++ ) {
			ts = filters[i].create( ts );
		}
		return new TokenStreamInfo( tk, ts );
	}

	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder( "TokenizerChain(" );
		// Guard against the two-argument constructor, which leaves charFilters null.
		if ( charFilters != null ) {
			for ( CharFilterFactory filter : charFilters ) {
				sb.append( filter );
				sb.append( ", " );
			}
		}
		sb.append( tokenizer );
		for ( TokenFilterFactory filter : filters ) {
			sb.append( ", " );
			sb.append( filter );
		}
		sb.append( ')' );
		return sb.toString();
	}

}
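
For context, a minimal usage sketch (not part of the original file): it assumes the Solr factory classes WhitespaceTokenizerFactory and LowerCaseFilterFactory from the same org.apache.solr.analysis package are on the classpath, that SolrAnalyzer exposes the standard Analyzer#tokenStream method, and that Lucene 3.1+ is used (for CharTermAttribute). Some factory versions additionally require a "luceneMatchVersion" init argument. The class name TokenizerChainUsage is purely illustrative.

import java.io.StringReader;
import java.util.HashMap;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.solr.analysis.LowerCaseFilterFactory;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.analysis.WhitespaceTokenizerFactory;

public class TokenizerChainUsage {

	public static void main(String[] args) throws Exception {
		// Factories are normally configured from a schema; here they are built by hand.
		// Depending on the Solr version, the init map may need a "luceneMatchVersion" entry.
		WhitespaceTokenizerFactory tokenizer = new WhitespaceTokenizerFactory();
		tokenizer.init( new HashMap<String, String>() );

		LowerCaseFilterFactory lowerCase = new LowerCaseFilterFactory();
		lowerCase.init( new HashMap<String, String>() );

		// No char filters: the two-argument constructor passes null for them.
		TokenizerChain chain = new TokenizerChain(
				tokenizer, new TokenFilterFactory[] { lowerCase } );

		TokenStream stream = chain.tokenStream( "body", new StringReader( "Hello TokenizerChain" ) );
		CharTermAttribute term = stream.addAttribute( CharTermAttribute.class );
		stream.reset();
		while ( stream.incrementToken() ) {
			System.out.println( term.toString() ); // prints "hello", then "tokenizerchain"
		}
		stream.end();
		stream.close();
	}
}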