All downloads are free. Search and download functionality uses the official Maven repository.

org.lionsoul.jcseg.analyzer.JcsegTokenizerFactory Maven / Gradle / Ivy

There is a newer version: 2.6.3
Show newest version
package org.lionsoul.jcseg.analyzer;

import java.io.IOException;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import org.lionsoul.jcseg.tokenizer.core.ADictionary;
import org.lionsoul.jcseg.tokenizer.core.DictionaryFactory;
import org.lionsoul.jcseg.tokenizer.core.JcsegException;
import org.lionsoul.jcseg.tokenizer.core.JcsegTaskConfig;

/**
 * Jcseg tokenizer factory class for Solr.
 *
 * <p>The segmentation mode is chosen once, at construction time, from the
 * optional {@code mode} argument; the task configuration and dictionary are
 * created in the constructor and may be replaced afterwards through
 * {@link #setConfig(JcsegTaskConfig)} and {@link #setDict(ADictionary)}.
 * 
 * @author chenxin
 */
public class JcsegTokenizerFactory extends TokenizerFactory 
{
    
    /** Segmentation mode: one of the {@code JcsegTaskConfig.*_MODE} constants. */
    private final int mode;
    private JcsegTaskConfig config = null;
    private ADictionary dic = null;

    /**
     * Set the {@code mode} argument in the schema.xml configuration file
     * to change the segment mode for jcseg.
     *
     * <p>Recognized values (case-insensitive): {@code simple}, {@code detect},
     * {@code search}, {@code nlp} and {@code delimiter}. When the argument is
     * absent the search mode is used; any other value selects the complex mode.
     * 
     * @param args factory arguments; the {@code mode} key is read from it
     * @see TokenizerFactory#TokenizerFactory(Map)
     */
    public JcsegTokenizerFactory(Map<String, String> args)
    {
        super(args);
        
        mode = parseMode(args.get("mode"));
        
        //initialize the task configuration and the dictionary
        config = new JcsegTaskConfig(true);
        dic = DictionaryFactory.createSingletonDictionary(config);
    }
    
    /**
     * Translate the textual mode name into its {@code JcsegTaskConfig} constant.
     *
     * @param name the raw {@code mode} argument, may be {@code null}
     * @return the matching mode constant
     */
    private static int parseMode( String name )
    {
        if ( name == null ) {
            return JcsegTaskConfig.SEARCH_MODE;
        }
        
        // Locale.ROOT keeps the comparison stable under locales with special
        // casing rules (e.g. Turkish, where "I" lower-cases to a dotless i).
        switch ( name.toLowerCase(Locale.ROOT) ) {
        case "simple":
            return JcsegTaskConfig.SIMPLE_MODE;
        case "detect":
            return JcsegTaskConfig.DETECT_MODE;
        case "search":
            return JcsegTaskConfig.SEARCH_MODE;
        case "nlp":
            return JcsegTaskConfig.NLP_MODE;
        case "delimiter":
            return JcsegTaskConfig.DELIMITER_MODE;
        default:
            // unrecognized names fall back to the complex mode
            return JcsegTaskConfig.COMPLEX_MODE;
        }
    }
    
    /**
     * Replace the task configuration handed to tokenizers created afterwards.
     *
     * @param config the configuration to use
     */
    public void setConfig( JcsegTaskConfig config ) 
    {
        this.config = config;
    }
    
    /**
     * Replace the dictionary handed to tokenizers created afterwards.
     *
     * @param dic the dictionary to use
     */
    public void setDict( ADictionary dic ) 
    {
        this.dic = dic;
    }
    
    /**
     * @return the task configuration currently held by this factory
     */
    public JcsegTaskConfig getTaskConfig() 
    {
        return config;
    }
    
    /**
     * @return the dictionary currently held by this factory
     */
    public ADictionary getDict()
    {
        return dic;
    }

    /**
     * Create a new {@link JcsegTokenizer} using this factory's mode,
     * configuration and dictionary.
     *
     * @param factory attribute factory supplied by the caller; it is not
     *                passed on to the tokenizer constructor
     * @return a newly constructed tokenizer, never {@code null}
     * @throws IllegalStateException if the tokenizer cannot be constructed;
     *         the original {@code JcsegException} or {@code IOException} is
     *         attached as the cause
     */
    @Override
    public Tokenizer create( AttributeFactory factory ) 
    {
        try {
            return new JcsegTokenizer(mode, config, dic);
        } catch (JcsegException | IOException e) {
            // Fail loudly here: returning null would only defer the failure
            // to an unrelated NullPointerException in the calling code.
            throw new IllegalStateException("Failed to create JcsegTokenizer", e);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy