// com.bigdata.search.NeedsConfiguringAnalyzerFactory Maven / Gradle / Ivy
//
// Go to download
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on May 6, 2014 by Jeremy J. Carroll, Syapse Inc.
 */
package com.bigdata.search;

import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.WeakHashMap;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.search.ConfigurableAnalyzerFactory.AnalyzerOptions;
import com.bigdata.search.ConfigurableAnalyzerFactory.Options;


/**
 * 
 * The bulk of the code in this class is invoked from {@link #init()} to set up the array of
 *  {@link ConfiguredAnalyzerFactory.AnalyzerPair}s. For example, the subclasses of {@link AnalyzerPair}
 *  exist simply to call the appropriate constructor in the appropriate way: the difficulty is that many subclasses
 *  of {@link Analyzer} have constructors with different signatures, and our code needs to navigate each sort.
 * @author jeremycarroll
 *
 */
class NeedsConfiguringAnalyzerFactory implements IAnalyzerFactory {
	final private static transient Logger log = Logger.getLogger(NeedsConfiguringAnalyzerFactory.class);
	
	/**
	 * We create only one {@link ConfiguredAnalyzerFactory} per namespace
	 * and store it here, keyed by the UUID of the namespace's full text index.
	 * The UUID is stable and allows us to side-step lifecycle
	 * issues such as creation and destruction of namespaces, potentially with different properties.
	 * We use a WeakHashMap to ensure that after the destruction of a namespace we clean up.
	 * We have to synchronize this for thread safety.
	 * <p>
	 * The map is declared with explicit type arguments so that lookups yield a
	 * {@link ConfiguredAnalyzerFactory} rather than a bare {@link Object}.
	 */
    private static final Map<UUID, ConfiguredAnalyzerFactory> allConfigs = 
    		Collections.synchronizedMap(new WeakHashMap<UUID, ConfiguredAnalyzerFactory>());


	/**
	 * Built-in configuration, in {@link Properties} text format, that
	 * {@link #initProperties()} loads when natural language support is enabled.
	 * Each language range is either bound directly to an analyzer class
	 * (<code>analyzerClass</code>) or aliased to another range (<code>like</code>);
	 * the wildcard range <code>*</code> is aliased to <code>eng</code>.
	 */
	private static final String ALL_LUCENE_NATURAL_LANGUAGES =  
			"com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.*.like=eng\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.por.analyzerClass=org.apache.lucene.analysis.br.BrazilianAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.pt.like=por\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.zho.analyzerClass=org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.chi.like=zho\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.zh.like=zho\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.jpn.analyzerClass=org.apache.lucene.analysis.cjk.CJKAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ja.like=jpn\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.kor.like=jpn\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ko.like=kor\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ces.analyzerClass=org.apache.lucene.analysis.cz.CzechAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.cze.like=ces\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.cs.like=ces\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.dut.analyzerClass=org.apache.lucene.analysis.nl.DutchAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.nld.like=dut\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.nl.like=dut\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.deu.analyzerClass=org.apache.lucene.analysis.de.GermanAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ger.like=deu\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.de.like=deu\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.gre.analyzerClass=org.apache.lucene.analysis.el.GreekAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ell.like=gre\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.el.like=gre\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.rus.analyzerClass=org.apache.lucene.analysis.ru.RussianAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.ru.like=rus\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.tha.analyzerClass=org.apache.lucene.analysis.th.ThaiAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.th.like=tha\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.eng.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer\n" +
		    "com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.en.like=eng\n";

	/**
	 * Fallback configuration, in {@link Properties} text format, that
	 * {@link #initProperties()} loads when natural language support is disabled
	 * and the index properties do not configure the <code>*</code> language range:
	 * every language range is handled by {@link StandardAnalyzer}.
	 */
	private static final String LUCENE_STANDARD_ANALYZER = 
			"com.bigdata.search.ConfigurableAnalyzerFactory.analyzer.*.analyzerClass=org.apache.lucene.analysis.standard.StandardAnalyzer\n";
	
	// NOTE(review): not referenced anywhere in the visible part of this class — confirm whether it is still needed.
	static int loggerIdCounter = 0;

	/**
	 * This class and all its subclasses provide a variety of patterns
	 * for mapping from the various constructor patterns of subclasses
	 * of {@link Analyzer} to {@link ConfiguredAnalyzerFactory.AnalyzerPair}.
	 * @author jeremycarroll
	 *
	 */
	private static class AnalyzerPair extends ConfiguredAnalyzerFactory.AnalyzerPair {
		
    	AnalyzerPair(String range, Analyzer withStopWords, Analyzer withOutStopWords) {
    		super(range, withStopWords, withOutStopWords);
    	}
    	
    	/**
    	 * This clone constructor implements {@link AnalyzerOptions#LIKE}.
    	 * @param range
    	 * @param copyMe
    	 */
    	AnalyzerPair(String range, AnalyzerPair copyMe) {
    		super(range, copyMe);
    	}
		
    	/**
    	 * If we have a constructor, with arguments including a populated
    	 * stop word set, then we can use it to make both the withStopWords
    	 * analyzer, and the withoutStopWords analyzer.
    	 * @param range
    	 * @param cons A Constructor including a {@link java.util.Set} argument
    	 *  for the stop words.
    	 * @param params The arguments to pass to the constructor including a populated stopword set.
    	 * @throws Exception
    	 */
    	AnalyzerPair(String range, Constructor cons, Object ... params) throws Exception {
    		this(range, cons.newInstance(params), cons.newInstance(useEmptyStopWordSet(params)));
    	}
    	AnalyzerPair(String range, Analyzer stopWordsNotSupported) {
    		this(range, stopWordsNotSupported, stopWordsNotSupported);    		
    	}
		private static Object[] useEmptyStopWordSet(Object[] params) {
			Object rslt[] = new Object[params.length];
			for (int i=0; i cls) throws Exception {
			super(lro.languageRange, getConstructor(cls, Version.class, Set.class), Version.LUCENE_CURRENT, lro.getStopWords());
		}
	}

	/**
	 * Used for Analyzer classes which do not support stopwords and have a constructor with signature (Version).
	 * @author jeremycarroll
	 *
	 */
	private static class VersionAnalyzerPair extends AnalyzerPair {
		/**
		 * Instantiates <code>cls</code> through its (Version) constructor and uses the
		 * single instance for both stop word modes.
		 * @param range The language range this pair applies to.
		 * @param cls The analyzer class; callers are expected to have checked that the
		 *  (Version) constructor exists, since a missing constructor surfaces here as a
		 *  {@link NullPointerException}.
		 * @throws Exception if the reflective instantiation fails.
		 */
		public VersionAnalyzerPair(String range, Class<? extends Analyzer> cls) throws Exception {
			// The cast makes the Analyzer type explicit even when the looked-up
			// constructor is only known as a raw Constructor.
			super(range, (Analyzer) getConstructor(cls, Version.class).newInstance(Version.LUCENE_CURRENT));
		}
	}
	
	/**
	 * Special case code for {@link PatternTokenizer}
	 * @author jeremycarroll
	 *
	 */
    private static class PatternAnalyzer extends AnalyzerPair {

		/**
		 * Pattern-based tokenization without stop words: one analyzer instance,
		 * built with an empty stop word set, serves both modes.
		 * @param lro Supplies the language range.
		 * @param pattern The pattern handed to {@link PatternAnalyzerImpl}.
		 */
		public PatternAnalyzer(ConfigOptionsToAnalyzer lro, Pattern pattern) throws Exception {
			super(
					lro.languageRange,
					new PatternAnalyzerImpl(pattern, CharArraySet.EMPTY_SET));
		}

		/**
		 * Pattern-based tokenization with stop words: one analyzer is built with
		 * the supplied stop words and one with an empty stop word set.
		 * @param lro Supplies the language range.
		 * @param pattern The pattern handed to {@link PatternAnalyzerImpl}.
		 * @param stopWords The stop words for the filtering analyzer.
		 */
		public PatternAnalyzer(ConfigOptionsToAnalyzer lro, Pattern pattern, CharArraySet stopWords) throws Exception {
			super(
					lro.languageRange,
					new PatternAnalyzerImpl(pattern, stopWords),
					new PatternAnalyzerImpl(pattern, CharArraySet.EMPTY_SET));
		}
	}
    
    /**
	 * This class is initialized with the config options, using the {@link #setProperty(String, String)}
	 * method, for a particular language range and works out which pair of {@link Analyzer}s
	 * to use for that language range.
	 * <p>
	 * Instances of this class are only alive during the execution of 
	 * {@link NeedsConfiguringAnalyzerFactory#init()},
	 * the life-cycle is:
	 * <ol>
	 * <li>The relevant config properties are applied, and are used to populate the fields.</li>
	 * <li>The fields are validated</li>
	 * <li>An {@link AnalyzerPair} is constructed</li>
	 * </ol>
	 * 
	 * @author jeremycarroll
	 *
	 */
    private static class ConfigOptionsToAnalyzer {
    	
    	// Value of the AnalyzerOptions.LIKE property: the language range whose analyzers are reused.
    	String like;
    	// Value of the AnalyzerOptions.ANALYZER_CLASS property; validate() also sets it
    	// when a pattern or a word boundary has been configured.
    	String className;
    	// Value of the AnalyzerOptions.STOPWORDS property, or null if not set.
    	String stopwords;
    	// Compiled value of the AnalyzerOptions.PATTERN property, or null if not set.
    	Pattern pattern;
    	// The language range these options apply to.
    	final String languageRange;
    	// The outcome: set from construct() or by following the 'like' chain.
    	AnalyzerPair result;
		// Compiled value of the AnalyzerOptions.WORD_BOUNDARY property, or null if not set.
		Pattern wordBoundary;
		// Compiled value of the AnalyzerOptions.SUB_WORD_BOUNDARY property; validate()
		// supplies a default when only the word boundary is set.
		Pattern subWordBoundary;
		// Compiled value of the AnalyzerOptions.SOFT_HYPHENS property, or null if not set.
		Pattern softHyphens;
		// Value of the AnalyzerOptions.ALWAYS_REMOVE_SOFT_HYPHENS property; null means
		// "not set", which validate() distinguishes from an explicit false.
		Boolean alwaysRemoveSoftHyphens;

		/**
		 * @param languageRange The language range these options apply to.
		 */
		public ConfigOptionsToAnalyzer(String languageRange) {
			this.languageRange = languageRange;
		}

		/**
		 * Resolves the stop word set to use for this language range.
		 * This is called only when we have already identified that
		 * the class does support stopwords.
		 * @return the empty set when stop words are disabled; otherwise the stop
		 *  words of the analyzer class itself (default behavior) or of the class
		 *  named by the stopwords property.
		 */
		public Set getStopWords() {
			if (doNotUseStopWords()) {
				return CharArraySet.EMPTY_SET;
			}
			// Either the analyzer's own defaults, or those of the class named in the config.
			final String stopWordSource = useDefaultStopWords() ? className : stopwords;
			return getStopWordsForClass(stopWordSource);
		}

		/**
		 * @return true if stop words were explicitly switched off, or if a pattern
		 *  was configured without any stopwords property.
		 */
		boolean doNotUseStopWords() {
			if (AnalyzerOptions.STOPWORDS_VALUE_NONE.equals(stopwords)) {
				return true;
			}
			return stopwords == null && pattern != null;
		}

		/**
		 * Looks up the default stop words of an analyzer class by invoking its static
		 * <code>getDefaultStopSet()</code> method reflectively. If that fails for any
		 * reason, {@link StandardAnalyzer} and {@link StopAnalyzer} fall back to their
		 * public stop word constants.
		 * @param clazzName Fully qualified name of the analyzer class.
		 * @return The stop words for that class.
		 * @throws RuntimeException if the class cannot be loaded, or if no stop words
		 *  can be found for it; in the latter case the reflective failure is attached
		 *  as the cause.
		 */
		protected Set getStopWordsForClass(String clazzName) {
			final Class analyzerClass = getAnalyzerClass(clazzName);
			try {
				return (Set) analyzerClass.getMethod("getDefaultStopSet").invoke(null);
			} catch (Exception e) {
				if (StandardAnalyzer.class.equals(analyzerClass)) {
					return StandardAnalyzer.STOP_WORDS_SET;
				}
				if (StopAnalyzer.class.equals(analyzerClass)) {
					return StopAnalyzer.ENGLISH_STOP_WORDS_SET;
				}
				// Keep the underlying failure so a misconfiguration can be diagnosed.
				throw new RuntimeException("Failed to find stop words from " + clazzName + " for language range "+languageRange, e);
			}
		}

		/**
		 * @return true if the stopwords property asks for the default, or if neither
		 *  a stopwords property nor a pattern was configured.
		 */
		protected boolean useDefaultStopWords() {
			if (AnalyzerOptions.STOPWORDS_VALUE_DEFAULT.equals(stopwords)) {
				return true;
			}
			return stopwords == null && pattern == null;
		}

		/**
		 * The first step in the life-cycle, used to initialize the fields.
		 * Pattern-valued properties are compiled with
		 * {@link Pattern#UNICODE_CHARACTER_CLASS}.
		 * @param shortProperty The property name with the prefix and language range removed.
		 * @param value The configured value.
		 * @return true if the property was recognized.
		 */
		public boolean setProperty(String shortProperty, String value) {
			if (shortProperty.equals(AnalyzerOptions.LIKE)) {
				like = value;
				return true;
			}
			if (shortProperty.equals(AnalyzerOptions.ANALYZER_CLASS)) {
				className = value;
				return true;
			}
			if (shortProperty.equals(AnalyzerOptions.STOPWORDS)) {
				stopwords = value;
				return true;
			}
			if (shortProperty.equals(AnalyzerOptions.PATTERN)) {
				pattern = Pattern.compile(value, Pattern.UNICODE_CHARACTER_CLASS);
				return true;
			}
			if (shortProperty.equals(AnalyzerOptions.WORD_BOUNDARY)) {
				wordBoundary = Pattern.compile(value, Pattern.UNICODE_CHARACTER_CLASS);
				return true;
			}
			if (shortProperty.equals(AnalyzerOptions.SUB_WORD_BOUNDARY)) {
				subWordBoundary = Pattern.compile(value, Pattern.UNICODE_CHARACTER_CLASS);
				return true;
			}
			if (shortProperty.equals(AnalyzerOptions.SOFT_HYPHENS)) {
				softHyphens = Pattern.compile(value, Pattern.UNICODE_CHARACTER_CLASS);
				return true;
			}
			if (shortProperty.equals(AnalyzerOptions.ALWAYS_REMOVE_SOFT_HYPHENS)) {
				alwaysRemoveSoftHyphens = Boolean.valueOf(value);
				return true;
			}
			// Not one of ours.
			return false;
		}

		/**
		 * The second phase of the life-cycle, used for sanity checking.
		 * <p>
		 * As a side effect this fills in implied values: a pattern implies
		 * {@link PatternTokenizer} as the class name, a word boundary implies
		 * {@link TermCompletionAnalyzer}, and the sub-word boundary and
		 * always-remove-soft-hyphens settings receive their defaults where applicable.
		 * <p>
		 * Class names are compared with {@link String#equals(Object)}: a name read from
		 * a properties file is never the same object as the one returned by
		 * {@link Class#getName()}, so an identity comparison would reject a correctly
		 * configured class.
		 * @throws RuntimeException if the combination of options is inconsistent.
		 */
		public void validate() {
			final String patternTokenizerName = PatternTokenizer.class.getName();
			final String termCompletionName = TermCompletionAnalyzer.class.getName();

			if (pattern != null ) {
				if ( className != null && !className.equals(patternTokenizerName)) {
					throw new RuntimeException("Bad Option: Language range "+languageRange + " with pattern propety for class "+ className);
				}
				className = patternTokenizerName;
			}
			if (this.wordBoundary != null  ) {
				if ( className != null && !className.equals(termCompletionName)) {
					// This branch is about the wordBoundary property, not the pattern property.
					throw new RuntimeException("Bad Option: Language range "+languageRange + " with wordBoundary property for class "+ className);
				}
				className = termCompletionName;
				
				if ( subWordBoundary == null ) {
					subWordBoundary = AnalyzerOptions.DEFAULT_SUB_WORD_BOUNDARY;
				}
				if ( alwaysRemoveSoftHyphens != null && softHyphens == null ) {
					throw new RuntimeException("Bad option: Language range "+languageRange + ": must specify softHypens when setting alwaysRemoveSoftHyphens");		
				}
				if (softHyphens != null && alwaysRemoveSoftHyphens == null) {
					alwaysRemoveSoftHyphens = AnalyzerOptions.DEFAULT_ALWAYS_REMOVE_SOFT_HYPHENS;
				}
				
			} else if ( subWordBoundary != null || softHyphens != null || alwaysRemoveSoftHyphens != null ||
					termCompletionName.equals(className) ) {
				throw new RuntimeException("Bad option: Language range "+languageRange + ": must specify wordBoundary for TermCompletionAnalyzer");
			}
			
			if (patternTokenizerName.equals(className) && pattern == null ) {
				throw new RuntimeException("Bad Option: Language range "+languageRange + " must specify pattern for PatternTokenizer.");
			}
			// Exactly one of 'like' and an implementation class must be present.
			if ( (like != null) == (className != null) ) {
				throw new RuntimeException("Bad Option: Language range "+languageRange + " must specify exactly one of implementation class or like.");
			}
			if (stopwords != null && like != null) {
				throw new RuntimeException("Bad Option: Language range "+languageRange + " must not specify stopwords with like.");
			}
			
		}
		
		/**
		 * The third and final phase of the life-cycle, used for identifying
		 * the AnalyzerPair.
		 * <p>
		 * The analyzer class is instantiated through the first constructor shape that
		 * it offers, tried in this order: (CharArraySet), (Version, Set), (Version), ().
		 * Only the first two support stop words.
		 * @return The pair for this language range, or null if the range is defined
		 *  by a 'like' property (no class name) and must be resolved by
		 *  {@link #followLikesToAnalyzerPair(int, int, Map)}.
		 * @throws Exception if a reflective instantiation fails.
		 * @throws RuntimeException if stop words were requested for a class that does
		 *  not support them, or if no usable constructor exists.
		 */
		private AnalyzerPair construct() throws Exception {
			if (className == null) {
				return null;
			}
			if (pattern != null) {
				// NOTE(review): a stopwords property configured together with a pattern
				// is not consulted here — confirm that this is intended.
				return new PatternAnalyzer(this, pattern);
			}
			if (softHyphens != null) {
				return new AnalyzerPair(
						languageRange,
						new TermCompletionAnalyzer(
								wordBoundary, 
								subWordBoundary, 
								softHyphens, 
								alwaysRemoveSoftHyphens));
			}
			if (wordBoundary != null) {
				return new AnalyzerPair(
						languageRange,
						new TermCompletionAnalyzer(
								wordBoundary, 
								subWordBoundary));
			}
			// Typed so that the reflective newInstance() calls below yield Analyzers.
			@SuppressWarnings("unchecked")
			final Class<? extends Analyzer> cls = getAnalyzerClass();
            
            if (hasConstructor(cls, CharArraySet.class)) {
            	final Constructor<? extends Analyzer> cons = cls.getConstructor(CharArraySet.class);
    			return new AnalyzerPair(languageRange,
    					cons.newInstance(new CharArraySet(getStopWords(), false)),
    					cons.newInstance(new CharArraySet(Collections.emptySet(), false)));
            }
            
            if (hasConstructor(cls, Version.class, Set.class)) {

            	// RussianAnalyzer is missing any way to access stop words.
            	if (RussianAnalyzer.class.equals(cls)) {
            		if (useDefaultStopWords()) {
            		    return new AnalyzerPair(languageRange, new RussianAnalyzer(), new RussianAnalyzer(CharArraySet.EMPTY_SET));
            		}
            		if (doNotUseStopWords()) {
            		    return new AnalyzerPair(languageRange,  new RussianAnalyzer(CharArraySet.EMPTY_SET));	
            		}
            	}
            	return new VersionSetAnalyzerPair(this, cls);
            }
            
            // None of the remaining constructor shapes accepts stop words.
            if (stopwords != null && !stopwords.equals(AnalyzerOptions.STOPWORDS_VALUE_NONE)) {
            	throw new RuntimeException("Bad option: language range: " + languageRange + " stopwords are not supported by " + className);
            }
            if (hasConstructor(cls, Version.class)) {
            	return new VersionAnalyzerPair(languageRange, cls);
            }
            
            if (hasConstructor(cls)) {
            	// Class.newInstance() is deprecated; go through the no-arg constructor instead.
            	return new AnalyzerPair(languageRange, cls.getConstructor().newInstance());
            }
            throw new RuntimeException("Bad option: cannot find constructor for class " + className + " for language range " + languageRange);
		}

		/**
		 * Also part of the third phase of the life-cycle, following the {@link AnalyzerOptions#LIKE}
		 * properties. The resolved pair is stored in {@link #result}, so each language
		 * range is resolved at most once.
		 * @param depth The number of 'like' links already followed to reach this range.
		 * @param max The number of links at which the chain is declared to be a loop.
		 * @param analyzers All configured language ranges, keyed by language range.
		 * @return The pair for this language range.
		 * @throws RuntimeException if the 'like' chain loops or refers to a language
		 *  range that is not configured.
		 */
		AnalyzerPair followLikesToAnalyzerPair(int depth, int max,
				Map<String, ConfigOptionsToAnalyzer> analyzers) {
			if (result == null) {
				if (depth == max) {
					throw new RuntimeException("Bad configuration: - 'like' loop for language range " + languageRange);
				}
				final ConfigOptionsToAnalyzer next = analyzers.get(like);
				if (next == null) {
					throw new RuntimeException("Bad option: - 'like' not found for language range " + languageRange+ " (not found: '"+ like +"')");	
				}
				// Clone the target's analyzers under our own language range.
				result = new AnalyzerPair(languageRange, next.followLikesToAnalyzerPair(depth+1, max, analyzers));
			}
			return result;
		}

		/**
		 * @return The class named by {@link #className}.
		 */
		protected Class getAnalyzerClass() {
			final String configuredName = className;
			return getAnalyzerClass(configuredName);
		}

		/**
		 * Loads a class by name.
		 * @param className2 Fully qualified class name.
		 * @return The loaded class.
		 * @throws RuntimeException wrapping the {@link ClassNotFoundException} if the
		 *  class cannot be found.
		 */
		@SuppressWarnings("unchecked")
		protected Class getAnalyzerClass(String className2) {
			try {
				return (Class) Class.forName(className2);
			} catch (ClassNotFoundException e) {
				throw new RuntimeException("Bad option: cannot find class " + className2 + " for language range " + languageRange, e);
			}
		}

		/**
		 * Records the outcome for this language range.
		 * @param ap The pair to store; may be null when the range is defined by 'like'.
		 */
		void setAnalyzerPair(AnalyzerPair ap) {
			this.result = ap;
		}
	}
    
    		
    /** The index whose properties and key builder drive the configuration. */
    private final FullTextIndex fullTextIndex;

	/** The factory whose delegate is replaced once the configuration has been built. */
	private final ConfigurableAnalyzerFactory parent;
    
    
    /**
     * @param parent The factory to hand the configured delegate to.
     * @param fullTextIndex The index to configure analyzers for.
     * @throws IllegalArgumentException if <code>fullTextIndex</code> is null.
     */
    NeedsConfiguringAnalyzerFactory(ConfigurableAnalyzerFactory parent, final FullTextIndex fullTextIndex) {
        if (fullTextIndex == null) {
            throw new IllegalArgumentException();
        }
		this.parent = parent;
		this.fullTextIndex = fullTextIndex;
    }

	/**
	 * Returns the {@link ConfiguredAnalyzerFactory} for the namespace of
	 * {@link #fullTextIndex}, building and caching it on first use.
	 * <p>
	 * Building proceeds in the phases described on {@link ConfigOptionsToAnalyzer}:
	 * the properties are distributed to one options object per language range, each
	 * is validated, each with an implementation class is constructed, and finally
	 * the 'like' references are resolved.
	 * @return The cached or newly built factory.
	 * @throws RuntimeException if the configuration is invalid or an analyzer
	 *  cannot be constructed.
	 */
	private ConfiguredAnalyzerFactory init() {
        
        final UUID uuid = fullTextIndex.getIndex().getIndexMetadata().getIndexUUID();
        
        // The cast is a no-op when the cache is declared with type arguments.
        ConfiguredAnalyzerFactory configuration = (ConfiguredAnalyzerFactory) allConfigs.get(uuid);
        
        if (configuration != null) {
        	return configuration;
        }
        	
        // First time for this namespace - we need to analyze the properties
        // and construct a ConfiguredAnalyzerFactory
        
        final Properties properties = initProperties();
        
        final Map<String, ConfigOptionsToAnalyzer> analyzers = new HashMap<>();
        
        properties2analyzers(properties, analyzers);
        
        if (!analyzers.containsKey("*")) {
        	throw new RuntimeException("Bad config: must specify behavior on language range '*'");
        }
        
        for (ConfigOptionsToAnalyzer a: analyzers.values()) {
        	a.validate();
        }

        try {
			for (ConfigOptionsToAnalyzer a: analyzers.values()) {
				a.setAnalyzerPair(a.construct());
			}
		} catch (Exception e) {
			throw new RuntimeException("Cannot construct ConfigurableAnalyzerFactory", e);
		}

        // A 'like' chain longer than the number of language ranges must contain a loop.
        final int sz = analyzers.size();
		for (ConfigOptionsToAnalyzer a: analyzers.values()) {
			a.followLikesToAnalyzerPair(0, sz, analyzers);
		}
		
		final AnalyzerPair[] allPairs = new AnalyzerPair[sz];
		int i = 0;
		for (ConfigOptionsToAnalyzer a: analyzers.values()) {
			allPairs[i++] = a.result;
		}
		Arrays.sort(allPairs);
		if (log.isInfoEnabled()) {
			final StringBuilder sb = new StringBuilder();
			sb.append("Installed text Analyzer's: ");
			for (AnalyzerPair ap: allPairs) {
				sb.append(ap.toString());
				sb.append(", ");
			}
			log.info(sb.toString());
		}
		configuration = new ConfiguredAnalyzerFactory(allPairs, getDefaultLanguage());
		allConfigs.put(uuid, configuration);
        return configuration;
	}

	/**
	 * @return The language of the locale configured on the index's key builder when
	 *  that builder supports Unicode; otherwise <code>"en"</code>.
	 */
	private String getDefaultLanguage() {
		final IKeyBuilder keyBuilder = fullTextIndex.getKeyBuilder();

		if (!keyBuilder.isUnicodeSupported()) {
			// Rule, Britannia!
			return "en";
		}

		// The language of the configured locale for the database.
		final KeyBuilder unicodeKeyBuilder = (KeyBuilder) keyBuilder;
		return unicodeKeyBuilder.getSortKeyGenerator().getLocale().getLanguage();
	}

	/**
	 * @param cls The class to inspect.
	 * @param parameterTypes The constructor signature to look for.
	 * @return true if {@link #getConstructor(Class, Class...)} finds such a constructor.
	 */
	private static boolean hasConstructor(Class cls, Class ... parameterTypes) {
		final Constructor found = getConstructor(cls, parameterTypes);
		return found != null;
	}

	/**
	 * Looks up a public constructor without throwing when it is missing.
	 * @param cls The class to inspect.
	 * @param parameterTypes The constructor signature to look for.
	 * @return The constructor, or null if it does not exist or may not be accessed;
	 *  the reason is logged at debug level.
	 */
	protected static Constructor getConstructor(Class cls,
			Class... parameterTypes) {
		final Constructor found;
		try {
			found = cls.getConstructor(parameterTypes);
		} catch (NoSuchMethodException | SecurityException e) {
			if (log.isDebugEnabled()) {
				log.debug(e);
			}
			return null;
		}
		return found;
	}

	/**
	 * Distributes the analyzer properties to one {@link ConfigOptionsToAnalyzer}
	 * per language range, creating the options objects on demand.
	 * <p>
	 * A property name is expected to be the {@link Options#ANALYZER} prefix followed
	 * by <code>languageRange.shortProperty</code>. An underscore in that remainder is
	 * read as <code>*</code>, and the language range is lower-cased. Properties that
	 * do not fit, or whose short name is not recognized, are logged as warnings.
	 * @param props The properties to read.
	 * @param analyzers Receives the options objects, keyed by language range.
	 */
	private void properties2analyzers(Properties props, Map<String, ConfigOptionsToAnalyzer> analyzers) {
		
		final Enumeration<?> en = props.propertyNames();
		while (en.hasMoreElements()) {
			
			final String prop = (String)en.nextElement();
			if (prop.equals(Options.NATURAL_LANGUAGE_SUPPORT)) continue;
			if (prop.startsWith(Options.ANALYZER)) {
				// Literal replacement: no regular expression is needed to swap one character.
				final String languageRangeAndProperty[] = prop.substring(Options.ANALYZER.length()).replace('_', '*').split("[.]");
				if (languageRangeAndProperty.length == 2) {

					final String languageRange = languageRangeAndProperty[0].toLowerCase(Locale.US);  // Turkish "I" could create a problem
					final String shortProperty = languageRangeAndProperty[1];
					final String value =  props.getProperty(prop);
					if (log.isInfoEnabled()) {
						log.info("Setting language range: " + languageRange + "/" + shortProperty + " = " + value);
					}
					ConfigOptionsToAnalyzer cons = analyzers.get(languageRange);
					if (cons == null) {
						cons = new ConfigOptionsToAnalyzer(languageRange);
						analyzers.put(languageRange, cons);
					}
					if (cons.setProperty(shortProperty, value)) {
						continue;
					}
				}
			} 
			
			log.warn("Failed to process configuration property: " + prop);
		}
		
	}

	/**
	 * Assembles the properties to configure from. The starting point is one of:
	 * the built-in natural language table (when natural language support is
	 * enabled), nothing (when the index configures the <code>*</code> language range
	 * itself), or the built-in "standard analyzer for everything" default. The
	 * index's own {@link ConfigurableAnalyzerFactory} properties are then copied on top.
	 * @return A new properties object.
	 */
	protected Properties initProperties() {
		final Properties parentProperties = fullTextIndex.getProperties();
		final String naturalLanguageSetting = parentProperties.getProperty(
				Options.NATURAL_LANGUAGE_SUPPORT,
				Options.DEFAULT_NATURAL_LANGUAGE_SUPPORT);

		final Properties myProps;
		if (Boolean.parseBoolean(naturalLanguageSetting)) {
			myProps = loadPropertyString(ALL_LUCENE_NATURAL_LANGUAGES);
		} else if (hasPropertiesForStarLanguageRange(parentProperties)) {
			myProps = new Properties();
		} else {
			myProps = loadPropertyString(LUCENE_STANDARD_ANALYZER);
		}

		// Explicit configuration overrides the built-in starting point.
		copyRelevantProperties(fullTextIndex.getProperties(), myProps);
		return myProps;
	}

	/**
	 * Parses text in {@link Properties} format.
	 * @param props The text to parse.
	 * @return A new properties object holding the parsed entries.
	 * @throws RuntimeException wrapping any {@link IOException} from the parse.
	 */
	Properties loadPropertyString(String props) {
		final Properties parsed = new Properties();
		final StringReader source = new StringReader(props);
		try {
			parsed.load(source);
		} catch (IOException e) {
			throw new RuntimeException("Impossible - well clearly not!", e);
		}
		return parsed;
	}
    
    /**
     * Copies every property whose name starts with the fully qualified name of
     * {@link ConfigurableAnalyzerFactory}.
     * @param from The properties to read.
     * @param to The properties to write to; existing entries of the same name are replaced.
     */
    private void copyRelevantProperties(Properties from, Properties to) {
		final String prefix = ConfigurableAnalyzerFactory.class.getName();
		for (Enumeration names = from.propertyNames(); names.hasMoreElements(); ) {
			final String name = (String) names.nextElement();
			if (!name.startsWith(prefix)) {
				continue;
			}
			to.setProperty(name, from.getProperty(name));
		}
	}

    /**
     * @param from The properties to inspect.
     * @return true if any property configures the wildcard language range, written
     *  either as <code>_</code> or as <code>*</code>.
     */
    private boolean hasPropertiesForStarLanguageRange(Properties from) {
		final String underscorePrefix = Options.ANALYZER + "_.";
		final String starPrefix = Options.ANALYZER + "*.";
		for (Enumeration names = from.propertyNames(); names.hasMoreElements(); ) {
			final String name = (String) names.nextElement();
			if (name.startsWith(underscorePrefix)) {
				return true;
			}
			if (name.startsWith(starPrefix)) {
				return true;
			}
		}
		return false;
	}
	/**
	 * Builds (or fetches) the configured factory, installs it as the parent's
	 * delegate, and answers this request through it.
	 * @param languageCode The language code to find an analyzer for.
	 * @param filterStopwords Whether the analyzer should filter stop words.
	 * @return The analyzer chosen by the configured factory.
	 */
	@Override
	public Analyzer getAnalyzer(String languageCode, boolean filterStopwords) {
		// delayed initialization of parent
		final IAnalyzerFactory configured = init();
		parent.delegate = configured;
		return configured.getAnalyzer(languageCode, filterStopwords);
	}

}