All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.hibernate.search.engine.backend.analysis.AnalyzerNames Maven / Gradle / Ivy

The newest version!
/*
 * SPDX-License-Identifier: Apache-2.0
 * Copyright Red Hat Inc. and Hibernate Authors
 */
package org.hibernate.search.engine.backend.analysis;

/**
 * Constants for the names of built-in analyzers.
 */
public final class AnalyzerNames {

	/**
	 * Name of the default analyzer.
	 * <p>
	 * This is the analyzer generally applied to full-text fields that don't require specific analysis.
	 * <p>
	 * Unless overridden by explicit analysis configuration,
	 * the default analyzer is the {@link #STANDARD standard analyzer}:
	 * <ul>
	 *     <li>First, tokenize using the standard tokenizer, which follows Word Break rules from the
	 *     Unicode Text Segmentation algorithm, as specified in
	 *     <a href="https://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.</li>
	 *     <li>Then, lowercase each token.</li>
	 * </ul>
	 */
	public static final String DEFAULT = "default";

	/**
	 * Name of the standard analyzer.
	 * <p>
	 * Unless overridden by explicit analysis configuration, this analyzer behaves as follows:
	 * <ul>
	 *     <li>First, tokenize using the standard tokenizer, which follows Word Break rules from the
	 *     Unicode Text Segmentation algorithm, as specified in
	 *     <a href="https://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.</li>
	 *     <li>Then, lowercase each token.</li>
	 * </ul>
	 */
	public static final String STANDARD = "standard";

	/**
	 * Name of the simple analyzer.
	 * <p>
	 * Unless overridden by explicit analysis configuration, this analyzer behaves as follows:
	 * <ul>
	 *     <li>First, tokenize by assuming non-letter characters are separators.</li>
	 *     <li>Then, lowercase each token.</li>
	 * </ul>
	 */
	public static final String SIMPLE = "simple";

	/**
	 * Name of the whitespace analyzer.
	 * <p>
	 * Unless overridden by explicit analysis configuration, this analyzer behaves as follows:
	 * <ul>
	 *     <li>First, tokenize by assuming whitespace characters are separators.</li>
	 *     <li>Do not change the tokens.</li>
	 * </ul>
	 */
	public static final String WHITESPACE = "whitespace";

	/**
	 * Name of the stop analyzer.
	 * <p>
	 * Unless overridden by explicit analysis configuration, this analyzer behaves as follows:
	 * <ul>
	 *     <li>First, tokenize by assuming non-letter characters are separators.</li>
	 *     <li>Then, lowercase each token.</li>
	 *     <li>Finally, remove English stop words.</li>
	 * </ul>
	 */
	public static final String STOP = "stop";

	/**
	 * Name of the keyword analyzer.
	 * <p>
	 * Unless overridden by explicit analysis configuration,
	 * this analyzer does not change the text in any way.
	 * <p>
	 * With this analyzer, a full-text field would behave similarly to a keyword field,
	 * but with fewer features: no terms aggregations, for example.
	 * <p>
	 * Consider using a keyword field instead.
	 */
	public static final String KEYWORD = "keyword";

	// Constants holder only: instantiation is deliberately prevented.
	private AnalyzerNames() {
	}

}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy