All downloads are free. Search and download functionality uses the official Maven repository.

org.carrot2.text.preprocessing.pipeline.CompletePreprocessingPipeline Maven / Gradle / Ivy

Go to download

Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).

There is a newer version: 3.16.3
Show newest version

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2012, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.text.preprocessing.pipeline;

import org.carrot2.text.preprocessing.*;
import org.carrot2.util.attribute.Bindable;

/**
 * Performs a complete preprocessing on the provided documents. The preprocessing consists
 * of the following steps:
 * 
    *
  1. {@link Tokenizer#tokenize(PreprocessingContext)}
  2. *
  3. {@link CaseNormalizer#normalize(PreprocessingContext)}
  4. *
  5. {@link LanguageModelStemmer#stem(PreprocessingContext)}
  6. *
  7. {@link StopListMarker#mark(PreprocessingContext)}
  8. *
  9. {@link PhraseExtractor#extractPhrases(PreprocessingContext)}
  10. *
  11. {@link LabelFilterProcessor#process(PreprocessingContext)}
  12. *
  13. {@link DocumentAssigner#assign(PreprocessingContext)}
  14. *
*/ @Bindable(prefix = "PreprocessingPipeline") public class CompletePreprocessingPipeline extends BasicPreprocessingPipeline { /** * Phrase extractor used by the algorithm, contains bindable attributes. */ public final PhraseExtractor phraseExtractor = new PhraseExtractor(); /** * Label filter processor used by the algorithm, contains bindable attributes. */ public final LabelFilterProcessor labelFilterProcessor = new LabelFilterProcessor(); /** * Document assigner used by the algorithm, contains bindable attributes. */ public final DocumentAssigner documentAssigner = new DocumentAssigner(); /** * Performs preprocessing on the provided {@link PreprocessingContext}. */ @Override public void preprocess(PreprocessingContext context) { tokenizer.tokenize(context); caseNormalizer.normalize(context); languageModelStemmer.stem(context); stopListMarker.mark(context); phraseExtractor.extractPhrases(context); labelFilterProcessor.process(context); documentAssigner.assign(context); context.preprocessingFinished(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy