All downloads are free. The search and download features use the official Maven repository.

org.carrot2.text.preprocessing.CaseNormalizerDescriptor Maven / Gradle / Ivy

Go to download

Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).

There is a newer version: 3.16.3
Show newest version


// APT-generated file.

package org.carrot2.text.preprocessing;

//Imported for JavaDoc references mostly.
import org.carrot2.util.attribute.*;

import java.util.*;
import javax.annotation.*;

/**
 * Metadata and attributes of the {@link org.carrot2.text.preprocessing.CaseNormalizer} component. You can use 
 * this descriptor to obtain metadata, such as human readable name and description, about the component 
 * as a whole as well as about its attributes. Using the {@link #attributeBuilder(Map)}
 * you can obtain a builder for type-safe generation of the attribute maps. Please see the
 * main overview for a complete code example. 
 */
@Generated("Generated from org.carrot2.text.preprocessing.CaseNormalizer")
public final class CaseNormalizerDescriptor implements IBindableDescriptor
{
    /**
     * The component class for which this descriptor was generated. 
     */
    public final String bindableClassName = "org.carrot2.text.preprocessing.CaseNormalizer";

    /**
     * Attribute prefix used by the component.
     */
    public final String prefix = "CaseNormalizer";

    /**
     * A one sentence summary of the component. It could be presented as a header of the tool
     * tip of the corresponding UI component.
     */
    public final String title = "Performs case normalization and calculates a number of frequency statistics for words";
    
    /**
     * A short label for the component. It can be presented as a label of the
     * corresponding UI component.
     */
    public final String label = "";

    /**
     * A longer, possibly multi sentence, description of the component. It could be presented
     * as a body of the tool tip of the corresponding UI component.
     */
    public final String description = "The aim of case normalization is to find the most frequently appearing variants of words in terms of case. For example, if in the input documents MacOS appears 20 times, Macos 5 times and macos 2 times, case normalizer will select MacOS to represent all variants and assign the aggregated term frequency of 27 to it. 

This class saves the following results to the PreprocessingContext:

  • AllTokens.wordIndex
  • AllWords.image
  • AllWords.tf
  • AllWords.tfByDocument

This class requires that Tokenizer be invoked first."; /** * Attributes of the component. Note that only statically reachable fields are included. * Additional attributes may be available at run time. */ public final static Attributes attributes; /** * Attributes declared directly by the component. */ private final static Set ownAttributes; /** * Attributes declared by the component or its superclasses. */ private final static Set allAttributes; /** * Attributes declared by the component or its superclasses, lookup dictionary * by attribute key. */ private final static Map allAttributesByKey; /** * Attributes declared by the component or its superclasses, lookup dictionary by * attribute's field name. */ private final static Map allAttributesByFieldName; /** * Static initializer for internal collections. */ static { attributes = new Attributes(); final Set ownAttrs = new HashSet(); ownAttrs.add(attributes.dfThreshold); final Set allAttrs = new HashSet(); allAttrs.add(org.carrot2.text.preprocessing.CaseNormalizerDescriptor.attributes.dfThreshold); allAttributes = Collections.unmodifiableSet(allAttrs); ownAttributes = Collections.unmodifiableSet(ownAttrs); final Map allAttrsByKey = new HashMap(); final Map allAttrsByFieldName = new HashMap(); for (AttributeInfo ai : allAttrs) { allAttrsByKey.put(ai.key, ai); allAttrsByFieldName.put(ai.fieldName, ai); } allAttributesByKey = Collections.unmodifiableMap(allAttrsByKey); allAttributesByFieldName = Collections.unmodifiableMap(allAttrsByFieldName); } /* Attribute keys. */ /** * Constants for all attribute keys of the {@link org.carrot2.text.preprocessing.CaseNormalizer} component. */ public static class Keys { protected Keys() {} /** Attribute key for: {@link org.carrot2.text.preprocessing.CaseNormalizer#dfThreshold}. */ public static final String DF_THRESHOLD = "CaseNormalizer.dfThreshold"; } /* Attribute descriptors. */ /** * All attributes of the {@link org.carrot2.text.preprocessing.CaseNormalizer} component. 
*/ public static final class Attributes { private Attributes() { /* No public instances. */ } /** * */ public final AttributeInfo dfThreshold = new AttributeInfo( "CaseNormalizer.dfThreshold", "org.carrot2.text.preprocessing.CaseNormalizer", "dfThreshold", "Word Document Frequency threshold. Words appearing in fewer than\ndfThreshold documents will be ignored.", "Word document frequency threshold", "Word Document Frequency threshold", "Words appearing in fewer than dfThreshold documents will be ignored.", "Preprocessing", org.carrot2.util.attribute.AttributeLevel.ADVANCED, null ); } /** * Attribute map builder for the {@link org.carrot2.text.preprocessing.CaseNormalizer} component. You can use this * builder as a type-safe alternative to populating the attribute map using attribute keys. */ public static class AttributeBuilder { /** The attribute map populated by this builder. */ public final Map map; /** * Creates a builder backed by the provided map. */ protected AttributeBuilder(Map map) { this.map = map; } /** * Word Document Frequency threshold. Words appearing in fewer than dfThreshold documents will be ignored. * * @see org.carrot2.text.preprocessing.CaseNormalizer#dfThreshold */ public AttributeBuilder dfThreshold(int value) { map.put("CaseNormalizer.dfThreshold", value); return this; } /** * Word Document Frequency threshold. Words appearing in fewer than dfThreshold documents will be ignored. * * @see org.carrot2.text.preprocessing.CaseNormalizer#dfThreshold */ public AttributeBuilder dfThreshold(IObjectFactory value) { map.put("CaseNormalizer.dfThreshold", value); return this; } } /** * Creates an attribute map builder for the component. You can use this * builder as a type-safe alternative to populating the attribute map using attribute keys. * * @param attributeValues An existing map which should be used to collect attribute values. 
* Attribute values set by this builder will be added to the provided map, overwriting * previously defined mappings, if any. */ public static AttributeBuilder attributeBuilder(Map attributeValues) { return new AttributeBuilder(attributeValues); } /* IBindableDescriptor */ @Override public String getPrefix() { return prefix; } @Override public String getTitle() { return title; } @Override public String getLabel() { return label; } @Override public String getDescription() { return description; } @Override public Set getOwnAttributes() { return ownAttributes; } @Override public Set getAttributes() { return allAttributes; } @Override public Map getAttributesByKey() { return allAttributesByKey; } @Override public Map getAttributesByFieldName() { return allAttributesByFieldName; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy