weka.core.stemmers.SnowballStemmer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weka-dev Show documentation
Show all versions of weka-dev Show documentation
The Waikato Environment for Knowledge Analysis (WEKA), a machine
learning workbench. This version represents the developer version, the
"bleeding edge" of development, you could say. New functionality gets added
to this version.
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* SnowballStemmer.java
* Copyright (C) 2005-2012 University of Waikato, Hamilton, New Zealand
*
*/
package weka.core.stemmers;
import weka.core.ClassDiscovery;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.PluginManager;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WekaPackageClassLoaderManager;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;
/**
* A wrapper class for the Snowball stemmers. Only
* available if the Snowball classes are in the classpath.<br>
* If the class discovery is not dynamic, i.e., the property 'UseDynamic' in the
* props file 'weka/gui/GenericPropertiesCreator.props' is 'false', then the
* property 'org.tartarus.snowball.SnowballProgram' in the
* 'weka/gui/GenericObjectEditor.props' file has to be uncommented as well. If
* necessary you have to discover and fill in the snowball stemmers manually.
* You can use the 'weka.core.ClassDiscovery' for this:
* <pre>
* java weka.core.ClassDiscovery org.tartarus.snowball.SnowballProgram org.tartarus.snowball.ext
* </pre>
*
* Valid options are:
*
* <pre>
* -S &lt;name&gt;
*  The name of the snowball stemmer (default 'porter').
*  available stemmers:
*     danish, dutch, english, finnish, french, german, italian,
*     norwegian, porter, portuguese, russian, spanish, swedish
* </pre>
*
* @author FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 15257 $
*/
public class SnowballStemmer implements Stemmer, OptionHandler {
/** Version identifier used for serialization. */
static final long serialVersionUID = -6111170431963015178L;
/** The base package name of the Snowball library. */
public final static String PACKAGE = "org.tartarus.snowball";
/** The package name where the stemmers are located ({@code PACKAGE} + ".ext"). */
public final static String PACKAGE_EXT = PACKAGE + ".ext";
/** Fully qualified name of the Snowball program class that all stemmers are derived from. */
protected final static String SNOWBALL_PROGRAM = PACKAGE + ".SnowballProgram";
/** Whether the Snowball stemmers are in the classpath; set by {@code checkForSnowball()}. */
protected static boolean m_Present = false;
/** Contains all the stemmers that were found (language names). */
protected static Vector m_Stemmers;
/** The current stemmer. */
protected Object m_Stemmer;
/** The stem method (transient, i.e. excluded from serialization). */
protected transient Method m_StemMethod;
/** The setCurrent method (transient, i.e. excluded from serialization). */
protected transient Method m_SetCurrentMethod;
/** The getCurrent method (transient, i.e. excluded from serialization). */
protected transient Method m_GetCurrentMethod;
/** Checks for Snowball statically, so the check runs only once, when the class is initialized. */
static {
checkForSnowball();
}
/**
 * Initializes the stemmer with the default algorithm ("porter").
 * <p>
 * Delegates to {@link #SnowballStemmer(String)} with the name "porter" and
 * afterwards calls {@code initStemmers()}.
 */
public SnowballStemmer() {
this("porter");
initStemmers();
}
/**
 * Creates a stemmer that uses the Snowball stemmer with the given name.
 *
 * @param name the name of the stemmer; it is handed to
 *          {@code setStemmer(String)} unchanged
 */
public SnowballStemmer(String name) {
  // The superclass constructor is invoked implicitly before this statement.
  setStemmer(name);
}
/**
 * Determines whether the Snowball classes can be loaded and records the
 * outcome in {@code m_Present}.
 * <p>
 * The lookup asks {@code WekaPackageClassLoaderManager.forName} for the class
 * named by {@code SNOWBALL_PROGRAM}; any {@link Exception} raised by that
 * call is interpreted as "Snowball is not available".
 */
private static void checkForSnowball() {
  boolean found;
  try {
    WekaPackageClassLoaderManager.forName(SNOWBALL_PROGRAM);
    found = true;
  } catch (Exception ignored) {
    // A failed lookup is an expected outcome here, not an error to report.
    found = false;
  }
  m_Present = found;
}
/**
 * Returns a string describing the stemmer.
 * <p>
 * The text consists of three parts: a note on when the wrapper is available,
 * instructions for the case that class discovery is not dynamic, and an
 * example command line for 'weka.core.ClassDiscovery'.
 *
 * @return a description suitable for displaying in the explorer/experimenter
 *         gui
 */
public String globalInfo() {
  final String availability =
    "A wrapper class for the Snowball stemmers. Only available if the "
      + "Snowball classes are in the classpath.\n";

  final String manualDiscovery =
    "If the class discovery is not dynamic, i.e., the property 'UseDynamic' "
      + "in the props file 'weka/gui/GenericPropertiesCreator.props' is 'false', "
      + "then the property 'org.tartarus.snowball.SnowballProgram' in the "
      + "'weka/gui/GenericObjectEditor.props' file has to be uncommented "
      + "as well. If necessary you have to discover and fill in the snowball "
      + "stemmers manually. You can use the 'weka.core.ClassDiscovery' for this:\n";

  final String exampleCommand =
    " java weka.core.ClassDiscovery org.tartarus.snowball.SnowballProgram org.tartarus.snowball.ext\n";

  // Not part of the returned text (disabled in the source): a trailing
  // "For more information visit these web sites:" section listing
  // http://weka.wikispaces.com/Stemmers and http://snowball.tartarus.org/
  return availability + manualDiscovery + exampleCommand;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
@Override
public Enumeration
© 2015 - 2024 Weber Informatics LLC | Privacy Policy