All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.repir.Repository.Stopwords.StopWords Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository.Stopwords;

import io.github.repir.tools.Words.StopWordsSmart;
import io.github.repir.tools.Words.StopWordsLetter;
import io.github.repir.tools.lib.Log;
import java.util.HashSet;
import io.github.repir.Repository.Repository;
import io.github.repir.tools.extract.DefaultTokenizer;
import java.util.ArrayList;

/**
 * List of stop words, which is not stored as a feature, but rather configured.
 * <p>
 * The instance is bound to one {@link Repository}: the superclass is constructed
 * from that repository's configuration ({@code r.getConf()}), and the integer
 * form of the stop word set ({@link #getIntSet()}) is resolved through that
 * repository's {@code termToID}.
 * <p>
 * Instances are obtained through {@link #get(Repository)}, which reuses the
 * shared {@code singleton} slot inherited from the superclass.
 * NOTE(review): no synchronization is performed here; confirm callers are
 * single-threaded if that matters.
 *
 * @author jeroen
 */
public class StopWords extends io.github.repir.tools.Words.StopWords {

    public static Log log = new Log(StopWords.class);

    /** Repository whose configuration and term-to-id mapping this instance uses. */
    private final Repository repository;

    /**
     * Lazily built set of term ids for the stemmed stop words. Kept per
     * instance (not static) because the ids are produced by {@link #repository};
     * a static cache would keep returning ids of a previously used repository
     * after {@link #get(Repository)} switched to another one.
     */
    private HashSet<Integer> intfilterset;

    /**
     * Creates the stop word list for the given repository, passing the
     * repository's configuration to the superclass.
     *
     * @param r repository to bind this stop word list to
     */
    private StopWords(Repository r) {
        super(r.getConf());
        repository = r;
    }

    /**
     * Returns the stop word list for the given repository, creating a new one
     * when the shared singleton is absent, is not an instance of this class, or
     * is bound to a different repository object (compared by identity).
     *
     * @param r repository for which the stop words are requested
     * @return the stop word list bound to {@code r}
     */
    public static StopWords get(Repository r) {
        // instanceof is false for null, so this also covers the "not yet created" case
        if (!(singleton instanceof StopWords)
                || ((StopWords) singleton).repository != r) {
            singleton = new StopWords(r);
        }
        return (StopWords) singleton;
    }

    /**
     * Returns the stemmed stop words as a set of term ids of the bound
     * repository. The set is built on first use and cached on this instance;
     * the cached set itself is returned, not a copy.
     * <p>
     * Terms for which {@code termToID} returns a negative value are left out
     * (presumably terms that do not occur in the repository's vocabulary;
     * verify against {@code Repository.termToID}).
     *
     * @return set of non-negative term ids of the stemmed stop words
     */
    public HashSet<Integer> getIntSet() {
        if (intfilterset == null) {
            // Build into a local first so that a failure half-way through
            // does not leave a partially filled set cached.
            HashSet<Integer> ids = new HashSet<Integer>();
            for (String term : this.getStemmedFilterSet()) {
                int id = repository.termToID(term);
                if (id >= 0) {
                    ids.add(id);
                }
            }
            intfilterset = ids;
        }
        return intfilterset;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy