All downloads are free. Search and download functionality uses the official Maven repository.

com.wcohen.ss.tokens.SerializableSimpleTokenizer Maven / Gradle / Ivy

package com.wcohen.ss.tokens;

import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;

import java.io.Serializable;
import java.util.*;

public class SerializableSimpleTokenizer implements Tokenizer, Serializable {
    /**
     * Shared instance constructed with both ignorePunctuation and ignoreCase
     * set to true.
     * NOTE(review): this instance is publicly reachable and the class exposes
     * public setters for both flags, so any caller can change the settings of
     * this shared object -- confirm that callers treat it as read-only.
     */
    public static final SerializableSimpleTokenizer DEFAULT_TOKENIZER = new SerializableSimpleTokenizer(true,true);

    // Flag supplied to the constructor and changeable via setIgnorePunctuation.
    // The "= true" initializer is overwritten by the two-argument constructor.
    // Presumably makes tokenize() drop punctuation tokens -- confirm there.
    private boolean ignorePunctuation = true;
    // Flag supplied to the constructor and changeable via setIgnoreCase.
    // The "= true" initializer is overwritten by the two-argument constructor.
    // Presumably makes tokenize() case-insensitive -- confirm there.
    private boolean ignoreCase = true;

    /**
     * Creates a tokenizer whose two flags start at the given values.
     *
     * @param ignorePunctuation initial value of the ignorePunctuation flag
     * @param ignoreCase initial value of the ignoreCase flag
     */
    public SerializableSimpleTokenizer(boolean ignorePunctuation, boolean ignoreCase)
    {
        // The two assignments are independent; each copies one argument
        // into the field of the same name.
        this.ignoreCase = ignoreCase;
        this.ignorePunctuation = ignorePunctuation;
    }

    // parameter setting
    /**
     * Replaces the current value of the ignorePunctuation flag.
     *
     * @param flag new value of the flag
     */
    public void setIgnorePunctuation(boolean flag)
    {
        this.ignorePunctuation = flag;
    }
    /**
     * Replaces the current value of the ignoreCase flag.
     *
     * @param flag new value of the flag
     */
    public void setIgnoreCase(boolean flag)
    {
        this.ignoreCase = flag;
    }
    /**
     * Describes this tokenizer in the form "[SimpleTokenizer P;C]", where P
     * and C are the current values of the ignorePunctuation and ignoreCase
     * flags, in that order.
     *
     * @return the textual description
     */
    public String toString()
    {
        StringBuilder description = new StringBuilder("[SimpleTokenizer ");
        description.append(ignorePunctuation);
        description.append(";");
        description.append(ignoreCase);
        description.append("]");
        return description.toString();
    }

    /**  Return tokenized version of a string.  Tokens are sequences
     * of alphanumerics, or any single punctuation character. */
    public Token[] tokenize(String input)
    {
        List tokens = new ArrayList();
        int cursor = 0;
        while (cursor




© 2015 - 2025 Weber Informatics LLC | Privacy Policy