All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dspace.search.DSTokenizer Maven / Gradle / Ivy

/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.search;

import java.io.Reader;

import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.util.Version;

/**
 * Lucene tokenizer customized for DSpace: token characters are letters
 * <em>and</em> digits, and every character is folded to lower case. It
 * exists because the standard tokenizer rejects numbers from
 * indexing/querying.
 */
public final class DSTokenizer extends CharTokenizer
{
    /**
     * Construct a new DSTokenizer reading from the given input.
     *
     * @param version Lucene version number, passed through to the superclass
     * @param in      reader supplying the text to tokenize, passed through
     *                to the superclass
     */
    public DSTokenizer(Version version, Reader in)
    {
        super(version, in);
    }

    /**
     * Folds the given code point to lower case via
     * {@link Character#toLowerCase(int)} and then hands the result to the
     * superclass normalization.
     *
     * @param c the code point to normalize
     * @return the value returned by the superclass for the lower-cased
     *         code point
     */
    @Override
    protected int normalize(int c)
    {
        final int lowerCased = Character.toLowerCase(c);
        return super.normalize(lowerCased);
    }

    /**
     * Accepts only code points that are letters or digits, i.e. exactly
     * those for which {@link Character#isLetterOrDigit(int)} holds.
     *
     * @param c the code point to test
     * @return {@code true} if {@code c} is a letter or a digit,
     *         {@code false} otherwise
     */
    @Override
    protected boolean isTokenChar(int c)
    {
        // Same predicate as Character.isLetterOrDigit(c), spelled out.
        return Character.isLetter(c) || Character.isDigit(c);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy