All downloads are free. The search and download functionality uses the official Maven repository.

com.topologi.diffx.load.text.TokenizerFactory Maven / Gradle / Ivy

Go to download

docx4j is a library which helps you to work with the Office Open XML file format as used in docx documents, pptx presentations, and xlsx spreadsheets.

There is a newer version: 6.1.2
Show newest version
/*
 * This file is part of the DiffX library.
 *
 * For licensing information please see the file license.txt included in the release.
 * A copy of this licence can also be found at
 *   http://www.opensource.org/licenses/artistic-license-2.0.php
 */
package com.topologi.diffx.load.text;

import com.topologi.diffx.config.DiffXConfig;
import com.topologi.diffx.config.TextGranularity;

/**
 * Factory for text tokenizers.
 *
 * <p>This class is designed to return the tokenizer that corresponds to the given configuration.
 * It only exposes a static factory method and cannot be instantiated.
 *
 * @author Christophe Lauret
 * @version 10 May 2010
 */
public final class TokenizerFactory {

  /**
   * Prevents instantiation: tokenizers are obtained through {@link #get(DiffXConfig)}.
   */
  private TokenizerFactory() {
  }

  /**
   * Returns the text tokenizer matching the granularity of the specified configuration.
   *
   * <p>The character tokenizer is created without any argument; the word and text tokenizers
   * are created with the white space processing setting read from the configuration.
   *
   * @param config The configuration to use.
   *
   * @return the corresponding tokenizer.
   *
   * @throws NullPointerException     If the configuration is <code>null</code> or if it
   *                                  reports a <code>null</code> text granularity.
   * @throws IllegalArgumentException If the text granularity is not one of the supported values.
   */
  public static TextTokenizer get(DiffXConfig config) {
    if (config == null) {
      throw new NullPointerException("The config should be specified");
    }
    TextGranularity granularity = config.getGranularity();
    // Switching on a null enum reference throws a NullPointerException without any message;
    // fail with the same exception type but say what is actually missing.
    if (granularity == null) {
      throw new NullPointerException("The config should specify a text granularity");
    }
    switch (granularity) {
      case CHARACTER:
        return new TokenizerByChar();
      case WORD:
        return new TokenizerByWord(config.getWhiteSpaceProcessing());
      case TEXT:
        return new TokenizerByText(config.getWhiteSpaceProcessing());
      default:
        throw new IllegalArgumentException("Unsupported text granularity "+granularity);
    }
  }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy