All downloads are free. Search and download functionality uses the official Maven repository.

com.topologi.diffx.load.text.TokeniserFactory Maven / Gradle / Ivy

Go to download

docx4j is a library which helps you to work with the Office Open XML file format as used in docx documents, pptx presentations, and xlsx spreadsheets.

There is a newer version: 6.1.2
Show newest version
package com.topologi.diffx.load.text;

import com.topologi.diffx.config.DiffXConfig;

/**
 * Factory for tokenisers.
 * 
 * 

This class is designed to returned tokenisers that corresponds to the given * configuration. * * @author Christophe Lauret * @version 27 April 2005 */ public final class TokeniserFactory { // class attributes --------------------------------------------------------------------- /** * Indicates whether the factory should generate namespace events. */ private final DiffXConfig config; /** * The tokeniser to use. * * 0 = consider + preserve * 1 = consider + trash * 2 = ignore + preserve * 3 = ignore + trash */ private final transient int tokeniserChoice; // constructors ------------------------------------------------------------------------- /** * Creates a factory for tokenisers. * * @param config The configuration to use. * * @throws NullPointerException If the configuration is null. */ public TokeniserFactory(DiffXConfig config) throws NullPointerException { if (config == null) throw new NullPointerException("Factory requires a tokeniser."); this.config = config; this.tokeniserChoice = (this.config.isIgnoreWhiteSpace()? 2 : 0) + (this.config.isPreserveWhiteSpace()? 0 : 1); } // methods ------------------------------------------------------------------------------ /** * Returns the text tokeniser for the specified text according to the * configuration of this tokeniser. * * @param text The text to tokenise. * * @return The open element event from the uri and name given. */ public TextTokeniser makeTokeniser(CharSequence text) { if (config.isTokenizeBlocks()) return new TextTokeniserSingleBlock(text); if (config.isTokenizeSentences()) return new TextTokeniserSentence(text); switch(tokeniserChoice) { case 0: // consider + preserve return new TextTokeniserByWord(text); case 1: // consider + trash: we actually preserve for now. 
return new TextTokeniserByWord(text); case 2: // ignore + preserve return new TextTokeniserIgnoreSpace(text); case 3: // ignore + trash return new TextTokeniserNoSpace(text); default: throw new IllegalStateException("Impossible whitespace configuration: "+tokeniserChoice); } } /** * Returns the configuration used by this factory. * * @return the configuration used by this factory. */ public DiffXConfig getConfig() { return this.config; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy