All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.text.StringTokenizer Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.matcher.StringMatcher;
import org.apache.commons.text.matcher.StringMatcherFactory;

/**
 * Tokenizes a string based on delimiters (separators) and supporting quoting and ignored character concepts.
 * 

* This class can split a String into many smaller strings. It aims to do a similar job to * {@link java.util.StringTokenizer StringTokenizer}, however it offers much more control and flexibility including * implementing the {@code ListIterator} interface. By default, it is set up like {@code StringTokenizer}. *

* The input String is split into a number of tokens. Each token is separated from the next String by a * delimiter. One or more delimiter characters must be specified. *

* Each token may be surrounded by quotes. The quote matcher specifies the quote character(s). A quote may be * escaped within a quoted section by duplicating itself. *

* Between each token and the delimiter are potentially characters that need trimming. The trimmer matcher * specifies these characters. One usage might be to trim whitespace characters. *

* At any point outside the quotes there might potentially be invalid characters. The ignored matcher specifies * these characters to be removed. One usage might be to remove new line characters. *

* Empty tokens may be removed or returned as null. * *

 * "a,b,c"         - Three tokens "a","b","c"   (comma delimiter)
 * " a, b , c "    - Three tokens "a","b","c"   (default CSV processing trims whitespace)
 * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
 * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
StringTokenizer properties and options
PropertyTypeDefault
delimCharSetMatcher{ \t\n\r\f}
quoteNoneMatcher{}
ignoreNoneMatcher{}
emptyTokenAsNullbooleanfalse
ignoreEmptyTokensbooleantrue
* * @since 1.3 */ public class StringTokenizer implements ListIterator, Cloneable { /** Comma separated values tokenizer internal variable. */ private static final StringTokenizer CSV_TOKENIZER_PROTOTYPE; /** Tab separated values tokenizer internal variable. */ private static final StringTokenizer TSV_TOKENIZER_PROTOTYPE; static { CSV_TOKENIZER_PROTOTYPE = new StringTokenizer(); CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.commaMatcher()); CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher()); CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); TSV_TOKENIZER_PROTOTYPE = new StringTokenizer(); TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.tabMatcher()); TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher()); TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); } /** * Returns a clone of {@code CSV_TOKENIZER_PROTOTYPE}. * * @return a clone of {@code CSV_TOKENIZER_PROTOTYPE}. */ private static StringTokenizer getCSVClone() { return (StringTokenizer) CSV_TOKENIZER_PROTOTYPE.clone(); } /** * Gets a new tokenizer instance which parses Comma Separated Value strings initializing it with the given input. * The default for CSV processing will be trim whitespace from both ends (which can be overridden with the * setTrimmer method). *

* You must call a "reset" method to set the string which you want to parse. *

* * @return a new tokenizer instance which parses Comma Separated Value strings */ public static StringTokenizer getCSVInstance() { return getCSVClone(); } /** * Gets a new tokenizer instance which parses Comma Separated Value strings initializing it with the given input. * The default for CSV processing will be trim whitespace from both ends (which can be overridden with the * setTrimmer method). * * @param input * the text to parse * @return a new tokenizer instance which parses Comma Separated Value strings */ public static StringTokenizer getCSVInstance(final char[] input) { return getCSVClone().reset(input); } /** * Gets a new tokenizer instance which parses Comma Separated Value strings initializing it with the given input. * The default for CSV processing will be trim whitespace from both ends (which can be overridden with the * setTrimmer method). * * @param input * the text to parse * @return a new tokenizer instance which parses Comma Separated Value strings */ public static StringTokenizer getCSVInstance(final String input) { return getCSVClone().reset(input); } /** * Returns a clone of {@code TSV_TOKENIZER_PROTOTYPE}. * * @return a clone of {@code TSV_TOKENIZER_PROTOTYPE}. */ private static StringTokenizer getTSVClone() { return (StringTokenizer) TSV_TOKENIZER_PROTOTYPE.clone(); } /** * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for CSV processing will be * trim whitespace from both ends (which can be overridden with the setTrimmer method). *

* You must call a "reset" method to set the string which you want to parse. *

* * @return a new tokenizer instance which parses Tab Separated Value strings. */ public static StringTokenizer getTSVInstance() { return getTSVClone(); } /** * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for CSV processing will be * trim whitespace from both ends (which can be overridden with the setTrimmer method). * * @param input * the string to parse * @return a new tokenizer instance which parses Tab Separated Value strings. */ public static StringTokenizer getTSVInstance(final char[] input) { return getTSVClone().reset(input); } /** * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for CSV processing will be * trim whitespace from both ends (which can be overridden with the setTrimmer method). * * @param input * the string to parse * @return a new tokenizer instance which parses Tab Separated Value strings. */ public static StringTokenizer getTSVInstance(final String input) { return getTSVClone().reset(input); } /** The text to work on. */ private char[] chars; /** The parsed tokens. */ private String[] tokens; /** The current iteration position. */ private int tokenPos; /** The delimiter matcher. */ private StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.splitMatcher(); /** The quote matcher. */ private StringMatcher quoteMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); /** The ignored matcher. */ private StringMatcher ignoredMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); /** The trimmer matcher. */ private StringMatcher trimmerMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); /** Whether to return empty tokens as null. */ private boolean emptyAsNull; /** Whether to ignore empty tokens. */ private boolean ignoreEmptyTokens = true; /** * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer, but with no text to * tokenize. *

* This constructor is normally used with {@link #reset(String)}. *

*/ public StringTokenizer() { this.chars = null; } /** * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer. * * @param input * the string which is to be parsed, not cloned */ public StringTokenizer(final char[] input) { this.chars = input != null ? input.clone() : null; } /** * Constructs a tokenizer splitting on the specified character. * * @param input * the string which is to be parsed, not cloned * @param delim * the field delimiter character */ public StringTokenizer(final char[] input, final char delim) { this(input); setDelimiterChar(delim); } /** * Constructs a tokenizer splitting on the specified delimiter character and handling quotes using the specified * quote character. * * @param input * the string which is to be parsed, not cloned * @param delim * the field delimiter character * @param quote * the field quoted string character */ public StringTokenizer(final char[] input, final char delim, final char quote) { this(input, delim); setQuoteChar(quote); } /** * Constructs a tokenizer splitting on the specified string. * * @param input * the string which is to be parsed, not cloned * @param delim * the field delimiter string */ public StringTokenizer(final char[] input, final String delim) { this(input); setDelimiterString(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher. * * @param input * the string which is to be parsed, not cloned * @param delim * the field delimiter matcher */ public StringTokenizer(final char[] input, final StringMatcher delim) { this(input); setDelimiterMatcher(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher and handling quotes using the specified * quote matcher. * * @param input * the string which is to be parsed, not cloned * @param delim * the field delimiter character * @param quote * the field quoted string character */ public StringTokenizer(final char[] input, final StringMatcher delim, final StringMatcher quote) { this(input, delim); setQuoteMatcher(quote); } /** * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer. * * @param input * the string which is to be parsed */ public StringTokenizer(final String input) { this.chars = input != null ? input.toCharArray() : null; } /** * Constructs a tokenizer splitting on the specified delimiter character. * * @param input * the string which is to be parsed * @param delim * the field delimiter character */ public StringTokenizer(final String input, final char delim) { this(input); setDelimiterChar(delim); } /** * Constructs a tokenizer splitting on the specified delimiter character and handling quotes using the specified * quote character. * * @param input * the string which is to be parsed * @param delim * the field delimiter character * @param quote * the field quoted string character */ public StringTokenizer(final String input, final char delim, final char quote) { this(input, delim); setQuoteChar(quote); } /** * Constructs a tokenizer splitting on the specified delimiter string. * * @param input * the string which is to be parsed * @param delim * the field delimiter string */ public StringTokenizer(final String input, final String delim) { this(input); setDelimiterString(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher. * * @param input * the string which is to be parsed * @param delim * the field delimiter matcher */ public StringTokenizer(final String input, final StringMatcher delim) { this(input); setDelimiterMatcher(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher and handling quotes using the specified * quote matcher. * * @param input * the string which is to be parsed * @param delim * the field delimiter matcher * @param quote * the field quoted string matcher */ public StringTokenizer(final String input, final StringMatcher delim, final StringMatcher quote) { this(input, delim); setQuoteMatcher(quote); } /** * Unsupported ListIterator operation. * * @param obj * this parameter ignored. * @throws UnsupportedOperationException * always */ @Override public void add(final String obj) { throw new UnsupportedOperationException("add() is unsupported"); } /** * Adds a token to a list, paying attention to the parameters we've set. * * @param list * the list to add to * @param tok * the token to add */ private void addToken(final List list, String tok) { if (tok == null || tok.isEmpty()) { if (isIgnoreEmptyTokens()) { return; } if (isEmptyTokenAsNull()) { tok = null; } } list.add(tok); } /** * Checks if tokenization has been done, and if not then do it. */ private void checkTokenized() { if (tokens == null) { final List split; if (chars == null) { // still call tokenize as subclass may do some work split = tokenize(null, 0, 0); } else { split = tokenize(chars, 0, chars.length); } tokens = split.toArray(ArrayUtils.EMPTY_STRING_ARRAY); } } /** * Creates a new instance of this Tokenizer. The new instance is reset so that it will be at the start of the token * list. If a {@link CloneNotSupportedException} is caught, return {@code null}. * * @return a new instance of this Tokenizer which has been reset. */ @Override public Object clone() { try { return cloneReset(); } catch (final CloneNotSupportedException ex) { return null; } } /** * Creates a new instance of this Tokenizer. The new instance is reset so that it will be at the start of the token * list. * * @return a new instance of this Tokenizer which has been reset. * @throws CloneNotSupportedException * if there is a problem cloning */ Object cloneReset() throws CloneNotSupportedException { // this method exists to enable 100% test coverage final StringTokenizer cloned = (StringTokenizer) super.clone(); if (cloned.chars != null) { cloned.chars = cloned.chars.clone(); } cloned.reset(); return cloned; } /** * Gets the String content that the tokenizer is parsing. * * @return The string content being parsed */ public String getContent() { if (chars == null) { return null; } return new String(chars); } /** * Gets the field delimiter matcher. * * @return The delimiter matcher in use */ public StringMatcher getDelimiterMatcher() { return this.delimMatcher; } /** * Gets the ignored character matcher. *

* These characters are ignored when parsing the String, unless they are within a quoted region. The default value * is not to ignore anything. *

* * @return The ignored matcher in use */ public StringMatcher getIgnoredMatcher() { return ignoredMatcher; } /** * Gets the quote matcher currently in use. *

* The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. The * default value is '"' (double quote). *

* * @return The quote matcher in use */ public StringMatcher getQuoteMatcher() { return quoteMatcher; } /** * Gets a copy of the full token list as an independent modifiable array. * * @return The tokens as a String array */ public String[] getTokenArray() { checkTokenized(); return tokens.clone(); } /** * Gets a copy of the full token list as an independent modifiable list. * * @return The tokens as a String list */ public List getTokenList() { checkTokenized(); return new ArrayList<>(Arrays.asList(tokens)); } /** * Gets the trimmer character matcher. *

* These characters are trimmed off on each side of the delimiter until the token or quote is found. The default * value is not to trim anything. *

* * @return The trimmer matcher in use */ public StringMatcher getTrimmerMatcher() { return trimmerMatcher; } /** * Tests whether there are any more tokens. * * @return true if there are more tokens */ @Override public boolean hasNext() { checkTokenized(); return tokenPos < tokens.length; } /** * Tests whether there are any previous tokens that can be iterated to. * * @return true if there are previous tokens */ @Override public boolean hasPrevious() { checkTokenized(); return tokenPos > 0; } /** * Tests whether the tokenizer currently returns empty tokens as null. The default for this property is false. * * @return true if empty tokens are returned as null */ public boolean isEmptyTokenAsNull() { return this.emptyAsNull; } /** * Tests whether the tokenizer currently ignores empty tokens. The default for this property is true. * * @return true if empty tokens are not returned */ public boolean isIgnoreEmptyTokens() { return ignoreEmptyTokens; } /** * Tests if the characters at the index specified match the quote already matched in readNextToken(). * * @param srcChars * the character array being tokenized * @param pos * the position to check for a quote * @param len * the length of the character array being tokenized * @param quoteStart * the start position of the matched quote, 0 if no quoting * @param quoteLen * the length of the matched quote, 0 if no quoting * @return true if a quote is matched */ private boolean isQuote(final char[] srcChars, final int pos, final int len, final int quoteStart, final int quoteLen) { for (int i = 0; i < quoteLen; i++) { if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + i]) { return false; } } return true; } /** * Gets the next token. * * @return The next String token * @throws NoSuchElementException * if there are no more elements */ @Override public String next() { if (hasNext()) { return tokens[tokenPos++]; } throw new NoSuchElementException(); } /** * Gets the index of the next token to return. * * @return The next token index */ @Override public int nextIndex() { return tokenPos; } /** * Gets the next token from the String. Equivalent to {@link #next()} except it returns null rather than throwing * {@link NoSuchElementException} when no tokens remain. * * @return The next sequential token, or null when no more tokens are found */ public String nextToken() { if (hasNext()) { return tokens[tokenPos++]; } return null; } /** * Gets the token previous to the last returned token. * * @return The previous token */ @Override public String previous() { if (hasPrevious()) { return tokens[--tokenPos]; } throw new NoSuchElementException(); } /** * Gets the index of the previous token. * * @return The previous token index */ @Override public int previousIndex() { return tokenPos - 1; } /** * Gets the previous token from the String. * * @return The previous sequential token, or null when no more tokens are found */ public String previousToken() { if (hasPrevious()) { return tokens[--tokenPos]; } return null; } /** * Reads character by character through the String to get the next token. * * @param srcChars * the character array being tokenized * @param start * the first character of field * @param len * the length of the character array being tokenized * @param workArea * a temporary work area * @param tokenList * the list of parsed tokens * @return The starting position of the next field (the character immediately after the delimiter), or -1 if end of * string found */ private int readNextToken(final char[] srcChars, int start, final int len, final TextStringBuilder workArea, final List tokenList) { // skip all leading whitespace, unless it is the // field delimiter or the quote character while (start < len) { final int removeLen = Math.max(getIgnoredMatcher().isMatch(srcChars, start, start, len), getTrimmerMatcher().isMatch(srcChars, start, start, len)); if (removeLen == 0 || getDelimiterMatcher().isMatch(srcChars, start, start, len) > 0 || getQuoteMatcher().isMatch(srcChars, start, start, len) > 0) { break; } start += removeLen; } // handle reaching end if (start >= len) { addToken(tokenList, StringUtils.EMPTY); return -1; } // handle empty token final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, start, len); if (delimLen > 0) { addToken(tokenList, StringUtils.EMPTY); return start + delimLen; } // handle found token final int quoteLen = getQuoteMatcher().isMatch(srcChars, start, start, len); if (quoteLen > 0) { return readWithQuotes(srcChars, start + quoteLen, len, workArea, tokenList, start, quoteLen); } return readWithQuotes(srcChars, start, len, workArea, tokenList, 0, 0); } /** * Reads a possibly quoted string token. * * @param srcChars * the character array being tokenized * @param start * the first character of field * @param len * the length of the character array being tokenized * @param workArea * a temporary work area * @param tokenList * the list of parsed tokens * @param quoteStart * the start position of the matched quote, 0 if no quoting * @param quoteLen * the length of the matched quote, 0 if no quoting * @return The starting position of the next field (the character immediately after the delimiter, or if end of * string found, then the length of string */ private int readWithQuotes(final char[] srcChars, final int start, final int len, final TextStringBuilder workArea, final List tokenList, final int quoteStart, final int quoteLen) { // Loop until we've found the end of the quoted // string or the end of the input workArea.clear(); int pos = start; boolean quoting = quoteLen > 0; int trimStart = 0; while (pos < len) { // quoting mode can occur several times throughout a string // we must switch between quoting and non-quoting until we // encounter a non-quoted delimiter, or end of string if (quoting) { // In quoting mode // If we've found a quote character, see if it's // followed by a second quote. If so, then we need // to actually put the quote character into the token // rather than end the token. if (isQuote(srcChars, pos, len, quoteStart, quoteLen)) { if (isQuote(srcChars, pos + quoteLen, len, quoteStart, quoteLen)) { // matched pair of quotes, thus an escaped quote workArea.append(srcChars, pos, quoteLen); pos += quoteLen * 2; trimStart = workArea.size(); continue; } // end of quoting quoting = false; pos += quoteLen; continue; } } else { // Not in quoting mode // check for delimiter, and thus end of token final int delimLen = getDelimiterMatcher().isMatch(srcChars, pos, start, len); if (delimLen > 0) { // return condition when end of token found addToken(tokenList, workArea.substring(0, trimStart)); return pos + delimLen; } // check for quote, and thus back into quoting mode if (quoteLen > 0 && isQuote(srcChars, pos, len, quoteStart, quoteLen)) { quoting = true; pos += quoteLen; continue; } // check for ignored (outside quotes), and ignore final int ignoredLen = getIgnoredMatcher().isMatch(srcChars, pos, start, len); if (ignoredLen > 0) { pos += ignoredLen; continue; } // check for trimmed character // don't yet know if its at the end, so copy to workArea // use trimStart to keep track of trim at the end final int trimmedLen = getTrimmerMatcher().isMatch(srcChars, pos, start, len); if (trimmedLen > 0) { workArea.append(srcChars, pos, trimmedLen); pos += trimmedLen; continue; } } // copy regular character from inside quotes workArea.append(srcChars[pos++]); trimStart = workArea.size(); } // return condition when end of string found addToken(tokenList, workArea.substring(0, trimStart)); return -1; } /** * Throws {@link UnsupportedOperationException} for this unsupported ListIterator operation. * * @throws UnsupportedOperationException * always */ @Override public void remove() { throw new UnsupportedOperationException("remove() is unsupported"); } /** * Resets this tokenizer, forgetting all parsing and iteration already completed. *

* This method allows the same tokenizer to be reused for the same String. *

* * @return this, to enable chaining */ public StringTokenizer reset() { tokenPos = 0; tokens = null; return this; } /** * Resets this tokenizer, giving it a new input string to parse. In this manner you can re-use a tokenizer with the * same settings on multiple input lines. * * @param input * the new character array to tokenize, not cloned, null sets no text to parse * @return this, to enable chaining */ public StringTokenizer reset(final char[] input) { reset(); this.chars = input != null ? input.clone() : null; return this; } /** * Resets this tokenizer, giving it a new input string to parse. In this manner you can re-use a tokenizer with the * same settings on multiple input lines. * * @param input * the new string to tokenize, null sets no text to parse * @return this, to enable chaining */ public StringTokenizer reset(final String input) { reset(); this.chars = input != null ? input.toCharArray() : null; return this; } /** * Throws {@link UnsupportedOperationException} for this unsupported ListIterator operation. * * @param obj * this parameter ignored. * @throws UnsupportedOperationException * always */ @Override public void set(final String obj) { throw new UnsupportedOperationException("set() is unsupported"); } /** * Sets the field delimiter character. * * @param delim * the delimiter character to use * @return this, to enable chaining */ public StringTokenizer setDelimiterChar(final char delim) { return setDelimiterMatcher(StringMatcherFactory.INSTANCE.charMatcher(delim)); } /** * Sets the field delimiter matcher. *

* The delimiter is used to separate one token from another. *

* * @param delim * the delimiter matcher to use * @return this, to enable chaining */ public StringTokenizer setDelimiterMatcher(final StringMatcher delim) { this.delimMatcher = delim == null ? StringMatcherFactory.INSTANCE.noneMatcher() : delim; return this; } /** * Sets the field delimiter string. * * @param delim * the delimiter string to use * @return this, to enable chaining */ public StringTokenizer setDelimiterString(final String delim) { return setDelimiterMatcher(StringMatcherFactory.INSTANCE.stringMatcher(delim)); } /** * Sets whether the tokenizer should return empty tokens as null. The default for this property is false. * * @param emptyAsNull * whether empty tokens are returned as null * @return this, to enable chaining */ public StringTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) { this.emptyAsNull = emptyAsNull; return this; } /** * Sets the character to ignore. *

* This character is ignored when parsing the String, unless it is within a quoted region. *

* * @param ignored * the ignored character to use * @return this, to enable chaining */ public StringTokenizer setIgnoredChar(final char ignored) { return setIgnoredMatcher(StringMatcherFactory.INSTANCE.charMatcher(ignored)); } /** * Sets the matcher for characters to ignore. *

* These characters are ignored when parsing the String, unless they are within a quoted region. *

* * @param ignored * the ignored matcher to use, null ignored * @return this, to enable chaining */ public StringTokenizer setIgnoredMatcher(final StringMatcher ignored) { if (ignored != null) { this.ignoredMatcher = ignored; } return this; } /** * Sets whether the tokenizer should ignore and not return empty tokens. The default for this property is true. * * @param ignoreEmptyTokens * whether empty tokens are not returned * @return this, to enable chaining */ public StringTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) { this.ignoreEmptyTokens = ignoreEmptyTokens; return this; } /** * Sets the quote character to use. *

* The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. *

* * @param quote * the quote character to use * @return this, to enable chaining */ public StringTokenizer setQuoteChar(final char quote) { return setQuoteMatcher(StringMatcherFactory.INSTANCE.charMatcher(quote)); } /** * Sets the quote matcher to use. *

* The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. *

* * @param quote * the quote matcher to use, null ignored * @return this, to enable chaining */ public StringTokenizer setQuoteMatcher(final StringMatcher quote) { if (quote != null) { this.quoteMatcher = quote; } return this; } /** * Sets the matcher for characters to trim. *

* These characters are trimmed off on each side of the delimiter until the token or quote is found. * * @param trimmer * the trimmer matcher to use, null ignored * @return this, to enable chaining */ public StringTokenizer setTrimmerMatcher(final StringMatcher trimmer) { if (trimmer != null) { this.trimmerMatcher = trimmer; } return this; } /** * Gets the number of tokens found in the String. * * @return The number of matched tokens */ public int size() { checkTokenized(); return tokens.length; } /** * Internal method to performs the tokenization. *

* Most users of this class do not need to call this method. This method will be called automatically by other * (public) methods when required. *

*

* This method exists to allow subclasses to add code before or after the tokenization. For example, a subclass * could alter the character array, offset or count to be parsed, or call the tokenizer multiple times on multiple * strings. It is also be possible to filter the results. *

*

* {@code StrTokenizer} will always pass a zero offset and a count equal to the length of the array to this * method, however a subclass may pass other values, or even an entirely different array. *

* * @param srcChars * the character array being tokenized, may be null * @param offset * the start position within the character array, must be valid * @param count * the number of characters to tokenize, must be valid * @return The modifiable list of String tokens, unmodifiable if null array or zero count */ protected List tokenize(final char[] srcChars, final int offset, final int count) { if (srcChars == null || count == 0) { return Collections.emptyList(); } final TextStringBuilder buf = new TextStringBuilder(); final List tokenList = new ArrayList<>(); int pos = offset; // loop around the entire buffer while (pos >= 0 && pos < count) { // find next token pos = readNextToken(srcChars, pos, count, buf, tokenList); // handle case where end of string is a delimiter if (pos >= count) { addToken(tokenList, StringUtils.EMPTY); } } return tokenList; } /** * Gets the String content that the tokenizer is parsing. * * @return The string content being parsed */ @Override public String toString() { if (tokens == null) { return "StringTokenizer[not tokenized yet]"; } return "StringTokenizer" + getTokenList(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy