All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.text.StrTokenizer Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;

/**
 * Tokenizes a string based on delimiters (separators)
 * and supporting quoting and ignored character concepts.
 * 

* This class can split a String into many smaller strings. It aims * to do a similar job to {@link java.util.StringTokenizer StringTokenizer}, * however it offers much more control and flexibility including implementing * the {@code ListIterator} interface. By default, it is set up * like {@code StringTokenizer}. *

* The input String is split into a number of tokens. * Each token is separated from the next String by a delimiter. * One or more delimiter characters must be specified. *

* Each token may be surrounded by quotes. * The quote matcher specifies the quote character(s). * A quote may be escaped within a quoted section by duplicating itself. *

* Between each token and the delimiter are potentially characters that need trimming. * The trimmer matcher specifies these characters. * One usage might be to trim whitespace characters. *

* At any point outside the quotes there might potentially be invalid characters. * The ignored matcher specifies these characters to be removed. * One usage might be to remove new line characters. *

* Empty tokens may be removed or returned as null. *

 * "a,b,c"         - Three tokens "a","b","c"   (comma delimiter)
 * " a, b , c "    - Three tokens "a","b","c"   (default CSV processing trims whitespace)
 * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
 * 
* * * * * * * * * * * * * * * * * * * * * *
StrTokenizer properties and options
PropertyTypeDefault
delimCharSetMatcher{ \t\n\r\f}
quoteNoneMatcher{}
ignoreNoneMatcher{}
emptyTokenAsNullbooleanfalse
ignoreEmptyTokensbooleantrue
* * @since 1.0 * @deprecated Deprecated as of 1.3, use {@link StringTokenizer} instead. This class will be removed in 2.0. */ @Deprecated public class StrTokenizer implements ListIterator, Cloneable { /** Comma separated values tokenizer internal variable. */ private static final StrTokenizer CSV_TOKENIZER_PROTOTYPE; /** Tab separated values tokenizer internal variable. */ private static final StrTokenizer TSV_TOKENIZER_PROTOTYPE; static { CSV_TOKENIZER_PROTOTYPE = new StrTokenizer(); CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.commaMatcher()); CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher()); CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher()); CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher()); CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); TSV_TOKENIZER_PROTOTYPE = new StrTokenizer(); TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.tabMatcher()); TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher()); TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher()); TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher()); TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); } /** * Returns a clone of {@code CSV_TOKENIZER_PROTOTYPE}. * * @return a clone of {@code CSV_TOKENIZER_PROTOTYPE}. */ private static StrTokenizer getCSVClone() { return (StrTokenizer) CSV_TOKENIZER_PROTOTYPE.clone(); } /** * Gets a new tokenizer instance which parses Comma Separated Value strings * initializing it with the given input. The default for CSV processing * will be trim whitespace from both ends (which can be overridden with * the setTrimmer method). *

* You must call a "reset" method to set the string which you want to parse. *

* @return a new tokenizer instance which parses Comma Separated Value strings */ public static StrTokenizer getCSVInstance() { return getCSVClone(); } /** * Gets a new tokenizer instance which parses Comma Separated Value strings * initializing it with the given input. The default for CSV processing * will be trim whitespace from both ends (which can be overridden with * the setTrimmer method). * * @param input the text to parse * @return a new tokenizer instance which parses Comma Separated Value strings */ public static StrTokenizer getCSVInstance(final char[] input) { final StrTokenizer tok = getCSVClone(); tok.reset(input); return tok; } /** * Gets a new tokenizer instance which parses Comma Separated Value strings * initializing it with the given input. The default for CSV processing * will be trim whitespace from both ends (which can be overridden with * the setTrimmer method). * * @param input the text to parse * @return a new tokenizer instance which parses Comma Separated Value strings */ public static StrTokenizer getCSVInstance(final String input) { final StrTokenizer tok = getCSVClone(); tok.reset(input); return tok; } /** * Returns a clone of {@code TSV_TOKENIZER_PROTOTYPE}. * * @return a clone of {@code TSV_TOKENIZER_PROTOTYPE}. */ private static StrTokenizer getTSVClone() { return (StrTokenizer) TSV_TOKENIZER_PROTOTYPE.clone(); } /** * Gets a new tokenizer instance which parses Tab Separated Value strings. * The default for CSV processing will be trim whitespace from both ends * (which can be overridden with the setTrimmer method). *

* You must call a "reset" method to set the string which you want to parse. *

* @return a new tokenizer instance which parses Tab Separated Value strings. */ public static StrTokenizer getTSVInstance() { return getTSVClone(); } /** * Gets a new tokenizer instance which parses Tab Separated Value strings. * The default for CSV processing will be trim whitespace from both ends * (which can be overridden with the setTrimmer method). * @param input the string to parse * @return a new tokenizer instance which parses Tab Separated Value strings. */ public static StrTokenizer getTSVInstance(final char[] input) { final StrTokenizer tok = getTSVClone(); tok.reset(input); return tok; } /** * Gets a new tokenizer instance which parses Tab Separated Value strings. * The default for CSV processing will be trim whitespace from both ends * (which can be overridden with the setTrimmer method). * @param input the string to parse * @return a new tokenizer instance which parses Tab Separated Value strings. */ public static StrTokenizer getTSVInstance(final String input) { final StrTokenizer tok = getTSVClone(); tok.reset(input); return tok; } /** The text to work on. */ private char[] chars; /** The parsed tokens. */ private String[] tokens; /** The current iteration position. */ private int tokenPos; /** The delimiter matcher. */ private StrMatcher delimMatcher = StrMatcher.splitMatcher(); /** The quote matcher. */ private StrMatcher quoteMatcher = StrMatcher.noneMatcher(); /** The ignored matcher. */ private StrMatcher ignoredMatcher = StrMatcher.noneMatcher(); /** The trimmer matcher. */ private StrMatcher trimmerMatcher = StrMatcher.noneMatcher(); /** Whether to return empty tokens as null. */ private boolean emptyAsNull; /** Whether to ignore empty tokens. */ private boolean ignoreEmptyTokens = true; /** * Constructs a tokenizer splitting on space, tab, newline and form feed * as per StringTokenizer, but with no text to tokenize. *

* This constructor is normally used with {@link #reset(String)}. *

*/ public StrTokenizer() { this.chars = null; } /** * Constructs a tokenizer splitting on space, tab, newline and form feed * as per StringTokenizer. * * @param input the string which is to be parsed, not cloned */ public StrTokenizer(final char[] input) { if (input == null) { this.chars = null; } else { this.chars = input.clone(); } } /** * Constructs a tokenizer splitting on the specified character. * * @param input the string which is to be parsed, not cloned * @param delim the field delimiter character */ public StrTokenizer(final char[] input, final char delim) { this(input); setDelimiterChar(delim); } /** * Constructs a tokenizer splitting on the specified delimiter character * and handling quotes using the specified quote character. * * @param input the string which is to be parsed, not cloned * @param delim the field delimiter character * @param quote the field quoted string character */ public StrTokenizer(final char[] input, final char delim, final char quote) { this(input, delim); setQuoteChar(quote); } /** * Constructs a tokenizer splitting on the specified string. * * @param input the string which is to be parsed, not cloned * @param delim the field delimiter string */ public StrTokenizer(final char[] input, final String delim) { this(input); setDelimiterString(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher. * * @param input the string which is to be parsed, not cloned * @param delim the field delimiter matcher */ public StrTokenizer(final char[] input, final StrMatcher delim) { this(input); setDelimiterMatcher(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher * and handling quotes using the specified quote matcher. * * @param input the string which is to be parsed, not cloned * @param delim the field delimiter character * @param quote the field quoted string character */ public StrTokenizer(final char[] input, final StrMatcher delim, final StrMatcher quote) { this(input, delim); setQuoteMatcher(quote); } /** * Constructs a tokenizer splitting on space, tab, newline and form feed * as per StringTokenizer. * * @param input the string which is to be parsed */ public StrTokenizer(final String input) { if (input != null) { chars = input.toCharArray(); } else { chars = null; } } /** * Constructs a tokenizer splitting on the specified delimiter character. * * @param input the string which is to be parsed * @param delim the field delimiter character */ public StrTokenizer(final String input, final char delim) { this(input); setDelimiterChar(delim); } /** * Constructs a tokenizer splitting on the specified delimiter character * and handling quotes using the specified quote character. * * @param input the string which is to be parsed * @param delim the field delimiter character * @param quote the field quoted string character */ public StrTokenizer(final String input, final char delim, final char quote) { this(input, delim); setQuoteChar(quote); } /** * Constructs a tokenizer splitting on the specified delimiter string. * * @param input the string which is to be parsed * @param delim the field delimiter string */ public StrTokenizer(final String input, final String delim) { this(input); setDelimiterString(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher. * * @param input the string which is to be parsed * @param delim the field delimiter matcher */ public StrTokenizer(final String input, final StrMatcher delim) { this(input); setDelimiterMatcher(delim); } /** * Constructs a tokenizer splitting using the specified delimiter matcher * and handling quotes using the specified quote matcher. * * @param input the string which is to be parsed * @param delim the field delimiter matcher * @param quote the field quoted string matcher */ public StrTokenizer(final String input, final StrMatcher delim, final StrMatcher quote) { this(input, delim); setQuoteMatcher(quote); } /** * Unsupported ListIterator operation. * @param obj this parameter ignored. * @throws UnsupportedOperationException always */ @Override public void add(final String obj) { throw new UnsupportedOperationException("add() is unsupported"); } /** * Adds a token to a list, paying attention to the parameters we've set. * * @param list the list to add to * @param tok the token to add */ private void addToken(final List list, String tok) { if (tok == null || tok.isEmpty()) { if (isIgnoreEmptyTokens()) { return; } if (isEmptyTokenAsNull()) { tok = null; } } list.add(tok); } /** * Checks if tokenization has been done, and if not then do it. */ private void checkTokenized() { if (tokens == null) { if (chars == null) { // still call tokenize as subclass may do some work final List split = tokenize(null, 0, 0); tokens = split.toArray(ArrayUtils.EMPTY_STRING_ARRAY); } else { final List split = tokenize(chars, 0, chars.length); tokens = split.toArray(ArrayUtils.EMPTY_STRING_ARRAY); } } } /** * Creates a new instance of this Tokenizer. The new instance is reset so * that it will be at the start of the token list. * If a {@link CloneNotSupportedException} is caught, return {@code null}. * * @return a new instance of this Tokenizer which has been reset. */ @Override public Object clone() { try { return cloneReset(); } catch (final CloneNotSupportedException ex) { return null; } } /** * Creates a new instance of this Tokenizer. The new instance is reset so that * it will be at the start of the token list. * * @return a new instance of this Tokenizer which has been reset. * @throws CloneNotSupportedException if there is a problem cloning */ Object cloneReset() throws CloneNotSupportedException { // this method exists to enable 100% test coverage final StrTokenizer cloned = (StrTokenizer) super.clone(); if (cloned.chars != null) { cloned.chars = cloned.chars.clone(); } cloned.reset(); return cloned; } /** * Gets the String content that the tokenizer is parsing. * * @return The string content being parsed */ public String getContent() { if (chars == null) { return null; } return new String(chars); } /** * Gets the field delimiter matcher. * * @return The delimiter matcher in use */ public StrMatcher getDelimiterMatcher() { return this.delimMatcher; } /** * Gets the ignored character matcher. *

* These characters are ignored when parsing the String, unless they are * within a quoted region. * The default value is not to ignore anything. *

* * @return The ignored matcher in use */ public StrMatcher getIgnoredMatcher() { return ignoredMatcher; } /** * Gets the quote matcher currently in use. *

* The quote character is used to wrap data between the tokens. * This enables delimiters to be entered as data. * The default value is '"' (double quote). *

* * @return The quote matcher in use */ public StrMatcher getQuoteMatcher() { return quoteMatcher; } /** * Gets a copy of the full token list as an independent modifiable array. * * @return The tokens as a String array */ public String[] getTokenArray() { checkTokenized(); return tokens.clone(); } /** * Gets a copy of the full token list as an independent modifiable list. * * @return The tokens as a String array */ public List getTokenList() { checkTokenized(); final List list = new ArrayList<>(tokens.length); Collections.addAll(list, tokens); return list; } /** * Gets the trimmer character matcher. *

* These characters are trimmed off on each side of the delimiter * until the token or quote is found. * The default value is not to trim anything. *

* * @return The trimmer matcher in use */ public StrMatcher getTrimmerMatcher() { return trimmerMatcher; } /** * Checks whether there are any more tokens. * * @return true if there are more tokens */ @Override public boolean hasNext() { checkTokenized(); return tokenPos < tokens.length; } /** * Checks whether there are any previous tokens that can be iterated to. * * @return true if there are previous tokens */ @Override public boolean hasPrevious() { checkTokenized(); return tokenPos > 0; } /** * Gets whether the tokenizer currently returns empty tokens as null. * The default for this property is false. * * @return true if empty tokens are returned as null */ public boolean isEmptyTokenAsNull() { return this.emptyAsNull; } /** * Gets whether the tokenizer currently ignores empty tokens. * The default for this property is true. * * @return true if empty tokens are not returned */ public boolean isIgnoreEmptyTokens() { return ignoreEmptyTokens; } /** * Checks if the characters at the index specified match the quote * already matched in readNextToken(). * * @param srcChars the character array being tokenized * @param pos the position to check for a quote * @param len the length of the character array being tokenized * @param quoteStart the start position of the matched quote, 0 if no quoting * @param quoteLen the length of the matched quote, 0 if no quoting * @return true if a quote is matched */ private boolean isQuote(final char[] srcChars, final int pos, final int len, final int quoteStart, final int quoteLen) { for (int i = 0; i < quoteLen; i++) { if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + i]) { return false; } } return true; } /** * Gets the next token. * * @return The next String token * @throws NoSuchElementException if there are no more elements */ @Override public String next() { if (hasNext()) { return tokens[tokenPos++]; } throw new NoSuchElementException(); } /** * Gets the index of the next token to return. * * @return The next token index */ @Override public int nextIndex() { return tokenPos; } /** * Gets the next token from the String. * Equivalent to {@link #next()} except it returns null rather than * throwing {@link NoSuchElementException} when no tokens remain. * * @return The next sequential token, or null when no more tokens are found */ public String nextToken() { if (hasNext()) { return tokens[tokenPos++]; } return null; } /** * Gets the token previous to the last returned token. * * @return The previous token */ @Override public String previous() { if (hasPrevious()) { return tokens[--tokenPos]; } throw new NoSuchElementException(); } /** * Gets the index of the previous token. * * @return The previous token index */ @Override public int previousIndex() { return tokenPos - 1; } /** * Gets the previous token from the String. * * @return The previous sequential token, or null when no more tokens are found */ public String previousToken() { if (hasPrevious()) { return tokens[--tokenPos]; } return null; } /** * Reads character by character through the String to get the next token. * * @param srcChars the character array being tokenized * @param start the first character of field * @param len the length of the character array being tokenized * @param workArea a temporary work area * @param tokenList the list of parsed tokens * @return The starting position of the next field (the character * immediately after the delimiter), or -1 if end of string found */ private int readNextToken(final char[] srcChars, int start, final int len, final StrBuilder workArea, final List tokenList) { // skip all leading whitespace, unless it is the // field delimiter or the quote character while (start < len) { final int removeLen = Math.max( getIgnoredMatcher().isMatch(srcChars, start, start, len), getTrimmerMatcher().isMatch(srcChars, start, start, len)); if (removeLen == 0 || getDelimiterMatcher().isMatch(srcChars, start, start, len) > 0 || getQuoteMatcher().isMatch(srcChars, start, start, len) > 0) { break; } start += removeLen; } // handle reaching end if (start >= len) { addToken(tokenList, StringUtils.EMPTY); return -1; } // handle empty token final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, start, len); if (delimLen > 0) { addToken(tokenList, StringUtils.EMPTY); return start + delimLen; } // handle found token final int quoteLen = getQuoteMatcher().isMatch(srcChars, start, start, len); if (quoteLen > 0) { return readWithQuotes(srcChars, start + quoteLen, len, workArea, tokenList, start, quoteLen); } return readWithQuotes(srcChars, start, len, workArea, tokenList, 0, 0); } /** * Reads a possibly quoted string token. * * @param srcChars the character array being tokenized * @param start the first character of field * @param len the length of the character array being tokenized * @param workArea a temporary work area * @param tokenList the list of parsed tokens * @param quoteStart the start position of the matched quote, 0 if no quoting * @param quoteLen the length of the matched quote, 0 if no quoting * @return The starting position of the next field (the character * immediately after the delimiter, or if end of string found, * then the length of string */ private int readWithQuotes(final char[] srcChars, final int start, final int len, final StrBuilder workArea, final List tokenList, final int quoteStart, final int quoteLen) { // Loop until we've found the end of the quoted // string or the end of the input workArea.clear(); int pos = start; boolean quoting = quoteLen > 0; int trimStart = 0; while (pos < len) { // quoting mode can occur several times throughout a string // we must switch between quoting and non-quoting until we // encounter a non-quoted delimiter, or end of string if (quoting) { // In quoting mode // If we've found a quote character, see if it's // followed by a second quote. If so, then we need // to actually put the quote character into the token // rather than end the token. if (isQuote(srcChars, pos, len, quoteStart, quoteLen)) { if (isQuote(srcChars, pos + quoteLen, len, quoteStart, quoteLen)) { // matched pair of quotes, thus an escaped quote workArea.append(srcChars, pos, quoteLen); pos += quoteLen * 2; trimStart = workArea.size(); continue; } // end of quoting quoting = false; pos += quoteLen; continue; } } else { // Not in quoting mode // check for delimiter, and thus end of token final int delimLen = getDelimiterMatcher().isMatch(srcChars, pos, start, len); if (delimLen > 0) { // return condition when end of token found addToken(tokenList, workArea.substring(0, trimStart)); return pos + delimLen; } // check for quote, and thus back into quoting mode if (quoteLen > 0 && isQuote(srcChars, pos, len, quoteStart, quoteLen)) { quoting = true; pos += quoteLen; continue; } // check for ignored (outside quotes), and ignore final int ignoredLen = getIgnoredMatcher().isMatch(srcChars, pos, start, len); if (ignoredLen > 0) { pos += ignoredLen; continue; } // check for trimmed character // don't yet know if its at the end, so copy to workArea // use trimStart to keep track of trim at the end final int trimmedLen = getTrimmerMatcher().isMatch(srcChars, pos, start, len); if (trimmedLen > 0) { workArea.append(srcChars, pos, trimmedLen); pos += trimmedLen; continue; } } // copy regular character from inside quotes workArea.append(srcChars[pos++]); trimStart = workArea.size(); } // return condition when end of string found addToken(tokenList, workArea.substring(0, trimStart)); return -1; } /** * Unsupported ListIterator operation. * * @throws UnsupportedOperationException always */ @Override public void remove() { throw new UnsupportedOperationException("remove() is unsupported"); } /** * Resets this tokenizer, forgetting all parsing and iteration already completed. *

* This method allows the same tokenizer to be reused for the same String. * * @return this, to enable chaining */ public StrTokenizer reset() { tokenPos = 0; tokens = null; return this; } /** * Reset this tokenizer, giving it a new input string to parse. * In this manner you can re-use a tokenizer with the same settings * on multiple input lines. * * @param input the new character array to tokenize, not cloned, null sets no text to parse * @return this, to enable chaining */ public StrTokenizer reset(final char[] input) { reset(); if (input != null) { this.chars = input.clone(); } else { this.chars = null; } return this; } /** * Reset this tokenizer, giving it a new input string to parse. * In this manner you can re-use a tokenizer with the same settings * on multiple input lines. * * @param input the new string to tokenize, null sets no text to parse * @return this, to enable chaining */ public StrTokenizer reset(final String input) { reset(); if (input != null) { this.chars = input.toCharArray(); } else { this.chars = null; } return this; } /** * Unsupported ListIterator operation. * @param obj this parameter ignored. * @throws UnsupportedOperationException always */ @Override public void set(final String obj) { throw new UnsupportedOperationException("set() is unsupported"); } /** * Sets the field delimiter character. * * @param delim the delimiter character to use * @return this, to enable chaining */ public StrTokenizer setDelimiterChar(final char delim) { return setDelimiterMatcher(StrMatcher.charMatcher(delim)); } /** * Sets the field delimiter matcher. *

* The delimiter is used to separate one token from another. *

* * @param delim the delimiter matcher to use * @return this, to enable chaining */ public StrTokenizer setDelimiterMatcher(final StrMatcher delim) { if (delim == null) { this.delimMatcher = StrMatcher.noneMatcher(); } else { this.delimMatcher = delim; } return this; } /** * Sets the field delimiter string. * * @param delim the delimiter string to use * @return this, to enable chaining */ public StrTokenizer setDelimiterString(final String delim) { return setDelimiterMatcher(StrMatcher.stringMatcher(delim)); } /** * Sets whether the tokenizer should return empty tokens as null. * The default for this property is false. * * @param emptyAsNull whether empty tokens are returned as null * @return this, to enable chaining */ public StrTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) { this.emptyAsNull = emptyAsNull; return this; } /** * Set the character to ignore. *

* This character is ignored when parsing the String, unless it is * within a quoted region. *

* * @param ignored the ignored character to use * @return this, to enable chaining */ public StrTokenizer setIgnoredChar(final char ignored) { return setIgnoredMatcher(StrMatcher.charMatcher(ignored)); } /** * Set the matcher for characters to ignore. *

* These characters are ignored when parsing the String, unless they are * within a quoted region. *

* * @param ignored the ignored matcher to use, null ignored * @return this, to enable chaining */ public StrTokenizer setIgnoredMatcher(final StrMatcher ignored) { if (ignored != null) { this.ignoredMatcher = ignored; } return this; } /** * Sets whether the tokenizer should ignore and not return empty tokens. * The default for this property is true. * * @param ignoreEmptyTokens whether empty tokens are not returned * @return this, to enable chaining */ public StrTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) { this.ignoreEmptyTokens = ignoreEmptyTokens; return this; } /** * Sets the quote character to use. *

* The quote character is used to wrap data between the tokens. * This enables delimiters to be entered as data. *

* * @param quote the quote character to use * @return this, to enable chaining */ public StrTokenizer setQuoteChar(final char quote) { return setQuoteMatcher(StrMatcher.charMatcher(quote)); } /** * Set the quote matcher to use. *

* The quote character is used to wrap data between the tokens. * This enables delimiters to be entered as data. *

* * @param quote the quote matcher to use, null ignored * @return this, to enable chaining */ public StrTokenizer setQuoteMatcher(final StrMatcher quote) { if (quote != null) { this.quoteMatcher = quote; } return this; } /** * Sets the matcher for characters to trim. *

* These characters are trimmed off on each side of the delimiter * until the token or quote is found. *

* * @param trimmer the trimmer matcher to use, null ignored * @return this, to enable chaining */ public StrTokenizer setTrimmerMatcher(final StrMatcher trimmer) { if (trimmer != null) { this.trimmerMatcher = trimmer; } return this; } /** * Gets the number of tokens found in the String. * * @return The number of matched tokens */ public int size() { checkTokenized(); return tokens.length; } /** * Internal method to performs the tokenization. *

* Most users of this class do not need to call this method. This method * will be called automatically by other (public) methods when required. *

*

* This method exists to allow subclasses to add code before or after the * tokenization. For example, a subclass could alter the character array, * offset or count to be parsed, or call the tokenizer multiple times on * multiple strings. It is also be possible to filter the results. *

*

* {@code StrTokenizer} will always pass a zero offset and a count * equal to the length of the array to this method, however a subclass * may pass other values, or even an entirely different array. *

* * @param srcChars the character array being tokenized, may be null * @param offset the start position within the character array, must be valid * @param count the number of characters to tokenize, must be valid * @return The modifiable list of String tokens, unmodifiable if null array or zero count */ protected List tokenize(final char[] srcChars, final int offset, final int count) { if (srcChars == null || count == 0) { return Collections.emptyList(); } final StrBuilder buf = new StrBuilder(); final List tokenList = new ArrayList<>(); int pos = offset; // loop around the entire buffer while (pos >= 0 && pos < count) { // find next token pos = readNextToken(srcChars, pos, count, buf, tokenList); // handle case where end of string is a delimiter if (pos >= count) { addToken(tokenList, StringUtils.EMPTY); } } return tokenList; } /** * Gets the String content that the tokenizer is parsing. * * @return The string content being parsed */ @Override public String toString() { if (tokens == null) { return "StrTokenizer[not tokenized yet]"; } return "StrTokenizer" + getTokenList(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy