
// uk.org.retep.util.string.Tokenizer (Maven / Gradle / Ivy)
/*
* Copyright (c) 1998-2010, Peter T Mount
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the retep.org.uk nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package uk.org.retep.util.string;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import javax.annotation.concurrent.NotThreadSafe;
/**
* This class provides an alternative to java.util.StringTokenizer which,
* unlike StringTokenizer, also returns tokens of 0 length, and which provides
* both an Enumeration and an Iterator over the tokens.
*
* Unlike StringTokenizer, which scans in realtime, this class tokenizes
* the String immediately.
*
* @author Peter T Mount
* @version 1.0
*/
@NotThreadSafe
public class Tokenizer
{
    /**
     * The default delimiters: space, tab, newline, carriage return and form feed
     */
    public static final String DELIMITERS = " \t\n\r\f";

    // The String most recently tokenized, exactly as supplied (may be null)
    private String string;
    // The delimiters in effect for the most recent tokenization (never null)
    private String delimiters;
    // true if each delimiter character is itself returned as a token
    private boolean returnDelimiters;
    // The tokens, in order of appearance; rebuilt by every tokenization
    private final List<String> tokens = new ArrayList<String>();
    // The index of the next token to be handed out by nextToken(); it is
    // shared by the Iterator and Enumeration views
    private int pos;

    /**
     * Tokenize the string using the default delimiters
     *
     * @param aString the String to tokenize
     */
    public Tokenizer(String aString)
    {
        this(aString,DELIMITERS,false);
    }

    /**
     * Tokenize the string using the supplied delimiters
     *
     * @param aString the String to tokenize
     * @param aDelimiter the required delimiters
     */
    public Tokenizer(String aString,String aDelimiter)
    {
        this(aString,aDelimiter,false);
    }

    /**
     * Tokenize the string using the default delimiters
     *
     * @param aString the String to tokenize
     * @param retDelims if true, the delimiters are also returned as tokens
     */
    public Tokenizer(String aString,boolean retDelims)
    {
        this(aString,DELIMITERS,retDelims);
    }

    /**
     * Tokenize the string using the supplied delimiters
     *
     * @param aString the String to tokenize
     * @param aDelimiter the required delimiters
     * @param retDelims if true, the delimiters are also returned as tokens
     */
    public Tokenizer(String aString,String aDelimiter,boolean retDelims)
    {
        // Use the private worker rather than the public tokenize() so that a
        // subclass override cannot run against a half constructed instance
        split(aString,aDelimiter,retDelims);
    }

    /**
     * @return the String that has been Tokenized
     */
    public String getString()
    {
        return string;
    }

    /**
     * @return the delimiters used to tokenize the string
     */
    public String getDelimiters()
    {
        return delimiters;
    }

    /**
     * @return true if delimiters are being included in returned tokens
     */
    public boolean isReturnDelimiters()
    {
        return returnDelimiters;
    }

    /**
     * @return the total number of tokens, regardless of how many have
     * already been consumed
     */
    public int countTokens()
    {
        return tokens.size();
    }

    /**
     * Reuse this instance on a new String using the same set of delimiters
     *
     * @param aString the String to tokenize
     */
    public void tokenize(String aString)
    {
        tokenize(aString,getDelimiters(),isReturnDelimiters());
    }

    /**
     * Reuse this instance on a new String and a new set of Delimiters
     *
     * @param aString the String to tokenize
     * @param aDelimiter the required delimiters
     */
    public void tokenize(String aString,String aDelimiter)
    {
        tokenize(aString,aDelimiter,isReturnDelimiters());
    }

    /**
     * Reuse this instance on a new String and a new set of Delimiters.
     * Any tokens from a previous tokenization are discarded and the
     * Iterator & Enumeration position is reset.
     *
     * @param aString the String to tokenize
     * @param aDelimiter the required delimiters
     * @param retDelims if true, the delimiters are also returned as tokens
     */
    public void tokenize(String aString,String aDelimiter,boolean retDelims)
    {
        split(aString,aDelimiter,retDelims);
    }

    /**
     * Performs the actual tokenization.
     *
     * Every single delimiter character terminates a token, so adjacent
     * delimiters, or a delimiter at the very start or end of the String,
     * produce tokens of 0 length. A String containing n delimiter characters
     * therefore always yields n+1 text tokens, plus n single character
     * delimiter tokens when retDelims is true.
     *
     * A null String yields no tokens at all; null delimiters fall back to
     * {@link #DELIMITERS}.
     *
     * @param aString the String to tokenize
     * @param aDelimiter the required delimiters
     * @param retDelims if true, the delimiters are also returned as tokens
     */
    private void split(String aString,String aDelimiter,boolean retDelims)
    {
        string = aString;
        delimiters = aDelimiter==null ? DELIMITERS : aDelimiter;
        returnDelimiters = retDelims;

        // Reset the Iterator & Enumeration position and drop the old tokens
        pos = 0;
        tokens.clear();

        if(aString==null)
        {
            return;
        }

        // start is the offset of the first character of the current token
        int start = 0;
        final int length = aString.length();
        for(int i=0;i<length;i++)
        {
            if(delimiters.indexOf(aString.charAt(i))>-1)
            {
                // Close the current token; it is empty when start==i
                tokens.add(aString.substring(start,i));

                // Now if we want delimiters included, then add it here
                if(returnDelimiters)
                {
                    tokens.add(aString.substring(i,i+1));
                }

                // move the start to the next position after the delimiter
                start = i+1;
            }
        }

        // The remainder after the last delimiter; this is the empty String
        // when the input ends with a delimiter or is itself empty
        tokens.add(aString.substring(start));
    }

    /**
     * Resets the Iterator & Enumeration interfaces so that the next token
     * returned is the first one again
     */
    public void reset()
    {
        pos=0;
    }

    /**
     * @return true if there are more tokens available
     */
    public boolean hasMoreTokens()
    {
        // do not use countTokens() here
        return pos<tokens.size();
    }

    /**
     * Returns the next token and advances the current position.
     *
     * @return the next token, which may be of 0 length
     * @throws NoSuchElementException if there are no more tokens
     */
    public String nextToken()
    {
        if(!hasMoreTokens())
        {
            throw new NoSuchElementException("No more tokens");
        }
        return tokens.get(pos++);
    }

    /**
     * The returned Iterator shares its position with this Tokenizer: it
     * continues from the current position, and tokens it consumes are also
     * consumed as far as {@link #nextToken()} and {@link #enumerator()} are
     * concerned. Use {@link #reset()} to start again.
     *
     * @return Iterator of the Tokens. Each element is returned as a String.
     */
    public Iterator<String> iterator()
    {
        return new Iterator<String>()
        {
            @Override
            public boolean hasNext()
            {
                return hasMoreTokens();
            }

            @Override
            public String next()
            {
                return nextToken();
            }

            @Override
            public void remove()
            {
                throw new java.lang.UnsupportedOperationException("Method remove() not supported");
            }
        };
    }

    /**
     * The returned Enumeration shares its position with this Tokenizer in
     * the same way as {@link #iterator()}.
     *
     * @return Enumeration of the Tokens. Each element is returned as a
     * String.
     */
    public Enumeration<String> enumerator()
    {
        return new Enumeration<String>()
        {
            @Override
            public boolean hasMoreElements()
            {
                return hasMoreTokens();
            }

            @Override
            public String nextElement()
            {
                return nextToken();
            }
        };
    }
}
/*
* $Log: Tokenizer.java,v $
* Revision 1.6 2007/01/06 16:56:25 peter
* Fixed copyright to account for 2007
*
* Revision 1.5 2006/09/10 22:26:44 peter
* Code cleanup and some changes to new package layout
*
* Revision 1.1 2006/09/10 20:59:24 peter
* Code cleanup and importing some code from original retepTools project
*
* Revision 1.4 2003/05/27 15:27:58 petermount
* Cleaned up code (indenting, imports, PMD compliance)
*
*/