/*
 * net.sf.jett.parser.MetadataScanner, part of the jett-core artifact
 * (available for Maven / Gradle / Ivy).
 *
 * JETT is a Java API that reads an Excel spreadsheet as a template, takes your data, and
 * creates a new Excel spreadsheet that contains your data, formatted as in the template. It
 * works with .xls and .xlsx template spreadsheets.
 */
package net.sf.jett.parser;
/**
 * A {@code MetadataScanner} object scans metadata text and returns tokens.
 *
 * <p>Outside of double quotes, each of the characters {@code "}, {@code ;}
 * and {@code =} is a token of its own; any other run of characters --
 * including whitespace and single quotes -- is returned as a single
 * {@link Token#TOKEN_STRING}. Between an opening and a closing double quote,
 * everything up to (but not including) the closing double quote is returned
 * as a single {@link Token#TOKEN_STRING}.</p>
 *
 * @author Randy Gettman
 */
public class MetadataScanner
{
    /**
     * Enumeration for the different types of Tokens in Metadata. The
     * {@code TOKEN_ERROR_*} constants and {@link #TOKEN_UNKNOWN} carry
     * negative codes.
     */
    public enum Token
    {
        /** Kept for compatibility; this scanner never enters single-quote mode. */
        TOKEN_ERROR_EOI_IN_SQUOTES(-4),
        /** End of input was reached before the closing double quote. */
        TOKEN_ERROR_EOI_IN_DQUOTES(-3),
        /** The metadata text to scan is {@code null}. */
        TOKEN_ERROR_BUF_NULL(-2),
        /** Fallback for a character that no scanning rule recognizes. */
        TOKEN_UNKNOWN(-1),
        /** Kept for compatibility; whitespace is scanned as part of a string. */
        TOKEN_WHITESPACE(0),
        /** A run of ordinary characters, or the content between double quotes. */
        TOKEN_STRING(1),
        /** Kept for compatibility; single quotes are scanned as part of a string. */
        TOKEN_SINGLE_QUOTE(11),
        /** An opening or closing double quote. */
        TOKEN_DOUBLE_QUOTE(12),
        /** A semicolon. */
        TOKEN_SEMICOLON(13),
        /** An equals sign. */
        TOKEN_EQUALS(14),
        /** End of input. */
        TOKEN_EOI(99);

        private final int myCode;

        // Create a token with a code.
        private Token(int code)
        {
            myCode = code;
        }

        /**
         * Returns the unique code associated with this {@code Token}.
         * @return The unique code.
         */
        public int getCode()
        {
            return myCode;
        }
    }

    // Characters that end a string token outside of double quotes. Single
    // quotes and whitespace are deliberately absent, so they are scanned as
    // ordinary string characters.
    private static final String PUNCT_CHARS_NOT_AS_STRING = "\";=";

    // The only character that ends a string token inside double quotes.
    private static final String DOUBLE_QUOTE_CHARS = "\"";

    private String myMetadataText;
    private int myOffset;
    private boolean amIInsideDoubleQuotes;
    private String myCurrLexeme;

    /**
     * Construct a {@code MetadataScanner} object, with empty input.
     */
    public MetadataScanner()
    {
        this("");
    }

    /**
     * Construct a {@code MetadataScanner} object, with the given input.
     * @param metadataText The metadata text to scan.
     */
    public MetadataScanner(String metadataText)
    {
        setMetadataText(metadataText);
    }

    /**
     * Returns the next {@code Token}. After this call completes, the current
     * lexeme is available via a call to {@code getCurrLexeme}.
     * Starts looking at the current offset, and once the token is found, then
     * the offset is advanced to the start of the next token.
     *
     * <p>If the metadata text is {@code null}, then
     * {@link Token#TOKEN_ERROR_BUF_NULL} is returned, the current lexeme is
     * the empty string, and the offset is not advanced.</p>
     *
     * @return A {@code Token}.
     * @see #getCurrLexeme
     */
    public Token getNextToken()
    {
        // Nothing to scan: report it as a token instead of failing with a
        // NullPointerException on the first access to the text.
        if (myMetadataText == null)
        {
            myCurrLexeme = "";
            return Token.TOKEN_ERROR_BUF_NULL;
        }

        final int start = myOffset;
        final boolean atEndOfInput = start >= myMetadataText.length();
        int length = 0;
        Token tokenType = Token.TOKEN_UNKNOWN;

        if (amIInsideDoubleQuotes)
        {
            // Inside double quotes, the whole thing until EOI or another
            // double quote is one string.
            if (atEndOfInput)
            {
                // EOI while in double quotes -- error!
                tokenType = Token.TOKEN_ERROR_EOI_IN_DQUOTES;
            }
            else if (myMetadataText.charAt(start) == '"')
            {
                length = 1;
                tokenType = Token.TOKEN_DOUBLE_QUOTE;
                amIInsideDoubleQuotes = false;
            }
            else
            {
                length = countCharsUntil(start, DOUBLE_QUOTE_CHARS);
                tokenType = Token.TOKEN_STRING;
            }
        }
        else if (atEndOfInput)
        {
            tokenType = Token.TOKEN_EOI;
        }
        else
        {
            final char first = myMetadataText.charAt(start);
            if (PUNCT_CHARS_NOT_AS_STRING.indexOf(first) == -1)
            {
                // String mode: letters, numbers, whitespace, and all but a
                // few punctuation characters.
                length = countCharsUntil(start, PUNCT_CHARS_NOT_AS_STRING);
                tokenType = Token.TOKEN_STRING;
            }
            else
            {
                switch (first)
                {
                case ';':
                    length = 1;
                    tokenType = Token.TOKEN_SEMICOLON;
                    break;
                case '=':
                    length = 1;
                    tokenType = Token.TOKEN_EQUALS;
                    break;
                case '"':
                    length = 1;
                    tokenType = Token.TOKEN_DOUBLE_QUOTE;
                    amIInsideDoubleQuotes = true;
                    break;
                default:
                    // Stays TOKEN_UNKNOWN with an empty lexeme. Not reachable
                    // while every character of PUNCT_CHARS_NOT_AS_STRING has
                    // a case above.
                    break;
                }
            }
        }

        // Note down lexeme for access later, then move past it.
        myCurrLexeme = myMetadataText.substring(start, start + length);
        myOffset = start + length;
        return tokenType;
    }

    /**
     * Counts the characters of the metadata text, beginning at the given
     * index, that come before the first terminator character or the end of
     * the text, whichever comes first.
     * @param start The index at which to start counting.
     * @param terminators The characters that stop the count.
     * @return The number of characters counted; {@code 0} if the character at
     *    {@code start} is a terminator or {@code start} is at the end.
     */
    private int countCharsUntil(int start, String terminators)
    {
        final int end = myMetadataText.length();
        int index = start;
        while (index < end && terminators.indexOf(myMetadataText.charAt(index)) == -1)
        {
            index++;
        }
        return index - start;
    }

    /**
     * Returns the current lexeme after a call to {@code getNextToken}.
     * @return The current lexeme, or {@code null} if {@code getNextToken}
     *    hasn't been called yet after a reset.
     * @see #getNextToken
     * @see #reset
     */
    public String getCurrLexeme()
    {
        return myCurrLexeme;
    }

    /**
     * Resets the scanner to the beginning of the metadata text.
     */
    public void reset()
    {
        myOffset = 0;
        amIInsideDoubleQuotes = false;
        myCurrLexeme = null;
    }

    /**
     * Give the {@code MetadataScanner} another metadata text to scan.
     * Resets to the beginning of the string.
     * @param metadataText The metadata text to scan.
     */
    public void setMetadataText(String metadataText)
    {
        myMetadataText = metadataText;
        reset();
    }
}