Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
package net.sf.jett.parser;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CreationHelper;
import org.apache.poi.ss.usermodel.RichTextString;
import net.sf.jett.exception.TagParseException;
import net.sf.jett.util.RichTextStringUtil;
import net.sf.jett.util.SheetUtil;
/**
* A TagParser parses one JETT XML tag, either a begin tag or an
* end tag, including the tag namespace (if any), the tag name, and any
* attributes.
*
* @author Randy Gettman
*/
public class TagParser
{
/**
* Determines the beginning of an XML start tag.
*/
public static final String BEGIN_START_TAG = "<";
/**
* Determines the beginning of an XML end tag.
*/
public static final String BEGIN_END_TAG = "";
/**
* Determines the ending of an XML start tag that is bodiless.
*/
public static final String END_BODILESS_TAG = "/>";
private Cell myCell;
private String myCellText;
private RichTextString myCellRichTextString;
private int myStartIdx;
private String myNamespace;
private String myTagName;
private boolean amIATag;
private boolean amIEndTag;
private boolean amIBodiless;
private Map myAttributes = new HashMap();
private int myTagStartIdx;
private int myTagEndIdx;
/**
* Create a TagParser object that will parse the given tag text.
* @param cell The Cell that contains text of the tag.
*/
public TagParser(Cell cell)
{
this(cell, 0);
}
/**
* Create a TagParser object that will parse the given tag
* text, starting at the given position in the string..
* @param cell The Cell that contains text of the tag.
* @param startIdx The 0-based index into the string.
*/
public TagParser(Cell cell, int startIdx)
{
myCell = cell;
setCellText(cell.getStringCellValue().substring(startIdx));
myStartIdx = startIdx;
myCellRichTextString = cell.getRichStringCellValue();
}
/**
* Sets the tag text to the given tag text and resets the parser.
* @param tagText The new tag text.
*/
public void setCellText(String tagText)
{
myCellText = tagText;
reset();
}
/**
* Resets this TagParser, usually at creation time and when new
* input arrives.
*/
private void reset()
{
myNamespace = null;
myTagName = null;
amIATag = false;
amIEndTag = false;
amIBodiless = false;
myAttributes.clear();
myTagStartIdx = -1;
myTagEndIdx = -1;
}
/**
* Parses the tag text.
*/
public void parse()
{
TagScanner scanner = new TagScanner(myCellText);
// Tags must begin with "<" or "", "<\""
// But "<:" is a bad tag, with no namespace.
if (token != TagScanner.Token.TOKEN_STRING && token != TagScanner.Token.TOKEN_COLON)
{
myTagStartIdx = -1;
amIATag = false;
return;
}
if (token == TagScanner.Token.TOKEN_STRING)
{
String lexeme = scanner.getCurrLexeme();
token = scanner.getNextToken();
if (token == TagScanner.Token.TOKEN_COLON)
{
token = scanner.getNextToken();
if (token == TagScanner.Token.TOKEN_STRING)
{
// namespace:tagName
myNamespace = lexeme;
myTagName = scanner.getCurrLexeme();
token = scanner.getNextToken();
}
else
{
throw new TagParseException("Cannot find tag name in tag text: " + myCellText + SheetUtil.getCellLocation(myCell));
}
}
else
{
// tagName
myNamespace = "";
myTagName = lexeme;
}
}
else if (token == TagScanner.Token.TOKEN_COLON)
{
throw new TagParseException("Cannot find namespace in tag text: " + myCellText + SheetUtil.getCellLocation(myCell));
}
// Parse any attribute name/value pairs: attrName="value".
String attrName = null;
boolean insideDoubleQuotes = false;
while (token.getCode() >= 0 && token != TagScanner.Token.TOKEN_END_ANGLE_BRACKET &&
token != TagScanner.Token.TOKEN_SLASH_END_ANGLE_BRACKET)
{
switch(token)
{
case TOKEN_WHITESPACE:
// Ignore.
break;
case TOKEN_STRING:
if (insideDoubleQuotes)
{
// Add newly complete attribute name/value pair.
if (attrName == null)
throw new TagParseException("Value found without attribute name: " + myCellText + SheetUtil.getCellLocation(myCell));
// Store the RichTextString attribute value.
int pos = myStartIdx + scanner.getNextPosition();
CreationHelper helper = myCell.getSheet().getWorkbook().getCreationHelper();
RichTextString attrValue = RichTextStringUtil.substring(myCellRichTextString,
helper, pos - scanner.getCurrLexeme().length(), pos);
// Replace _all_ tabs, carriage returns, linefeeds with spaces.
attrValue = RichTextStringUtil.replaceValues(attrValue, helper,
Arrays.asList("\n", "\r", "\t"),
Arrays.asList(" " , " " , " " ),
true);
// Perform escape-sequence replacement.
attrValue = RichTextStringUtil.performEscaping(attrValue, helper);
myAttributes.put(attrName, attrValue);
attrName = null;
}
else
attrName = scanner.getCurrLexeme();
break;
case TOKEN_EQUALS:
if (attrName == null)
throw new TagParseException("Attribute name missing before \"=\": " + myCellText + SheetUtil.getCellLocation(myCell));
break;
case TOKEN_COLON:
throw new TagParseException("Colon not allowed in attribute name: " + myCellText + SheetUtil.getCellLocation(myCell));
case TOKEN_DOUBLE_QUOTE:
insideDoubleQuotes = !insideDoubleQuotes;
break;
case TOKEN_BEGIN_ANGLE_BRACKET:
case TOKEN_BEGIN_ANGLE_BRACKET_SLASH:
throw new TagParseException("Cannot start a tag within another tag: " + myCellText + SheetUtil.getCellLocation(myCell));
case TOKEN_EOI:
throw new TagParseException("Tags must start with \"" + BEGIN_START_TAG + "\" or \"" +
BEGIN_END_TAG + "\" and end with \"" + END_TAG + "\" or \"" + END_BODILESS_TAG +
"\": " + myCellText + " at " + SheetUtil.getCellLocation(myCell));
default:
throw new TagParseException("Parse error occurred: " + myCellText + SheetUtil.getCellLocation(myCell));
}
token = scanner.getNextToken();
}
// Found end angle bracket before attribute value found.
if (attrName != null)
throw new TagParseException("Found end of tag before attribute value: " + myCellText + SheetUtil.getCellLocation(myCell));
if (token.getCode() < 0)
throw new TagParseException("Found end of input while scanning attribute value: " + myCellText + SheetUtil.getCellLocation(myCell));
// If "/>", then the tag is bodiless, else (">") there is a body.
amIBodiless = (token == TagScanner.Token.TOKEN_SLASH_END_ANGLE_BRACKET);
// We have reached the end angle bracket. Bodiless tags cannot have tag
// text before or after the tag.
myTagEndIdx = scanner.getNextPosition();
}
/**
* Returns whether the given tag text is in fact a tag. That is, if the tag
* text starts with BEGIN_START_TAG or
* BEGIN_END_TAG and ends with END_TAG.
* @return true if the tag text represents a tag,
* false otherwise.
* @see #BEGIN_START_TAG
* @see #BEGIN_END_TAG
* @see #END_TAG
*/
public boolean isTag()
{
return amIATag;
}
/**
* Returns whether this tag is the end of the tag or not. That is, if the
* tag text starts with BEGIN_END_TAG.
* @return true if the tag text represents an end tag,
* false if the tag text represents a start tag.
* @see #BEGIN_START_TAG
* @see #BEGIN_END_TAG
*/
public boolean isEndTag()
{
return amIEndTag;
}
/**
* Returns whether this tag is bodiless. That is, if the
* tag text ends with END_BODILESS_TAG.
* @return true if the tag text represents an end tag,
* false if the tag text represents a start tag.
* @see #END_TAG
* @see #END_BODILESS_TAG
*/
public boolean isBodiless()
{
return amIBodiless;
}
/**
* Returns the namespace found, if any. That is, the text before the colon
* in the tag name. E.g. <namespace:tagname ...>
* @return The namespace, or null if missing.
*/
public String getNamespace()
{
return myNamespace;
}
/**
* Returns the tag name found, if any. That is, the text after the colon in
* the tag name, or the whole tag name if no colon is found. E.g.
* <namespace:tagname ...> or <tagname ...>.
* @return The tag name.
*/
public String getTagName()
{
return myTagName;
}
/**
* Returns a formatted string containing the namespace, followed by a colon
* (if the namespace exists), followed by the tag name, e.g.
* getNamespace() + ":" + getTagName().
* @return A formatted string.
*/
public String getNamespaceAndTagName()
{
if (myNamespace != null && myNamespace.length() > 0)
return myNamespace + ":" + myTagName;
else
return myTagName;
}
/**
* Returns a Map of attribute names mapped to attribute values,
* possibly empty.
* E.g.<namespace:tagname attr1="value1" attr2="value2">
* is returned as ["attr1"=>"value1", "attr2"=>"value2"].
* @return A Map of attribute names and attribute values.
*/
public Map getAttributes()
{
return myAttributes;
}
/**
* Returns the Cell whose tag text is being parsed.
* @return The Cell.
*/
public Cell getCell()
{
return myCell;
}
/**
* Returns the portion of the cell text that is the tag text.
* @return The portion of the cell text that is the tag text.
*/
public String getTagText()
{
if (myTagStartIdx != -1 && myTagEndIdx != -1)
return myCellText.substring(myTagStartIdx, myTagEndIdx);
return null;
}
/**
* Returns the 0-based index into the cell text that is after the tag.
* @return The 0-based index into the cell text that is after the tag.
*/
public int getAfterTagIdx()
{
return myTagEndIdx;
}
}