![JAR search and dependency download from the Maven repository](/logo.png)
com.adobe.xfa.text.markup.MarkupEngineIn Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
//----------------------------------------------------------------------
//
// ADOBE CONFIDENTIAL
// __________________
//
// Copyright 1995 - 2003 Adobe Systems Incorporated. All
// Rights Reserved.
//
// NOTICE: All information contained herein is, and remains
// the property of Adobe Systems Incorporated and its
// suppliers, if any. The intellectual and technical
// concepts contained herein are proprietary to Adobe Systems
// Incorporated and its suppliers and may be covered by U.S.
// and Foreign Patents, patents in process, and are protected
// by trade secret or copyright law. Dissemination of this
// information or reproduction of this material is strictly
// forbidden unless prior written permission is obtained from
// Adobe Systems Incorporated.
//
//----------------------------------------------------------------------
package com.adobe.xfa.text.markup;
import com.adobe.xfa.text.TextAttr;
import com.adobe.xfa.text.TextTab;
import com.adobe.xfa.ut.Storage;
import com.adobe.xfa.ut.UnitSpan;
/**
* Class MarkupEngineIn defines a general-purpose markup engine
* for handling RTF-like markup languages. Most of the actual parsing
* of the markup language and the sequencing of rich text generation
* calls to the base class (MarkupIn) occurs in this class. The
* derived class primarily supplies tables to drive the process.
*
*
* Unfortunately this model doesn't work well for XHTML markup. Even
* though the XHTML input markup engine extends this class, it does most
* of the processing itself.
*
*
* For more information, please see the external documentation.
*
*
* @exclude from published api -- Mike Tardif, May 2006.
*/
public abstract class MarkupEngineIn extends MarkupIn {
// Finite state machine states (values held by meState and switched on in translate())
protected final static int STATE_TEXT = 0; // accumulating plain text
protected final static int STATE_COMMAND = 1; // accumulating a command
protected final static int STATE_COMMAND_UNKNOWN = 2; // unknown/unhandled command
protected final static int STATE_QUOTE = 3; // accumulating quoted string in command
// Default divisor passed as nUnitsPerPoint by the shorter loadNumber() overloads.
protected final static int DEFAULT_UNITS_PER_POINT = 20;
// Current state of the finite state machine (one of the STATE_* constants).
protected int meState;
// Data
protected final boolean mbBlockScoping; // nested command block support
protected final char mcBlockPrefix; // open scoping char
protected final char mcBlockSuffix; // close scoping char
protected final char mcCommandPrefix; // command marker
// Attributes currently in effect; returned by textAttr() and emitted by flushAttr().
protected TextAttr moCurrentAttr;
// Stack of saved attribute copies maintained by pushAttr()/popAttr().
// NOTE(review): popAttr() assigns moAttrList.last() straight to a TextAttr, so this
// declaration presumably carried a TextAttr type argument that this listing has
// lost -- confirm against the original source.
protected final Storage moAttrList;
// Created in the constructor; not otherwise read or written in this class.
protected TextAttr moAmbientAttr;
protected int mnIndex; // current index into source
protected String mpStrSource; // source string
// Value returned by defaultFontSize().
protected final static UnitSpan DEFAULT_FONT_SIZE = new UnitSpan (UnitSpan.POINTS_1K, 12000);
// Value returned by defaultTypeface().
protected final static String DEFAULT_TYPEFACE_NAME = "Courier Std";
private final MarkupAttr mpMarkupAttr; // markup lang attributes
//private String moOutputText;
private int mnTextStart; // start of most recent text chunk
private int mePendingTab; // pending tab justification
/**
 * Runs the finite state machine over the source markup, starting at the
 * current index (mnIndex) and in the current state (meState). The loop
 * iterates once per source character; plain text is emitted through
 * flushText() and commands are dispatched to onCommand().
 *
 * When block scoping is enabled, a block prefix recurses through
 * parseBlock() and a block suffix returns from this call, which is how
 * a nested invocation hands control back to its caller. Any text or
 * command still pending when the source is exhausted is emitted before
 * returning.
 *
 * Malformed input at the very end of the source (a lone command prefix,
 * a truncated hex escape, or a unicode command that consumed the rest of
 * the input) is tolerated rather than raising an index exception.
 */
public void translate () {
    final char CR = 0x0D; // Carriage return
    StringBuilder oStrCommand = new StringBuilder();
    for (; mnIndex < mpStrSource.length(); mnIndex++) {
        // This is the finite state machine. It iterates once for each source
        // character.
        char c = mpStrSource.charAt (mnIndex); // TODO: C++ implementation doesn't use UniChar()
        switch (meState) {
        case STATE_TEXT: // Accumulating text
            if (c == mcCommandPrefix) {
                // Hex characters don't get nicely terminated by a delimiter,
                // so they are recognised and consumed right here.
                if (isHexCharacterAtIndex()) {
                    consumeHexCharacter (oStrCommand);
                    break;
                }
                // Any other command prefix causes a state switch. Flush any
                // accumulated text and restart the command string.
                flushText();
                oStrCommand.setLength (0);
                meState = STATE_COMMAND;
            } else if (c == CR) {
                // Toss only CR's (removed check for LF). It was losing line breaks when
                // pasting or loading text (paragraph breaks were OK). This is still not
                // correct for RTF, but we'll sort it out when we do a full RTF
                // implementation.
                flushText();
                oStrCommand.setLength (0);
                restartTextCollection();
            } else if (mbBlockScoping) {
                if (c == mcBlockPrefix) {
                    parseBlock();
                    oStrCommand.setLength (0);
                    break;
                } else if (c == mcBlockSuffix) {
                    flushText();
                    restartTextCollection();
                    return; // Exit nested block (assumes we're recursing).
                }
            }
            // Otherwise simply rely on the loop counter; the character stays
            // part of the pending text chunk.
            break;

        // Accumulating a command: Check for end of command char and process if
        // required. An opening quote puts us in the quote state (without
        // accumulating). Otherwise, it's a raw command character.
        case STATE_COMMAND:
            if (mpMarkupAttr.isDelimiter (c)) {
                if (oStrCommand.length() == 0) { // e.g., "\\" or "\."
                    String cc = "";
                    if (c == '~') {
                        // In RTF a tilde is a non-breaking space (Unicode 160).
                        cc += '\u00A0';
                    } else {
                        cc += c;
                    }
                    text (cc); // accumulate single character
                    restartTextCollection();
                    meState = STATE_TEXT;
                } else { // A command is sitting in oStrCommand
                    // Handle the markup command and empty the accumulated text.
                    int eTag = mpMarkupAttr.extractTag (oStrCommand);
                    if (! onCommand (eTag, oStrCommand.toString())) {
                        restartTextCollection();
                        // At this point we used to go into STATE_COMMAND_UNKNOWN
                        // except when it was the first command in a block.
                        // That failed on rtf that looked like:
                        // {\cgrid0 some text following...
                        meState = STATE_TEXT;
                    } else {
                        if (skipThisCommand (eTag)) {
                            mnIndex = skipBlock (mnIndex, mpStrSource);
                            restartTextCollection();
                            return;
                        }
                        meState = STATE_TEXT;
                    }
                    if (eTag == MarkupAttr.MARKUP_UNICODE_CHARACTER) {
                        // When we processed our UNICODE command, we skipped over
                        // some characters and should have a new delimiter.
                        if (mnIndex >= mpStrSource.length()) {
                            // The command consumed the rest of the input: there is
                            // no delimiter left to examine, so stop cleanly instead
                            // of indexing past the end of the source.
                            oStrCommand.setLength (0);
                            restartTextCollection();
                            break;
                        }
                        c = mpStrSource.charAt (mnIndex);
                    }
                    oStrCommand.setLength (0); // Fresh start
                    restartTextCollection();
                    if (c == mcCommandPrefix) { // Maintain state new command
                        meState = STATE_COMMAND;
                        if (isHexCharacterAtIndex()) {
                            consumeHexCharacter (oStrCommand);
                            break;
                        }
                    } else if (mbBlockScoping) {
                        if (c == mcBlockPrefix) {
                            parseBlock();
                        } else if (c == mcBlockSuffix) {
                            // Make sure we flush any MBText that might be
                            // hanging around.
                            commitPending (true);
                            return; // Exit nested block
                        }
                    }
                }
            } else if (c == '"') {
                meState = STATE_QUOTE;
            } else {
                oStrCommand.append (c);
            }
            break;

        // Unknown command: continue reading/discarding until a block scoping character
        // or command prefix is hit. Nothing in this method enters this state itself.
        case STATE_COMMAND_UNKNOWN:
            if (c == mcCommandPrefix) {
                meState = STATE_COMMAND;
            } else if (mbBlockScoping) { // block nesting
                if (c == mcBlockPrefix) {
                    parseBlock();
                } else if (c == mcBlockSuffix) {
                    return; // Exit nested block
                }
            }
            restartTextCollection(); // Squeaky clean
            break;

        // In a quote: Closing quote goes back to command state (without
        // accumulating). Otherwise, accumulate.
        case STATE_QUOTE:
            if (c == '"') {
                meState = STATE_COMMAND;
            } else {
                oStrCommand.append (c);
            }
            break;
        }
    }

    // Handle any loose text or command at the end of the string.
    switch (meState) {
    case STATE_TEXT:
        flushText();
        break;
    case STATE_COMMAND:
    case STATE_QUOTE:
        if (oStrCommand.length() > 0) {
            onCommand (mpMarkupAttr.extractTag (oStrCommand), oStrCommand.toString());
        }
        break;
    }
}

/**
 * Tests whether the two characters starting at the current index spell the
 * markup language's hex-character introducer. Returns false, rather than
 * throwing, when fewer than two characters remain in the source.
 */
private boolean isHexCharacterAtIndex () {
    final int nEnd = mnIndex + 2;
    if (nEnd > mpStrSource.length()) {
        return false;
    }
    return mpStrSource.substring (mnIndex, nEnd).equals (mpMarkupAttr.lookup (MarkupAttr.MARKUP_HEX_CHARACTER));
}

/**
 * Consumes a hex-character escape located at the current index: flushes any
 * pending text, dispatches the three characters following the command prefix
 * to onCommand(), and leaves the engine in STATE_TEXT positioned on the last
 * character of the escape. An escape cut short by the end of the source is
 * skipped without being dispatched.
 * @param oStrCommand - scratch command buffer; it is empty on return.
 */
private void consumeHexCharacter (StringBuilder oStrCommand) {
    if (mnIndex > mnTextStart) {
        // If there's any pending text, emit it first
        flushText();
    }
    final int nLength = mpStrSource.length();
    final int nEnd = mnIndex + 4;
    oStrCommand.setLength (0);
    if (nEnd <= nLength) {
        // Grab the characters making up the escape and process the command.
        oStrCommand.append (mpStrSource, mnIndex + 1, nEnd);
        int eTag = mpMarkupAttr.extractTag (oStrCommand);
        onCommand (eTag, oStrCommand.toString());
    }
    // Step onto the last character of the escape, never past the last
    // character of the source; the caller's loop increment moves on from there.
    mnIndex = Math.min (mnIndex + 3, nLength - 1);
    meState = STATE_TEXT;
    oStrCommand.setLength (0); // Fresh start
    restartTextCollection();
}
/**
 * Installs a new markup string to parse and rewinds the engine by calling
 * reset(), so that a following translate() starts from the beginning of
 * the new source.
 * @param sStrSource - String containing the markup to process.
 */
public void setSourceText (String sStrSource) {
    this.mpStrSource = sStrSource;
    this.reset();
}
/**
 * Protected constructor. Captures the markup attribute table together with
 * the scoping and command characters it reports, stores the source string,
 * creates the attribute objects and the attribute stack, and finally calls
 * reset() to put the engine in its initial state.
 * @param sStrSource - String containing the markup to process.
 * @param pMarkupAttr - Markup attributes (table) to drive the
 * translation of this markup language.
 */
protected MarkupEngineIn (String sStrSource, MarkupAttr pMarkupAttr) {
    // Markup language description and the special characters it defines.
    this.mpMarkupAttr = pMarkupAttr;
    this.mbBlockScoping = pMarkupAttr.hasBlockScoping();
    this.mcBlockPrefix = pMarkupAttr.blockPrefix();
    this.mcBlockSuffix = pMarkupAttr.blockSuffix();
    this.mcCommandPrefix = pMarkupAttr.commandPrefix();

    // Input and parsing context.
    this.mpStrSource = sStrSource;
    this.mePendingTab = TextTab.TYPE_LEFT;

    // Attribute state; these must exist before reset() runs, as it uses them.
    this.moAttrList = new Storage();
    this.moCurrentAttr = new TextAttr();
    this.moAmbientAttr = new TextAttr();

    this.reset();
}
/**
 * Pure virtual: Process a command from the markup language. Called by
 * translate() for each command it extracts from the source.
 * @param eTag - Command ID from the markup attribute table.
 * @param sStrCommand - Command data.
 * @return translate() treats a false result as "command not handled";
 * only after a true result does it consult skipThisCommand() for the tag.
 */
abstract protected boolean onCommand (int eTag, String sStrCommand);
/**
 * Accessor for the markup source currently being parsed.
 * @return The source text string.
 */
protected String sourceText () {
    return this.mpStrSource;
}
/**
 * Accessor for the text attributes currently in effect.
 * @return The current text attribute object itself (not a copy).
 */
protected TextAttr textAttr () {
    return this.moCurrentAttr;
}
/**
 * Accessor for the markup attribute table supplied at construction.
 * @return The current markup attribute table.
 */
protected MarkupAttr markupAttr () {
    return this.mpMarkupAttr;
}
/**
 * Emits the current text attributes through attr() unless they are empty,
 * then resets the pending tab justification to TextTab.TYPE_LEFT. The
 * pending tab is reset whether or not anything was emitted.
 */
protected void flushAttr () {
    final TextAttr oPending = moCurrentAttr;
    final boolean bNothingToEmit = oPending.isEmpty();
    if (!bNothingToEmit) {
        attr (oPending);
    }
    mePendingTab = TextTab.TYPE_LEFT;
}
/**
 * Emits whatever text is outstanding. Pending MB text (if any) goes first,
 * followed by the source characters from the start of the current text
 * chunk up to, but not including, the current index. Attributes are
 * flushed ahead of each piece of text that is emitted.
 */
protected void flushText () {
    // Force the MB text through the same codepath as non-MB text.
    // Derived classes should see both.
    if (hasPendingMBText()) {
        flushAttr();
        text (mbText());
    }

    // Nothing collected from the source since the last restart.
    if (mnIndex <= mnTextStart) {
        return;
    }

    // This path is taken for plain text found in the document. The original
    // implementation interpreted it in the codepage of the current character
    // converter; here the characters are passed through unchanged.
    // TODO: probably completely wrong for RTF
    flushAttr();
    final String sChunk = mpStrSource.substring (mnTextStart, mnIndex);
    text (sChunk);
}
/**
 * Converts a numeric command argument to a UnitSpan using the default
 * units reported by UnitSpan.defaultUnits() and DEFAULT_UNITS_PER_POINT.
 * @param oStrSource - text holding the number, optionally preceded by delimiters.
 * @return The resulting measurement.
 * @throws NumberFormatException if the text left after removing leading
 * delimiters is not a valid integer.
 */
protected UnitSpan loadNumber (String oStrSource) {
    final int eDefaultUnits = UnitSpan.defaultUnits();
    return loadNumber (oStrSource, eDefaultUnits, DEFAULT_UNITS_PER_POINT);
}

/**
 * Converts a numeric command argument to a UnitSpan in the given units,
 * using DEFAULT_UNITS_PER_POINT.
 * @param oStrSource - text holding the number, optionally preceded by delimiters.
 * @param eUnits - units code handed to the UnitSpan constructor.
 * @return The resulting measurement.
 * @throws NumberFormatException if the text left after removing leading
 * delimiters is not a valid integer.
 */
protected UnitSpan loadNumber (String oStrSource, int eUnits) {
    return loadNumber (oStrSource, eUnits, DEFAULT_UNITS_PER_POINT);
}

/**
 * Converts a numeric command argument to a UnitSpan. Leading characters
 * that the markup attribute table classifies as delimiters are skipped,
 * the remainder is parsed as a decimal integer, scaled, and handed to the
 * UnitSpan constructor as a UnitSpan.POINTS_1K value.
 * @param oStrSource - text holding the number; it is not modified.
 * @param eUnits - units code handed to the UnitSpan constructor.
 * @param nUnitsPerPoint - divisor applied while scaling the parsed value.
 * @return The resulting measurement.
 * @throws NumberFormatException if the text left after removing leading
 * delimiters is not a valid integer.
 */
protected UnitSpan loadNumber (String oStrSource, int eUnits, int nUnitsPerPoint) {
    // Find the first character that is not a delimiter; everything from
    // there on is expected to be the number (a leading '-' included).
    final int nLength = oStrSource.length();
    int nFirst = 0;
    while ((nFirst < nLength) && mpMarkupAttr.isDelimiter (oStrSource.charAt (nFirst))) {
        nFirst++;
    }

    // NOTE(review): the value is multiplied by 1000 twice (once in int
    // arithmetic, once in float) before the division -- confirm this is the
    // intended scale for a POINTS_1K value.
    final int nThousandths = Integer.parseInt (oStrSource.substring (nFirst)) * 1000;
    final int nScaled = Math.round ((nThousandths * 1000.0f) / (float) nUnitsPerPoint);
    return new UnitSpan (eUnits, UnitSpan.POINTS_1K, nScaled);
}
/**
 * Handles a nested block. Called by translate() when a block prefix is
 * encountered: the current parsing context (state and pending tab) is
 * saved, outstanding text and attributes are flushed, the attributes are
 * pushed, and translate() is re-entered for the block contents. On return
 * the pre-block attributes are popped and the saved context is restored.
 * startBlock() and endBlock() are invoked around the block so that derived
 * classes can react.
 */
protected void parseBlock () {
    startBlock();

    // Remember the context-sensitive values so they can be put back once
    // the block has been handled.
    final int eOuterState = meState;
    final int eOuterPendingTab = mePendingTab;

    // Get everything belonging to the enclosing scope out of the way.
    flushText(); // Flush outstanding text/attr
    flushAttr(); // Explicitly flush our attributes.
    pushAttr();
    restartTextCollection(); // Reset text indices

    // Step over the block opening character and parse the block contents
    // from a clean text state.
    ++mnIndex;
    meState = STATE_TEXT;
    translate(); // Recurse away

    // Emit whatever the block left behind.
    flushText(); // Flush translated block text/attr
    restartTextCollection(); // Reset indices
    endBlock();

    // Return to the pre-block attributes.
    popAttr();
    flushAttr();

    // Force a text flush. There could be pending MB text waiting to be
    // emitted.
    commitPending (true);

    meState = eOuterState;
    mePendingTab = eOuterPendingTab;
}
/**
 * Sets the pending tab justification.
 * @param ePendingTab - enumerated tab value.
 */
protected void pendingTab (int ePendingTab) {
    this.mePendingTab = ePendingTab;
}
/**
 * Returns the pending tab justification.
 * @return Enumerated tab value.
 */
protected int pendingTab () {
    return this.mePendingTab;
}
/**
 * Resets the beginning index for text accumulation to the character
 * following the current index, which discards the current character and
 * anything collected before it.
 */
protected void restartTextCollection () {
    final int nNextIndex = mnIndex + 1;
    mnTextStart = nNextIndex;
}
//---------------------------------------------------------------------------
// Default implementation of some methods which are really only needed by RTF
//---------------------------------------------------------------------------
/**
 * Asked by translate() after a command has been handled; a true result
 * makes translate() skip the rest of the enclosing block via skipBlock().
 * This default implementation never skips.
 * @param eTag - Command ID of the command just handled.
 * @return Always false here.
 */
protected boolean skipThisCommand (int eTag) {
return false;
}
// Default values
/**
 * Default font size.
 * @return DEFAULT_FONT_SIZE in this implementation.
 */
protected UnitSpan defaultFontSize () {
return DEFAULT_FONT_SIZE;
}
/**
 * Default typeface name.
 * @return DEFAULT_TYPEFACE_NAME in this implementation.
 */
protected String defaultTypeface () {
return DEFAULT_TYPEFACE_NAME;
}
/**
 * Skips the remainder of a block that cannot be parsed, including every
 * block nested inside it. Scanning starts at nStart with a nesting depth
 * of one; each block prefix raises the depth and each block suffix lowers
 * it.
 * @param nStart - index at which to start scanning.
 * @param sRTF - text to scan.
 * @return The index of the block suffix that brings the depth to zero, or
 * the index at which scanning stopped (the end of the text) if no such
 * suffix is found.
 */
protected int skipBlock (int nStart, String sRTF) {
    int nDepth = 1;
    int nPos = nStart;
    for (; nPos < sRTF.length(); nPos++) {
        final char ch = sRTF.charAt (nPos); // TODO: C++ implementation doesn't use UniChar()
        if (ch == mcBlockPrefix) {
            nDepth++;
        } else if (ch == mcBlockSuffix) {
            nDepth--;
            if (nDepth == 0) {
                // Found the suffix matching the block we started in.
                return nPos;
            }
        }
    }
    return nPos;
}
/**
 * Hook called by parseBlock() before it processes a nested block.
 * Does nothing in this class.
 */
protected void startBlock () {
}
/**
 * Hook called by parseBlock() after the contents of a nested block have
 * been translated and flushed. Does nothing in this class.
 */
protected void endBlock () {
}
/**
 * Resets member variables for the beginning of a translation: the state
 * machine returns to STATE_TEXT at index zero, the current attributes are
 * re-initialised, and the attribute stack is cleared and seeded with a
 * copy of the current attributes.
 */
protected void reset () {
    // Parsing position and state.
    meState = STATE_TEXT;
    mnIndex = 0;
    mnTextStart = 0;

    // Establish our default attribute settings.
    final TextAttr oAttr = moCurrentAttr;
    oAttr.setDefault (true);

    // Typeface and size get special handling: they must not count as
    // specified from the start, because when the markup doesn't set them the
    // environment should take over and provide the default (e.g. the default
    // font for a field). The old FF99 edit control will emit xhtml with no
    // font specified if the font is the same as the field.
    oAttr.typefaceEnable (false);
    oAttr.sizeEnable (false);

    // Disable the paragraph attributes. Since the current attributes are
    // pushed and popped, popping paragraph attributes would override any
    // attributes previously set in the same paragraph.
    oAttr.specialEnable (false);
    oAttr.justifyVEnable (false);
    oAttr.justifyHEnable (false);
    oAttr.tabsEnable (false);
    oAttr.spacingEnable (false);
    oAttr.spaceBeforeEnable (false);
    oAttr.spaceAfterEnable (false);
    oAttr.marginLEnable (false);
    oAttr.marginREnable (false);

    // Background colour is not set in xhtml, so its default (white) must not
    // clobber the control values.
    oAttr.colourBgEnable (false);

    // Start over with an attribute stack holding just the initial attributes.
    moAttrList.clear();
    pushAttr();
}
/**
 * Restores the attributes most recently saved by pushAttr(): the last
 * entry of the attribute stack becomes the current attributes and is
 * removed from the stack. A copy of the restored attributes, adjusted by
 * isolatePara (false, false), is then emitted through attr().
 */
protected void popAttr () {
    // Take the most recently saved entry off the stack.
    moCurrentAttr = moAttrList.last();
    moAttrList.removeLast();

    // We want to get back to our previous context, but not yet to the
    // previous paragraph attributes: it's not safe to apply those unless a
    // paragraph has been inserted. Emit an adjusted copy so that the
    // restored attributes themselves stay intact.
    final TextAttr oWithoutPara = new TextAttr (moCurrentAttr);
    oWithoutPara.isolatePara (false, false);
    attr (oWithoutPara);
}
/**
 * Saves a copy of the current attributes on the attribute stack so that a
 * later popAttr() can restore them.
 */
protected void pushAttr () {
    final TextAttr oSnapshot = new TextAttr (moCurrentAttr);
    moAttrList.add (oSnapshot);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy