com.adobe.xfa.text.markup.MarkupEngineIn Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
There is a newer version: 2024.11.18751.20241128T090041Z-241100
//----------------------------------------------------------------------
//
//	ADOBE CONFIDENTIAL
//	__________________
//
//		Copyright 1995 - 2003 Adobe Systems Incorporated.  All
//		Rights Reserved.
//
//		NOTICE:  All information contained herein is, and remains
//		the property of Adobe Systems Incorporated and its
//		suppliers, if any.	The intellectual and technical
//		concepts contained herein are proprietary to Adobe Systems
//		Incorporated and its suppliers and may be covered by U.S.
//		and Foreign Patents, patents in process, and are protected
//		by trade secret or copyright law.  Dissemination of this
//		information or reproduction of this material is strictly
//		forbidden unless prior written permission is obtained from
//		Adobe Systems Incorporated.
//
//----------------------------------------------------------------------

package com.adobe.xfa.text.markup;

import com.adobe.xfa.text.TextAttr;
import com.adobe.xfa.text.TextTab;

import com.adobe.xfa.ut.Storage;
import com.adobe.xfa.ut.UnitSpan;


/**
 * Class MarkupEngineIn defines a general-purpose markup engine
 * for handling RTF-like markup languages.	Most of the actual parsing
 * of the markup language and the sequencing of rich text generation
 * calls to the base class (MarkupIn) occurs in this class.  The
 * derived class primarily supplies tables to drive the process.
 * 
 * 
 * Unfortunately this model doesn't work well for XHTML markup.  Even
 * though the XHTML input markup engine extends this class, it does most
 * of the processing itself.
 * 
 * 
 * For more information, please see the external documentation.
 * 
 *
 * @exclude from published api -- Mike Tardif, May 2006.
 */

public abstract class MarkupEngineIn extends MarkupIn {
// Finite state machine states.  meState holds one of these values while
// translate() scans the source.
	protected final static int STATE_TEXT = 0; // accumulating plain text
	protected final static int STATE_COMMAND = 1; // accumulating a command
	protected final static int STATE_COMMAND_UNKNOWN = 2; // unknown/unhandled command
	protected final static int STATE_QUOTE = 3; // accumulating quoted string in command

// Units-per-point divisor used by the loadNumber() overloads that do not
// take an explicit nUnitsPerPoint argument.
	protected final static int DEFAULT_UNITS_PER_POINT = 20;

	protected int meState; // current finite state machine state (one of STATE_*)

// Data
	protected final boolean mbBlockScoping; // nested command block support
	protected final char mcBlockPrefix; // open scoping char
	protected final char mcBlockSuffix; // close scoping char
	protected final char mcCommandPrefix; // command marker
	
	protected TextAttr moCurrentAttr; // attributes currently in effect (see textAttr())
// Stack of saved attributes: pushAttr() appends a copy of moCurrentAttr
// and popAttr() takes the last entry back.
// NOTE(review): declared as a raw Storage here, yet popAttr() assigns
// last() straight to a TextAttr -- presumably the element type is
// TextAttr; confirm against the Storage declaration.
	protected final Storage moAttrList;
	protected TextAttr moAmbientAttr; // created by the constructor; not otherwise referenced in this class
	protected int mnIndex; // current index into source
	protected String mpStrSource; // source string

	protected final static UnitSpan DEFAULT_FONT_SIZE = new UnitSpan (UnitSpan.POINTS_1K, 12000); // returned by defaultFontSize()
	protected final static String DEFAULT_TYPEFACE_NAME = "Courier Std"; // returned by defaultTypeface()

	private final MarkupAttr mpMarkupAttr; // markup lang attributes
	//private String moOutputText;
	private int mnTextStart; // start of most recent text chunk
	private int mePendingTab; // pending tab justification

	/**
	 * Run the finite state machine over the source text.
	 * <p>
	 * Scanning starts at the current index (mnIndex) in the current state
	 * (meState) and visits one source character per iteration.  Plain
	 * text is forwarded through flushText() and commands through
	 * onCommand().  When block scoping is enabled, a block prefix
	 * recurses through parseBlock() and a block suffix returns from this
	 * (nested) call.  Text or a command still pending when the end of the
	 * source is reached is flushed before returning.
	 * <p>
	 * A command prefix or hex-character sequence that is cut short by the
	 * end of the source is tolerated instead of raising
	 * StringIndexOutOfBoundsException.
	 */
	public void translate () {
		//final char LF = 0x0A; // Line feed
		final char CR = 0x0D; // Carriage return

		StringBuilder oStrCommand = new StringBuilder();

		for (; mnIndex < mpStrSource.length(); mnIndex++) {
// This is the finite state machine.  It iterates once for each source
// character.
			char c = mpStrSource.charAt (mnIndex);		// TODO: C++ implementation doesn't use UniChar()

			switch (meState) {
				case STATE_TEXT:
					if (c == mcCommandPrefix) { // Accumulating text
// A command prefix causes a state switch.  Hex characters are handled
// on the spot because no delimiter terminates them.
						if (isHexCharacterAt (mnIndex)) {
							processHexCharacter (oStrCommand);
							break;
						}
// Otherwise flush any accumulated text and restart the command string.
						flushText();
						oStrCommand.setLength (0);
						meState = STATE_COMMAND;
					} else if (c == CR) {
// Toss only CR's (removed check for LF).  It was losing line breaks when
// pasting or loading text (paragraph breaks were OK).  This is still not
// correct for RTF, but we'll sort it out when we do a full RTF
// implementation.
						flushText();
						oStrCommand.setLength (0);
						restartTextCollection();
					} else if (mbBlockScoping) {
						if (c == mcBlockPrefix) {
							parseBlock();
							oStrCommand.setLength (0);
							break;
						} else if (c == mcBlockSuffix) {
							flushText();
							restartTextCollection();
							return; // Exit nested block (assumes we're recursing).
						}
					}

					break;
// Accumulating a command: Check for end of command char and process if
// required.  An opening quote puts us in the quote state (without
// accumulating).  Otherwise, it's a raw command character.
				case STATE_COMMAND:
					if (mpMarkupAttr.isDelimiter (c)) {
						if (oStrCommand.length() == 0) { // e.g., "\\" or "\."
							String cc = "";

							if (c == '~') {
// In RTF a tilde is a non-breaking space (U+00A0).
								cc += '\u00A0';
							} else {
// Any other delimiter directly after the prefix is emitted literally.
								cc += c;
							}

							text (cc); // accumulate single character
							restartTextCollection();
							meState = STATE_TEXT;
						}
						else { // A command is sitting in oStrCommand

// Handle the markup command and empty the accumulated text.  Whether or
// not the command was handled, a transition is made back to accumulating
// text.
							int eTag = mpMarkupAttr.extractTag (oStrCommand);

							if (! onCommand (eTag, oStrCommand.toString())) {
								restartTextCollection();

// At this point we used to go into STATE_COMMAND_UNKNOWN except when it
// was the first command in a block.  This scenario failed when we
// encountered rtf that looked like:
// {\cgrid0 some text following...
								meState = STATE_TEXT;
							} else {
								if (skipThisCommand (eTag)) {
									mnIndex = skipBlock (mnIndex, mpStrSource);
									restartTextCollection();
									return;
								}

								meState = STATE_TEXT;
							}
							if (eTag == MarkupAttr.MARKUP_UNICODE_CHARACTER) {
// When we processed our UNICODE command, we skipped over some characters
// and should have a new delimiter.  If the command consumed the rest of
// the source there is nothing left to inspect.
								if (mnIndex >= mpStrSource.length()) {
									oStrCommand.setLength (0);
									restartTextCollection();
									break;
								}
								c = mpStrSource.charAt (mnIndex);		// TODO: C++ implementation doesn't use UniChar()
							}

							oStrCommand.setLength (0); // Fresh start
							restartTextCollection();

							if (c == mcCommandPrefix) { // Maintain state new command
								meState = STATE_COMMAND;

								if (isHexCharacterAt (mnIndex)) {
									processHexCharacter (oStrCommand);
									break;
								}
							} else if (mbBlockScoping) {
								if (c == mcBlockPrefix) {
									parseBlock();
								} else if (c == mcBlockSuffix) {
// Make sure we flush any MBText that might be hanging around.
									commitPending (true);
									return; // Exit nested block
								}
							}
						}
					}
					else if (c == '"') {
						meState = STATE_QUOTE;
					} else {
						oStrCommand.append (c);
					}

					break;
// Unknown command: continue reading/discarding until a block scoping character
// or command prefix is hit.
				case STATE_COMMAND_UNKNOWN:
					if (c == mcCommandPrefix) {
						meState = STATE_COMMAND;
					} else if (mbBlockScoping) { // block nesting
						if (c == mcBlockPrefix) {
							parseBlock();
						} else if (c == mcBlockSuffix) {
							return; // Exit nested block
						}
					}

					restartTextCollection(); // Squeaky clean

					break;
// In a quote: Closing quote goes back to command state (without
// accumulating).  Otherwise, accumulate.
				case STATE_QUOTE:
					if (c == '"') {
						meState = STATE_COMMAND;
					}
					else {
						oStrCommand.append (c);
					}
					break;
			}
		}

		switch (meState) {
// Handle any loose text or command at the end of the string.
			case STATE_TEXT:
				flushText();
				break;
			case STATE_COMMAND:
			case STATE_QUOTE:
				if (oStrCommand.length() > 0) {
					onCommand (mpMarkupAttr.extractTag (oStrCommand), oStrCommand.toString());
				}
				break;
		}
	}

	/**
	 * Test whether the two characters starting at nIndex equal the
	 * markup table's hex-character introducer.
	 * @param nIndex - Index of the command prefix in the source text.
	 * @return false when fewer than two characters remain, so the test
	 * never reads past the end of the source.
	 */
	private boolean isHexCharacterAt (int nIndex) {
		if (nIndex + 2 > mpStrSource.length()) {
			return false;
		}
		return mpStrSource.substring (nIndex, nIndex + 2).equals (mpMarkupAttr.lookup (MarkupAttr.MARKUP_HEX_CHARACTER));
	}

	/**
	 * Process the hex-character command whose command prefix is at
	 * mnIndex.  Up to three characters after the prefix are passed to
	 * onCommand(); fewer are passed if the source ends first.  On return
	 * mnIndex is on the last consumed character, the state is STATE_TEXT
	 * and text collection restarts after the command.
	 * @param oStrCommand - Scratch command buffer; cleared on return.
	 */
	private void processHexCharacter (StringBuilder oStrCommand) {
// If there's any pending text, emit it first
		if (mnIndex > mnTextStart) {
			flushText();
		}

// Grab the hex digits we're expecting (clamped to the end of the source)
// and process the command
		int nEnd = Math.min (mnIndex + 4, mpStrSource.length());
		oStrCommand.setLength (0);
		oStrCommand.append (mpStrSource, mnIndex + 1, nEnd);
		mnIndex = nEnd - 1;

		int eTag = mpMarkupAttr.extractTag (oStrCommand);

		onCommand (eTag, oStrCommand.toString());
		meState = STATE_TEXT;

		oStrCommand.setLength (0); // Fresh start
		restartTextCollection();
	}

/**
 * Replace the markup to be parsed and rewind the engine via reset().
 * @param sStrSource - String containing the markup to process.
 */
	public void setSourceText (String sStrSource) {
		this.mpStrSource = sStrSource;
		this.reset(); // start over from the beginning of the new source
	}

/**
 * Protected constructor.  Caches the scoping and command characters
 * from the markup attribute table, creates the attribute objects and
 * finishes with reset().
 * @param sStrSource - String containing the markup to process.
 * @param pMarkupAttr - Markup attributes (table) to drive the
 * translation of this markup language.
 */
	protected MarkupEngineIn (String sStrSource, MarkupAttr pMarkupAttr) {
// Source text and the table driving the translation
		this.mpStrSource = sStrSource;
		this.mpMarkupAttr = pMarkupAttr;

// Syntax characters copied out of the table
		this.mbBlockScoping = pMarkupAttr.hasBlockScoping();
		this.mcBlockPrefix = pMarkupAttr.blockPrefix();
		this.mcBlockSuffix = pMarkupAttr.blockSuffix();
		this.mcCommandPrefix = pMarkupAttr.commandPrefix();

// Attribute state; must exist before reset() touches it
		this.moAttrList = new Storage();
		this.moCurrentAttr = new TextAttr();
		this.moAmbientAttr = new TextAttr();
		this.mePendingTab = TextTab.TYPE_LEFT;

		reset();
	}

/**
 * Process a command from the markup language.  Supplied by the derived
 * class and called by translate() for each command it extracts.
 * @param eTag - Command ID from the markup attribute table.
 * @param sStrCommand - Command data.
 * @return translate() treats false as "command not handled".  Only
 * when true is returned does translate() go on to consult
 * skipThisCommand() for the same tag.
 */
	abstract protected boolean onCommand (int eTag, String sStrCommand);

/**
 * Obtain the markup source currently being parsed.
 * @return The source text string.
 */
	protected String sourceText () {
		return this.mpStrSource;
	}

/**
 * Obtain the text attributes currently in effect.
 * @return The current text attribute object itself (not a copy).
 */
	protected TextAttr textAttr () {
		return this.moCurrentAttr;
	}

/**
 * Obtain the markup attribute table supplied at construction.
 * @return The markup attribute table driving this engine.
 */
	protected MarkupAttr markupAttr () {
		return this.mpMarkupAttr;
	}

	/**
	 * Emit the current attributes through attr() unless they are empty,
	 * then set the pending tab back to TextTab.TYPE_LEFT.
	 */
	protected void flushAttr () {
		final boolean bNothingToEmit = moCurrentAttr.isEmpty();
		if (!bNothingToEmit) {
			attr (moCurrentAttr);
		}

// The pending tab is reset whether or not attributes were emitted.
		mePendingTab = TextTab.TYPE_LEFT;
	}

	/**
	 * Emit any text that is waiting to be output: first pending MB text
	 * (if any), then the source characters between the text start index
	 * and the current index.  The attributes are flushed ahead of each
	 * piece of text that is emitted.
	 */
	protected void flushText () {
// Force the MB text through the same codepath as non-MB text.
// Derived classes should see both.
		if (hasPendingMBText()) {
			flushAttr();
			text (mbText());
		}

// Nothing accumulated since the last restart: done.
		if (mnIndex <= mnTextStart) {
			return;
		}

		flushAttr();

// This path is called for plain text found in the rtf document.
// TODO: the text is passed through as-is, with no codepage conversion;
// probably completely wrong for RTF.
		final String sChunk = mpStrSource.substring (mnTextStart, mnIndex);
		text (sChunk);
	}

	/**
	 * Parse a numeric command argument using the default units and
	 * DEFAULT_UNITS_PER_POINT.
	 * @param oStrSource - Text holding the number.
	 * @return The parsed measurement.
	 * @throws NumberFormatException if no integer can be parsed.
	 */
	protected UnitSpan loadNumber (String oStrSource) {
		return loadNumber (oStrSource, UnitSpan.defaultUnits(), DEFAULT_UNITS_PER_POINT);
	}

	/**
	 * Parse a numeric command argument using DEFAULT_UNITS_PER_POINT.
	 * @param oStrSource - Text holding the number.
	 * @param eUnits - Units of the returned UnitSpan.
	 * @return The parsed measurement.
	 * @throws NumberFormatException if no integer can be parsed.
	 */
	protected UnitSpan loadNumber (String oStrSource, int eUnits) {
		return loadNumber (oStrSource, eUnits, DEFAULT_UNITS_PER_POINT);
	}

	/**
	 * Parse a numeric command argument.  Leading characters that the
	 * markup table classifies as delimiters are skipped, the remainder
	 * is parsed as a decimal integer, scaled by
	 * 1,000,000 / nUnitsPerPoint and rounded to the nearest integer.
	 * <p>
	 * The scaling is done in double precision and the result saturates
	 * at the int range, so large values neither wrap around nor lose
	 * precision.
	 * @param oStrSource - Text holding the number; not modified.
	 * @param eUnits - Units of the returned UnitSpan.
	 * @param nUnitsPerPoint - Divisor applied to the parsed value.
	 * @return A UnitSpan built from the scaled value, which is passed to
	 * the UnitSpan constructor as a UnitSpan.POINTS_1K quantity.
	 * @throws NumberFormatException if the remaining text is not an
	 * integer.
	 */
	protected UnitSpan loadNumber (String oStrSource, int eUnits, int nUnitsPerPoint) {
// Skip leading delimiters without copying or modifying the parameter.
// NOTE(review): a leading '-' is only kept (and parsed as a sign) if the
// markup table does not classify it as a delimiter -- confirm.
		final int nLength = oStrSource.length();
		int nStart = 0;
		while ((nStart < nLength) && mpMarkupAttr.isDelimiter (oStrSource.charAt (nStart))) {
			nStart++;
		}

		final int nParsed = Integer.parseInt (oStrSource.substring (nStart));

// NOTE(review): the factor is 1000 * 1000 although the result is handed
// over as POINTS_1K -- scale kept as found; confirm it is intended.
		final double dScaled = (nParsed * 1000.0 * 1000.0) / nUnitsPerPoint;
		final long nRounded = Math.round (dScaled);
		final int nValue = (int) Math.max (Integer.MIN_VALUE, Math.min (Integer.MAX_VALUE, nRounded));

		return new UnitSpan (eUnits, UnitSpan.POINTS_1K, nValue);
	}

/**
 * Handle one nested block.  Called by translate() when it meets a
 * block prefix: the parsing context (state and pending tab) is saved,
 * translate() is re-entered for the block contents, and afterwards the
 * attributes and the saved context are put back.
 */
	protected void parseBlock () {
		startBlock();

// Remember the context that must survive the nested block.
		final int nOuterState = meState;
		final int nOuterPendingTab = mePendingTab;

// Emit whatever is outstanding, then stack the current attributes.
		flushText();
		flushAttr();
		pushAttr();
		restartTextCollection();

// Step over the block opening char and parse the contents recursively.
		mnIndex++;
		meState = STATE_TEXT;
		translate();

// Emit what the nested block left behind and reset the text indices.
		flushText();
		restartTextCollection();

		endBlock();

// Back to the pre-block attributes.
		popAttr();
		flushAttr();

// Force a text flush: MB text may still be waiting to be emitted.
		commitPending (true);

		meState = nOuterState;
		mePendingTab = nOuterPendingTab;
	}

/**
 * Set the pending tab justification.
 * @param ePendingTab - Enumerated tab value.
 */
	protected void pendingTab (int ePendingTab) {
		this.mePendingTab = ePendingTab;
	}

/**
 * Obtain the pending tab justification.
 * @return Enumerated tab value.
 */
	protected int pendingTab () {
		return this.mePendingTab;
	}

/**
 * Move the start of text accumulation to the character that follows
 * the current index.
 */
	protected void restartTextCollection () {
		final int nNextChar = mnIndex + 1;
		mnTextStart = nNextChar;
	}

//---------------------------------------------------------------------------
// Default implementation of some methods which are really only needed by RTF
//---------------------------------------------------------------------------
// Consulted by translate() after onCommand() has returned true for a
// command.  A true result makes translate() call skipBlock() and return
// from the current call.  This default never skips.
	protected boolean skipThisCommand (int eTag) {
		return false;
	}

// Default values

/**
 * Obtain the default font size.
 * @return DEFAULT_FONT_SIZE.
 */
	protected UnitSpan defaultFontSize () {
		return MarkupEngineIn.DEFAULT_FONT_SIZE;
	}

	/**
	 * Obtain the default typeface name.
	 * @return DEFAULT_TYPEFACE_NAME.
	 */
	protected String defaultTypeface () {
		return MarkupEngineIn.DEFAULT_TYPEFACE_NAME;
	}

/**
 * Skip a block that cannot be parsed, together with all the blocks
 * nested inside it.  Scanning starts at nStart with a nesting depth of
 * one; every block prefix raises the depth and every block suffix
 * lowers it.
 * @param nStart - Index at which to start scanning.
 * @param sRTF - Text to scan.
 * @return Index of the block suffix that brings the depth to zero, or
 * the index at which scanning ran off the end of the text.
 */
	protected int skipBlock (int nStart, String sRTF) {
		final int nLength = sRTF.length();
		int nDepth = 1;
		int nPos = nStart;

		for (; nPos < nLength; nPos++) {
			final char cCurrent = sRTF.charAt (nPos);		// TODO: C++ implementation doesn't use UniChar()

			nDepth += (cCurrent == mcBlockPrefix) ? 1 : ((cCurrent == mcBlockSuffix) ? -1 : 0);

			if (nDepth == 0) {
				break; // nPos stays on the closing character
			}
		}

		return nPos;
	}

	/**
	 * Hook called by parseBlock() as its first step, before the parsing
	 * context is saved.  Does nothing in this class.
	 */
	protected void startBlock () {
	}

	/**
	 * Hook called by parseBlock() after the nested block has been
	 * translated and its text flushed, before the attributes are popped.
	 * Does nothing in this class.
	 */
	protected void endBlock () {
	}

/**
 * Reset the member variables for the beginning of a translation: the
 * state machine returns to STATE_TEXT at index 0, the current
 * attributes are set to their defaults with a number of them disabled,
 * and the attribute stack is emptied and reseeded with a copy of the
 * current attributes.
 */
	protected void reset () {
		meState = STATE_TEXT;
		mnIndex = mnTextStart = 0;

// Establish our default attribute settings.
		final TextAttr oAttr = moCurrentAttr;
		oAttr.setDefault (true);

// Typeface and size are deliberately left unspecified.  If the markup
// does not set them, the environment should supply the default (e.g. the
// default font for a field).  The old FF99 edit control will emit xhtml
// with no font specified if the font is the same as the field.
		oAttr.typefaceEnable (false);
		oAttr.sizeEnable (false);

// Paragraph attributes are disabled.  The current attributes get pushed
// and popped, and a pop must not bring back paragraph attributes that
// would override ones set earlier in the same paragraph.
		oAttr.specialEnable (false);
		oAttr.justifyVEnable (false);
		oAttr.justifyHEnable (false);
		oAttr.tabsEnable (false);
		oAttr.spacingEnable (false);
		oAttr.spaceBeforeEnable (false);
		oAttr.spaceAfterEnable (false);
		oAttr.marginLEnable (false);
		oAttr.marginREnable (false);

// Background colour is not set in xhtml, so its default (white) must not
// clobber the control values.
		oAttr.colourBgEnable (false);

// Start with a stack holding only a copy of the fresh attributes.
		moAttrList.clear();
		pushAttr();
	}

	/**
	 * Make the most recently pushed attributes current again, remove
	 * them from the stack, and emit a copy of them through attr() after
	 * isolatePara (false, false) has been applied to that copy.
	 */
	protected void popAttr () {
		this.moCurrentAttr = this.moAttrList.last();
		this.moAttrList.removeLast();

// We want the previous context back, but not yet the previous paragraph
// attributes: applying those is not safe unless a paragraph has been
// inserted.  The copy keeps moCurrentAttr itself untouched.
		final TextAttr oEmitted = new TextAttr (this.moCurrentAttr);
		oEmitted.isolatePara (false, false);
		attr (oEmitted);
	}

	/**
	 * Save a copy of the current attributes on the attribute stack.
	 */
	protected void pushAttr () {
		final TextAttr oSnapshot = new TextAttr (moCurrentAttr);
		moAttrList.add (oSnapshot);
	}
}