All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.adobe.xfa.text.markup.MarkupEngineIn Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
//----------------------------------------------------------------------
//
//	ADOBE CONFIDENTIAL
//	__________________
//
//		Copyright 1995 - 2003 Adobe Systems Incorporated.  All
//		Rights Reserved.
//
//		NOTICE:  All information contained herein is, and remains
//		the property of Adobe Systems Incorporated and its
//		suppliers, if any.	The intellectual and technical
//		concepts contained herein are proprietary to Adobe Systems
//		Incorporated and its suppliers and may be covered by U.S.
//		and Foreign Patents, patents in process, and are protected
//		by trade secret or copyright law.  Dissemination of this
//		information or reproduction of this material is strictly
//		forbidden unless prior written permission is obtained from
//		Adobe Systems Incorporated.
//
//----------------------------------------------------------------------

package com.adobe.xfa.text.markup;

import com.adobe.xfa.text.TextAttr;
import com.adobe.xfa.text.TextTab;

import com.adobe.xfa.ut.Storage;
import com.adobe.xfa.ut.UnitSpan;


/**
 * Class MarkupEngineIn defines a general-purpose markup engine
 * for handling RTF-like markup languages.	Most of the actual parsing
 * of the markup language and the sequencing of rich text generation
 * calls to the base class (MarkupIn) occurs in this class.  The
 * derived class primarily supplies tables to drive the process.
 * 

*

* Unfortunately this model doesn't work well for XHTML markup. Even * though the XHTML input markup engine extends this class, it does most * of the processing itself. *

*

* For more information, please see the extenral documentation. *

* * @exclude from published api -- Mike Tardif, May 2006. */ public abstract class MarkupEngineIn extends MarkupIn { // Finite state machine states protected final static int STATE_TEXT = 0; // accumulating plain text // accumulating a command // unknown/unhandled command // accumulating quoted string in command protected final static int STATE_COMMAND = 1; // accumulating plain text // accumulating a command // unknown/unhandled command // accumulating quoted string in command protected final static int STATE_COMMAND_UNKNOWN = 2; // accumulating plain text // accumulating a command // unknown/unhandled command // accumulating quoted string in command protected final static int STATE_QUOTE = 3; // accumulating plain text // accumulating a command // unknown/unhandled command // accumulating quoted string in command protected final static int DEFAULT_UNITS_PER_POINT = 20; protected int meState; // Data protected final boolean mbBlockScoping; // nested command block support protected final char mcBlockPrefix; // open scoping char protected final char mcBlockSuffix; // close scoping char protected final char mcCommandPrefix; // command marker protected TextAttr moCurrentAttr; protected final Storage moAttrList; protected TextAttr moAmbientAttr; protected int mnIndex; // current index into source protected String mpStrSource; // source string protected final static UnitSpan DEFAULT_FONT_SIZE = new UnitSpan (UnitSpan.POINTS_1K, 12000); protected final static String DEFAULT_TYPEFACE_NAME = "Courier Std"; private final MarkupAttr mpMarkupAttr; // markup lang attributes //private String moOutputText; private int mnTextStart; // start of most recent text chunk private int mePendingTab; // pending tab justification public void translate () { //final char LF = 0x0A; // Line feed final char CR = 0x0D; // Carriage return StringBuilder oStrCommand = new StringBuilder(); //boolean bFirstCommandInBlock = true; for (; mnIndex < mpStrSource.length(); mnIndex++) { // This is the finite state machine. It iterates once for each source // character. char c = mpStrSource.charAt (mnIndex); // TODO: C++ implementation doesn't use UniChar() switch (meState) { case STATE_TEXT: if (c == mcCommandPrefix) { // Accumulating text // A command prefix causes a state switch. Flush any accumulated // text and restart the command string. Otherwise, simply rely on // the loop counter. if (mpStrSource.substring(mnIndex,mnIndex+2).equals (mpMarkupAttr.lookup (MarkupAttr.MARKUP_HEX_CHARACTER))) { // Hex characters don't get nicely terminated by a delimeter if (mnIndex > mnTextStart) { // If there's any pending text, emit it first flushText(); } // Grab the two hex digits we're expecting, // and process the command oStrCommand.setLength (0); oStrCommand.append(mpStrSource, mnIndex + 1, mnIndex + 4); mnIndex += 3; int eTag = mpMarkupAttr.extractTag (oStrCommand); onCommand (eTag, oStrCommand.toString()); meState = STATE_TEXT; oStrCommand.setLength (0); // Fresh start restartTextCollection(); break; } flushText(); oStrCommand.setLength (0); meState = STATE_COMMAND; } else if (c == CR) { // Toss only CR's (removed check for LF). It was losing line breaks when // pasting or loading text (paragraph breaks were OK). This is still not // correct for RTF, but we'll sort it out when we do a full RTF // implementation. flushText(); oStrCommand.setLength (0); restartTextCollection(); } else if (mbBlockScoping) { if (c == mcBlockPrefix) { parseBlock(); oStrCommand.setLength (0); break; } else if (c == mcBlockSuffix) { flushText(); restartTextCollection(); return; // Exit nested block (assumes we're recursing). } } break; // Accumulating a command: Check for end of command char and process if // required. An opening quote puts us in the quote state (without // accumulating). Otherwise, it's a raw command character. case STATE_COMMAND: if (mpMarkupAttr.isDelimiter (c)) { if (oStrCommand.length() == 0) { // e.g., "\\" or "\." String cc = ""; if (c == '~') { // In RTF a tilde is a non-breaking space (Unicode 160) // The jfString constructor expects UTF-8 so we use 0xC2A0. cc += '\u00A0'; } else { // We know this cast is safe, since rtf contains only // 7 or 8 bit characters. cc += c; } text (cc); // accumulate single character restartTextCollection(); meState = STATE_TEXT; } else { // A command is sitting in oStrCommand // Handle the markup command and empty the accumulated text. If the command // is not handled, a transition is made to the UNKNOWN state (which discards // all subsequent text until a block scoping character or new command is // encountered). Otherwise, a transition is made back to accumulating text. int eTag = mpMarkupAttr.extractTag (oStrCommand); if (! onCommand (eTag, oStrCommand.toString())) { restartTextCollection(); // At this point we used to go into // STATE_COMMAND_UNKNOWN // except when it was the first command in a block. // This scenario failed when we encountered // rtf that looked like: // {\cgrid0 some text following... meState = STATE_TEXT; } else { if (skipThisCommand (eTag)) { mnIndex = skipBlock (mnIndex, mpStrSource); restartTextCollection(); return; } meState = STATE_TEXT; } if (eTag == MarkupAttr.MARKUP_UNICODE_CHARACTER) { // When we processed our UNICODE command, we // skipped over some characters and should have // a new delimeter c = mpStrSource.charAt (mnIndex); // TODO: C++ implementation doesn't use UniChar() } oStrCommand.setLength (0); // Fresh start restartTextCollection(); if (c == mcCommandPrefix) { // Maintain state new command meState = STATE_COMMAND; if (mpStrSource.substring(mnIndex,mnIndex+2).equals (mpMarkupAttr.lookup (MarkupAttr.MARKUP_HEX_CHARACTER))) { // Hex characters don't get nicely terminated by a delimeter if (mnIndex > mnTextStart) { // If there's any pending text, emit it first flushText(); } // Grab the two hex digits we're expecting, // and process the command oStrCommand.setLength (0); oStrCommand.append(mpStrSource, mnIndex + 1, mnIndex + 4); mnIndex += 3; eTag = mpMarkupAttr.extractTag (oStrCommand); onCommand (eTag, oStrCommand.toString()); meState = STATE_TEXT; oStrCommand.setLength (0); // Fresh start restartTextCollection(); break; } } else if (mbBlockScoping) { if (c == mcBlockPrefix) { parseBlock(); } else if (c == mcBlockSuffix) { // Make sure we flush any MBText that might be // hanging around. commitPending (true); return; // Exit nested block } } } } else if (c == '"') { meState = STATE_QUOTE; } else { oStrCommand.append (c); } break; // Unknown command: continue reading/discarding until a block scoping character // or command prefix is hit. case STATE_COMMAND_UNKNOWN: if (c == mcCommandPrefix) { meState = STATE_COMMAND; } else if (mbBlockScoping) { // block nesting if (c == mcBlockPrefix) { parseBlock(); } else if (c == mcBlockSuffix) { return; // Exit nested block } } restartTextCollection(); // Squeaky clean break; // In a quote: Closing quote goes back to command state (without // accumulating). Otherwise, accumulate. case STATE_QUOTE: if (c == '"') { meState = STATE_COMMAND; } else { oStrCommand.append (c); } break; } } switch (meState) { // Handle any loose text or command at the end of the string. case STATE_TEXT: flushText(); break; case STATE_COMMAND: case STATE_QUOTE: if (oStrCommand.length() > 0) { onCommand (mpMarkupAttr.extractTag (oStrCommand), oStrCommand.toString()); } break; } } /** * Set the source markup string to parse. * @param sStrSource - String containing the markup to process. */ public void setSourceText (String sStrSource) { mpStrSource = sStrSource; reset(); } /** * Protected constructor. * @param sStrSource - String containing the markup to process. * @param pMarkupAttr - Markup attributes (table) to drive the * translation of this markup language. */ protected MarkupEngineIn (String sStrSource, MarkupAttr pMarkupAttr) { mePendingTab = TextTab.TYPE_LEFT; mpStrSource = sStrSource; mpMarkupAttr = pMarkupAttr; mbBlockScoping = pMarkupAttr.hasBlockScoping(); mcBlockPrefix = pMarkupAttr.blockPrefix(); mcBlockSuffix = pMarkupAttr.blockSuffix(); mcCommandPrefix = pMarkupAttr.commandPrefix(); moAttrList = new Storage(); moCurrentAttr = new TextAttr(); moAmbientAttr = new TextAttr(); reset(); } /** * Pure virtual: Process a command from the markup language. * @param eTag - Command ID from the markup attribute table. * @param sStrCommand - Command data. */ abstract protected boolean onCommand (int eTag, String sStrCommand); /** * Obtain the source text. * @return Pointer to source text string. */ protected String sourceText () { return mpStrSource; } /** * Obtain the current text attributes. * @return A reference to the current text attributes. */ protected TextAttr textAttr () { return moCurrentAttr; } /** * Obtain the current markup attribute table. * @return Pointer to the current markup attribute table. */ protected MarkupAttr markupAttr () { return mpMarkupAttr; } protected void flushAttr () { if (!moCurrentAttr.isEmpty()) { attr (moCurrentAttr); } mePendingTab = TextTab.TYPE_LEFT; } protected void flushText () { if (hasPendingMBText()) { flushAttr(); // Force the MB text through the same codepath as non-MB text. // Derived classes should see both. // CommitPending (TRUE); text (mbText()); } if (mnIndex > mnTextStart) { flushAttr(); // This path is called for plain text found in the rtf document. As such, we need // to interpret the text in the codepage of the current charconverter. // char pszSource = ((char) (mpStrSource)); // String sOut (pszSource + mnTextStart, mnIndex - mnTextStart, CharConverter()); text (mpStrSource.substring (mnTextStart, mnIndex)); // TODO: probably completely wrong for RTF } } protected UnitSpan loadNumber (String oStrSource) { return loadNumber (oStrSource, UnitSpan.defaultUnits(), DEFAULT_UNITS_PER_POINT); } protected UnitSpan loadNumber (String oStrSource, int eUnits) { return loadNumber (oStrSource, eUnits, DEFAULT_UNITS_PER_POINT); } protected UnitSpan loadNumber (String oStrSource, int eUnits, int nUnitsPerPoint) { StringBuilder oStrTemp = new StringBuilder (oStrSource); // Don't modify parameter int nValue; while ((oStrTemp.length() > 0) && mpMarkupAttr.isDelimiter (oStrTemp.charAt (0))) { // Remove any leading characters which are not number related. // Note: '-' is valid, indicating a negative value oStrTemp.deleteCharAt (0); } nValue = Integer.parseInt (oStrTemp.toString()); nValue *= 1000; nValue = Math.round ((nValue * 1000.0f) / (float) nUnitsPerPoint); return new UnitSpan (eUnits, UnitSpan.POINTS_1K, nValue); } //----------------------------------------------------------------------------- // Function: ParseBlock // // Description: // Called by Translate(). Saves the current block parsing context before // recursing into Translate() for a new block. Upon return, the context // is restored and the pre-recursion text attributes are restored if // changed in the recursive call to Translate(). // // Parameters: None. // // Returns: Void. // //----------------------------------------------------------------------------- protected void parseBlock () { startBlock(); // Save the context sensitive attributes to restore after the block // has been handled int eStateSaved = meState; int ePendingTabSaved = mePendingTab; flushText(); // Flush outstanding text/attr flushAttr(); // Explicitly flush our attributes. pushAttr(); restartTextCollection(); // Reset text indices ++mnIndex; // Discard the block opening char meState = STATE_TEXT; // Fresh start translate(); // Recurse away flushText(); // Flush translated block text/attr restartTextCollection(); // Reset indices endBlock(); // Restore the pre-block context information popAttr(); flushAttr(); // Force a Text flush. There could be pending char* (MBText) data // waiting to be emitted. commitPending (true); meState = eStateSaved; mePendingTab = ePendingTabSaved; } //----------------------------------------------------------------------------- // Function: PendingTab // // Description: // Sets the pending tab member variable. // // Parameters: ePendingTab - enumerated tab value // // Returns: N/A. // //----------------------------------------------------------------------------- protected void pendingTab (int ePendingTab) { mePendingTab = ePendingTab; } //----------------------------------------------------------------------------- // Function: PendingTab // // Description: // Returns the pending tab member variable value. // // Parameters: None. // // Returns: Enumerated tab value. // //----------------------------------------------------------------------------- protected int pendingTab () { return mePendingTab; } //----------------------------------------------------------------------------- // Function: RestartTextCollection // // Description: // Resets the beginning index for text accumulation to the character // following the current index. // // Parameters: None. // // Returns: Void. // //----------------------------------------------------------------------------- protected void restartTextCollection () { mnTextStart = mnIndex + 1; } //--------------------------------------------------------------------------- // Default implementation of some methods which are really only needed by RTF //--------------------------------------------------------------------------- protected boolean skipThisCommand (int eTag) { return false; } // Default values protected UnitSpan defaultFontSize () { return DEFAULT_FONT_SIZE; } protected String defaultTypeface () { return DEFAULT_TYPEFACE_NAME; } //--------------------------------------------------------------------------- // // SkipBlock. // When we find ourselves inside a block which we know we can't parse, we // need to skip this block and all the nested blocks before we continue. //--------------------------------------------------------------------------- protected int skipBlock (int nStart, String sRTF) { int nBraceLevel = 1; int nIndex = nStart; while (nIndex < sRTF.length()) { char c = sRTF.charAt (nIndex); // TODO: C++ implementation doesn't use UniChar() if (c == mcBlockPrefix) { nBraceLevel++; } else if (c == mcBlockSuffix) { nBraceLevel--; } if (nBraceLevel == 0) { break; } nIndex++; } return nIndex; } protected void startBlock () { } protected void endBlock () { } //----------------------------------------------------------------------------- // Function: Reset // // Description: // Resets member variables for beginning of translation. // // Parameters: None. // // Returns: Void. // //----------------------------------------------------------------------------- protected void reset () { meState = STATE_TEXT; mnIndex = 0; mnTextStart = 0; // Establish our default attribute settings. moCurrentAttr.setDefault (true); // Special handling for Type and size. We don't want // specified from the start, since if they're not, the environment should // take over and provide the default. e.g. The default font for a field. // The old FF99 edit control will emit xhtml with no font specified if the // font is the same as the field. moCurrentAttr.typefaceEnable (false); moCurrentAttr.sizeEnable (false); // Disable the paragraph attributes. Since we push and pop the current // attributes, we don't want to pop the paragraph attributes which would // override any attributes previously set in the same paragraph. moCurrentAttr.specialEnable (false); moCurrentAttr.justifyVEnable (false); moCurrentAttr.justifyHEnable (false); moCurrentAttr.tabsEnable (false); moCurrentAttr.spacingEnable (false); moCurrentAttr.spaceBeforeEnable (false); moCurrentAttr.spaceAfterEnable (false); moCurrentAttr.marginLEnable (false); moCurrentAttr.marginREnable (false); // Special handling for background colour. This is not set in xhtml, so // we don't want this attr's default (white) clobbering the control values. moCurrentAttr.colourBgEnable (false); moAttrList.clear(); pushAttr(); } protected void popAttr () { moCurrentAttr = moAttrList.last(); moAttrList.removeLast(); // We want to get back to our previous context, but we don't yet // want our previous paragraph attributes. It's not safe to apply // them unless there's been a paragraph inserted. TextAttr oNonParaAttrs = new TextAttr (moCurrentAttr); oNonParaAttrs.isolatePara (false, false); attr (oNonParaAttrs); } protected void pushAttr () { moAttrList.add (new TextAttr (moCurrentAttr)); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy