All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.adobe.xfa.text.markup.MarkupRtfIn Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
//----------------------------------------------------------------------
//
//	ADOBE CONFIDENTIAL
//	__________________
//
//		Copyright 1995 - 2003 Adobe Systems Incorporated.  All
//		Rights Reserved.
//
//		NOTICE:  All information contained herein is, and remains
//		the property of Adobe Systems Incorporated and its
//		suppliers, if any.	The intellectual and technical
//		concepts contained herein are proprietary to Adobe Systems
//		Incorporated and its suppliers and may be covered by U.S.
//		and Foreign Patents, patents in process, and are protected
//		by trade secret or copyright law.  Dissemination of this
//		information or reproduction of this material is strictly
//		forbidden unless prior written permission is obtained from
//		Adobe Systems Incorporated.
//
//----------------------------------------------------------------------
package com.adobe.xfa.text.markup;

import com.adobe.xfa.gfx.GFXColour;
import com.adobe.xfa.gfx.GFXTextAttr;

import com.adobe.xfa.text.TextBaselineShift;
import com.adobe.xfa.text.TextAttr;
import com.adobe.xfa.text.TextMeasurement;
import com.adobe.xfa.text.TextTab;
import com.adobe.xfa.text.TextTabList;

import com.adobe.xfa.ut.StringUtils;
import com.adobe.xfa.ut.UnitSpan;

import java.util.HashMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;


/**
 * This class is the RTF input markup engine.  To use it, one creates an
 * instance, passing the string to be translated in a constructor
 * parameter.  Then the engine is passed to a Markup() method on either
 * a text stream or range.
 * 

*

* For more information, please see the extenral documentation. *

* * @exclude from published api -- Mike Tardif, May 2006. */ public class MarkupRtfIn extends MarkupEngineIn { //private final static String gsBig5 ="Big5"; private final static String gsBlue ="\\blue"; private final static String gsCodePage ="\\ansicpg"; private final static String gsColourtbl ="{\\colortbl"; //private final static String gsGBK ="GBK"; private final static String gsGreen ="\\green"; //private final static String gsKSC5601 ="KSC-5601"; private final static String gsPt ="pt"; private final static String gsRed ="\\red"; //private final static String gsShiftJIS ="Shift-JIS"; private final static String gsSkip =" "; private final static String gsStyleSheet ="{\\stylesheet"; private final static String gsTab ="\t"; // private final static String gsWindows874 ="windows-874"; // private final static String gsWindows1250 ="windows-1250"; // private final static String gsWindows1251 ="windows-1251"; // private final static String gsWindows1252 ="windows-1252"; // private final static String gsWindows1253 ="windows-1253"; // private final static String gsWindows1254 ="windows-1254"; // private final static String gsWindows1255 ="windows-1255"; // private final static String gsWindows1256 ="windows-1256"; // private final static String gsWindows1257 ="windows-1257"; // private final static String gsWindows1258 ="windows-1258"; // The following members are used when parsing the stylesheet area // of the RTF file. private final Map moStyleSheets = new HashMap(); private boolean mbCollectingStyle; private int mnStyleNum; private final Map moFontMap = new HashMap(); private final Map moFontCharset = new HashMap(); private final List moColourTable = new ArrayList(); private int mnDefaultFont; // Tracks the number of characters we're // supposed to skip when we encounter a // Unicode character. private int mnUC_count; /** * Constructor with source string. * @param sStrSource - RTF source string to translate. * @param pMarkupAttr - (optional) Pointer to RTF translation attribute * table to use instead of the default. It is strongly recommended that * you leave this parameter NULL (default). */ public MarkupRtfIn (String sStrSource, MarkupAttr pMarkupAttr) { super (sStrSource, (pMarkupAttr == null) ? RTFAttr.GetDefault() : pMarkupAttr); mnUC_count = 1; mbCollectingStyle = false; } public MarkupRtfIn (String sStrSource) { this (sStrSource, null); } //----------------------------------------------------------------------------- // Function: Translate // // Description: // Ancestor override to extract the font table information from the // source string before the translation begins. // // Parameters: None. // // Returns: Void. // //----------------------------------------------------------------------------- public void translate () { StringBuilder sStrCopy = new StringBuilder (sourceText()); String sStrRtfFontMap = null; // Strip all carriage returns followed by a commands int nPos; for (nPos = sStrCopy.indexOf ("\n\\"); nPos >= 0; nPos = sStrCopy.indexOf ("\n\\")) { sStrCopy.deleteCharAt (nPos); } // Detect Paragraph followed by carriage return for (nPos = sStrCopy.indexOf ("\\par\n"); nPos >= 0; nPos = sStrCopy.indexOf ("\\par\n")) { sStrCopy.replace (nPos+4, nPos+4, " "); } // Strip out extra carriage returns and spaces. char cPrev = 0; for (int i = 0; i < sStrCopy.length(); i++) { char c = sStrCopy.charAt (i); // TODO: C++ implementation doesn't use UniChar() if ((i > 0) && (c == mcBlockPrefix)) { // Delete all spaces and carriage returns preceding a block prefix while ((cPrev == '\r') || (cPrev == ' ')) { sStrCopy.deleteCharAt (i - 1); i--; if (i == 0) { break; } cPrev = sStrCopy.charAt (i - 1); } } if ((c == mcBlockSuffix) && (cPrev != mcCommandPrefix) && ((i + 1) < sStrCopy.length())) { // Delete all spaces and carriage returns after a command block suffix char cNext = sStrCopy.charAt (i + 1); while ((cNext == '\r') || (cNext == ' ')) { sStrCopy.deleteCharAt (i + 1); if (i == sStrCopy.length()) { break; } cNext = sStrCopy.charAt (i + 1); } } if ((c == '\r') && ((i + 1) < sStrCopy.length())) { // Strip out single carriage returns; if there's a real carriage return // it will be represented as two carriage returns. sStrCopy.deleteCharAt (i); if (sStrCopy.charAt (i) == '\r') { i++; } } cPrev = sStrCopy.charAt (i); } sStrRtfFontMap = extractFontTableData (sStrCopy); String finalSource = sStrCopy.toString(); if (sStrRtfFontMap != null) { // The font map is constructed only. The initial RTF string contains // font table information, but is removed after font map construction. // ExtractFontTableData should fail when it cannot find it. // Look for the code page specifier. If it occurs, we need to respect // it for translating font names. int nFoundAt = finalSource.indexOf (gsCodePage); if (nFoundAt >= 0) { int nLength = 0; nFoundAt += gsCodePage.length(); for (int i = nFoundAt; Character.isDigit (finalSource.charAt (i)); i++) { nLength++; } //String sNum = finalSource.substring (nFoundAt, nFoundAt + nLength); //Integer conv = StringUtils.number (sNum); // TODO: char converter stuff // if (sNum.Number (lNum) && jfCharConverter.IsAnsiCodePage (lNum)) { // jfCharConverter poConv = jfCharConverter.Default().Clone (sNum); // CharConverter (poConv); // } } buildFontMap (sStrRtfFontMap); setSourceText (finalSource); } buildColourTable (finalSource); buildStyleSheets (finalSource); super.translate(); flushAttr(); // Force a Text flush. There could be pending char* (MBText) data // waiting to be emitted. commitPending (true); } public String defaultTypeface () { // in Rtf it seems the first font is the default. return moFontMap.get(mnDefaultFont); } public boolean skipThisCommand (int eTag) { if ((eTag == MarkupAttr.MARKUP_STYLESHEET) || (eTag == MarkupAttr.MARKUP_INFO) || (eTag == MarkupAttr.MARKUP_PICTURE) || (eTag == MarkupAttr.MARKUP_HEADER) || (eTag == MarkupAttr.MARKUP_HEADER_LEFT) || (eTag == MarkupAttr.MARKUP_HEADER_RIGHT) || (eTag == MarkupAttr.MARKUP_HEADER_FIRST) || (eTag == MarkupAttr.MARKUP_FOOTER) || (eTag == MarkupAttr.MARKUP_FOOTER_LEFT) || (eTag == MarkupAttr.MARKUP_FOOTER_RIGHT) || (eTag == MarkupAttr.MARKUP_FOOTER_FIRST) || ((eTag == MarkupAttr.MARKUP_DESTINATION_GROUP) && (! mbCollectingStyle)) || (eTag == MarkupAttr.MARKUP_COLOUR_TABLE)) { return true; } return false; } //----------------------------------------------------------------------------- // Function: FlushAttr // // Description: // Writes any pending attributes to the text stream, then clears them. // RTF expects hanging indentation to be added to the left margin, so we // must subtract it out again to get things in a consistent state. // //----------------------------------------------------------------------------- protected void flushAttr () { if (! textAttr().isEmpty()) { super.flushAttr(); } } //----------------------------------------------------------------------------- // Function: OnCommand // // Description: // Performs handling for RTF markup commands. // // Parameters: // eTag - enumerated command tag. // sCommand - command string (without markup command) // // Returns: TRUE if the command is handled, FALSE otherwise. // //----------------------------------------------------------------------------- protected boolean onCommand (int eTag, String sCommand) { final int seUnder0 = GFXTextAttr.UNDER_NONE; final int seUnderS = (GFXTextAttr.UNDER_ALL | GFXTextAttr.UNDER_SINGLE); final int seUnderDbl = (GFXTextAttr.UNDER_ALL | GFXTextAttr.UNDER_DOUBLE); final int seUnderW = (GFXTextAttr.UNDER_WORD | GFXTextAttr.UNDER_SINGLE); boolean bCommandHandled = true; switch (eTag) { // paragraph case MarkupAttr.MARKUP_PARAGRAPH_START: flushText(); // +++ // New paragraph break; case MarkupAttr.MARKUP_PARAGRAPH_END: flushAttr(); // No associated text para(); break; case MarkupAttr.MARKUP_PARAGRAPH_DEFAULT: { // Lookup and use the default style TextAttr value = moStyleSheets.get(0); if (value != null) { moCurrentAttr = value; } break; } case MarkupAttr.MARKUP_PARAGRAPH_PLAIN: { // Revert to application-defined attributes (Except for Typeface) String sTypeface = textAttr().typeface(); textAttr().setDefault (true); textAttr().typeface (sTypeface); textAttr().size (defaultFontSize()); break; } // Special character case MarkupAttr.MARKUP_HEX_CHARACTER: { byte[] cc = new byte [1]; cc[0] = getHexValue (sCommand); mbText (cc); break; } case MarkupAttr.MARKUP_UC_COUNT: { mnUC_count = StringUtils.safeNumber (sCommand); break; } case MarkupAttr.MARKUP_TABLE_ROW: { text ("\n"); break; } case MarkupAttr.MARKUP_TABLE_CELL: { text ("\t"); break; } case MarkupAttr.MARKUP_UNICODE_CHARACTER: { Integer lUnicodeVal = StringUtils.number (sCommand); if (lUnicodeVal != null) { // Watson #1192752. // At this point we may have collected some attribute changes in // MarkupEngineIn::moCurrentAttr. We need to make sure and // flush these into MarkupIn::moPendingAttr before the call to // MarkupIn::Text() so that they are written to the stream in // MarkupIn::CommitPending() flushAttr(); // RTF 1.6 control words generally accept signed 16-bit numbers // as arguments. For this reason, Unicode values greater than // 32767 must be expressed as negative numbers, which means. // lUnicodeVal may be negative. Masking low order 16-bits is // therefore required. Mike Tardif -- May 14, 2004. char c = (char) (lUnicodeVal.intValue() & 0xFFFF); String sOut = ""; sOut += c; text (sOut); if (mpStrSource.charAt (mnIndex) == ' ') { // Skip over possible command-terminating space mnIndex++; } for (int j = 0; j < mnUC_count; j++) { // Now skip over the number of characters specified by // MARKUP_UC_COUNT. These will most likely be escaped hex // values. char cNext = mpStrSource.charAt (mnIndex); if (cNext == '\\') { if (mpStrSource.charAt (mnIndex + 1) == '\'') { // Skip the hex-value character // looks like: \'a3 mnIndex += 4; } } else { // Skip a single character mnIndex++; } } } // We want to set bCommandHandled = 0 so that when we exit // this method we'll get back into text collection mode. // Text collection restarts at +1 character after, so decrement // mnIndex to get to the correct position. // Prior to this fix we'd fail to get back to text collection mode // and we'd lose the trailing character (i). // Roach #57143 bCommandHandled = false; mnIndex--; break; } case MarkupAttr.MARKUP_ENDASH: text (" -"); break; case MarkupAttr.MARKUP_EMDASH: text (" --"); break; case MarkupAttr.MARKUP_ENSPACE: text (" "); break; case MarkupAttr.MARKUP_EMSPACE: text (" "); break; case MarkupAttr.MARKUP_BULLET: text ("\u00b7"); break; case MarkupAttr.MARKUP_LQUOTE: text ("`"); break; case MarkupAttr.MARKUP_RQUOTE: text ("'"); break; case MarkupAttr.MARKUP_LDBLQUOTE: case MarkupAttr.MARKUP_RDBLQUOTE: text ("\""); break; case MarkupAttr.MARKUP_LINE: text ("\n"); break; // indentation case MarkupAttr.MARKUP_INDENT_FIRST_LINE: { UnitSpan oValue = loadNumber (sCommand); if (oValue.value() < 0) { // -ve Special means indent all but the 1st line; +ve Special // means indent 1st line only. So, if we get a +ve indent 1st line // from RTF, we're OK. If it's negative, subtract it from our // current Left Margin. UnitSpan oOffset = oValue; if (textAttr().marginLEnable()) { oOffset = oOffset.add (textAttr().marginL().getLength()); } textAttr().marginL (new TextMeasurement (oOffset)); } textAttr().special (new TextMeasurement (oValue)); break; } case MarkupAttr.MARKUP_INDENT_LEFT: { if (sCommand.length() == 0) { flushAttr(); // Indent(); } else { // See comments under INDENT_FIRST_LINE UnitSpan oValue = loadNumber (sCommand); if (textAttr().special().getLengthValue() < 0) { oValue = oValue.add (textAttr().special().getLength()); } textAttr().marginL (new TextMeasurement (oValue)); } break; } case MarkupAttr.MARKUP_INDENT_RIGHT: textAttr().marginR (new TextMeasurement (loadNumber (sCommand))); break; case MarkupAttr.MARKUP_SPACE_BEFORE: textAttr().spaceBefore (new TextMeasurement (loadNumber (sCommand, UnitSpan.POINTS_1K))); break; case MarkupAttr.MARKUP_SPACE_AFTER: textAttr().spaceAfter (new TextMeasurement (loadNumber (sCommand, UnitSpan.POINTS_1K))); break; case MarkupAttr.MARKUP_LINE_SPACE: { UnitSpan oSpacing = loadNumber (sCommand, UnitSpan.POINTS_1K); if (oSpacing.gt (textAttr().size())) { // Text Services only supports the spacing option where it's a fixed // height irregardless of font height // (represented by a negative value) // When the value is positive, we use it only if it's larger than the // tallest character. It's not easy for us to figure out the largest // character, so we'll do the next best thing and check if it's // larger than the current font size. textAttr().spacing (new TextMeasurement (oSpacing)); } else if (oSpacing.value() < 0) { oSpacing = new UnitSpan(oSpacing.units(), -oSpacing.value()); textAttr().spacing (new TextMeasurement (oSpacing)); } break; } // font case MarkupAttr.MARKUP_FONT_NAME: { // Sending down some empty string should flush any // pending multi-byte text commitPending (true); int uFontIndex = StringUtils.safeNumber (sCommand); textAttr().typeface (moFontMap.get(uFontIndex)); // Make sure that the char converter agrees with the font. charset (moFontCharset.get(uFontIndex)); flushAttr(); } break; case MarkupAttr.MARKUP_FONT_SIZE: textAttr().size (loadNumber (sCommand, UnitSpan.POINTS_1K, markupAttr().pointSizeFactor())); flushAttr(); break; case MarkupAttr.MARKUP_COLOUR: { int lColour = StringUtils.safeNumber (sCommand); textAttr().colour (moColourTable.get (lColour)); break; } case MarkupAttr.MARKUP_DEFAULT_FONT: { int lFontIndex = StringUtils.safeNumber (sCommand); mnDefaultFont = lFontIndex; break; } // effects case MarkupAttr.MARKUP_BOLD: // assume bold disabled if succeeding char (i.e. '0') // TextAttr().Weight ((sCommand.length() == 0) ? GFXTypeface.WEIGHT_BOLD : GFXTypeface.WEIGHT_NORMAL); textAttr().weight ((sCommand.length() == 0) ? 700 : 400); // TODO break; case MarkupAttr.MARKUP_STRIKEOUT: case MarkupAttr.MARKUP_STRIKEOUT_DOUBLE: if (sCommand.charAt (0) == '0') { textAttr().strikeout (GFXTextAttr.STRIKEOUT_NONE); } else { textAttr().strikeout (GFXTextAttr.STRIKEOUT_SINGLE); } break; case MarkupAttr.MARKUP_ITALIC: // assume italic disabled if succeeding char (i.e. '0') textAttr().italic (sCommand.length() == 0); break; // underlining case MarkupAttr.MARKUP_UNDERLINE: // Default all the weird underline types to underline single. case MarkupAttr.MARKUP_UNDERLINE_DOTTED: case MarkupAttr.MARKUP_UNDERLINE_DASH: case MarkupAttr.MARKUP_UNDERLINE_DASH_DOT: case MarkupAttr.MARKUP_UNDERLINE_DASH_DOT_DOT: case MarkupAttr.MARKUP_UNDERLINE_HEAVY_WAVE: case MarkupAttr.MARKUP_UNDERLINE_LONG_DASH: case MarkupAttr.MARKUP_UNDERLINE_THICK: case MarkupAttr.MARKUP_UNDERLINE_THICK_DOT: case MarkupAttr.MARKUP_UNDERLINE_THICK_DASH: case MarkupAttr.MARKUP_UNDERLINE_THICK_DASH_DOT: case MarkupAttr.MARKUP_UNDERLINE_THICK_DASH_DOT_DOT: case MarkupAttr.MARKUP_UNDERLINE_THICK_LONG_DASH: case MarkupAttr.MARKUP_UNDERLINE_DOUBLE_WAVE: case MarkupAttr.MARKUP_UNDERLINE_WAVE: // assume underline disabled if succeeding char (i.e. '0') textAttr().underline ((sCommand.length() == 0) ? seUnderS : seUnder0); // Single // None break; case MarkupAttr.MARKUP_UNDERLINE_END: case MarkupAttr.MARKUP_UNDERLINE_END2: textAttr().underline (seUnder0); break; case MarkupAttr.MARKUP_UNDERLINE_DOUBLE: textAttr().underline (seUnderDbl); break; case MarkupAttr.MARKUP_UNDERLINE_WORD: textAttr().underline (seUnderW); break; case MarkupAttr.MARKUP_SUPER: { UnitSpan oShift = textAttr().size(); oShift = oShift.multiply (-0.31f); if (textAttr().baselineShiftEnable()) { UnitSpan oBase = new UnitSpan (textAttr().baselineShift().getString (false)); oShift = oShift.add (oBase); } textAttr().baselineShift (new TextBaselineShift (oShift)); UnitSpan size = textAttr().size(); size = size.multiply (0.66f); textAttr().size (size); break; } case MarkupAttr.MARKUP_UP: { // units are in half points UnitSpan oShift = new UnitSpan (sCommand + gsPt); oShift = oShift.multiply (-0.5f); if (textAttr().baselineShiftEnable()) { UnitSpan oBase = new UnitSpan (textAttr().baselineShift().getString (false)); oShift = oShift.add (oBase); } textAttr().baselineShift (new TextBaselineShift (oShift)); break; } case MarkupAttr.MARKUP_DOWN: { // units are in half points UnitSpan oShift = new UnitSpan (sCommand + gsPt); oShift = oShift.multiply (0.5f); if (textAttr().baselineShiftEnable()) { UnitSpan oBase = new UnitSpan (textAttr().baselineShift().getString (false)); oShift = oShift.add (oBase); } textAttr().baselineShift (new TextBaselineShift (oShift)); break; } case MarkupAttr.MARKUP_SUB: { UnitSpan oShift = textAttr().size(); oShift = oShift.multiply (0.15f); if (textAttr().baselineShiftEnable()) { UnitSpan oBase = new UnitSpan (textAttr().baselineShift().getString (false)); oShift = oShift.add (oBase); } textAttr().baselineShift (new TextBaselineShift (oShift)); UnitSpan size = textAttr().size(); size = size.multiply (0.66f); textAttr().size (size); break; } // justification // Horizontal case MarkupAttr.MARKUP_JUSTIFY_SPREAD: textAttr().justifyH (TextAttr.JUST_H_SPREAD); break; case MarkupAttr.MARKUP_JUSTIFY_SPREAD_ALL: textAttr().justifyH (TextAttr.JUST_H_SPREAD_ALL); break; case MarkupAttr.MARKUP_JUSTIFY_HORZ_LEFT: textAttr().justifyH (TextAttr.JUST_H_LEFT); break; case MarkupAttr.MARKUP_JUSTIFY_HORZ_CENTER: textAttr().justifyH (TextAttr.JUST_H_CENTRE); break; case MarkupAttr.MARKUP_JUSTIFY_HORZ_RIGHT: textAttr().justifyH (TextAttr.JUST_H_RIGHT); break; // Vertical case MarkupAttr.MARKUP_JUSTIFY_VERT_TOP: textAttr().justifyV (TextAttr.JUST_V_TOP); break; case MarkupAttr.MARKUP_JUSTIFY_VERT_CENTER: textAttr().justifyV (TextAttr.JUST_V_MIDDLE); break; case MarkupAttr.MARKUP_JUSTIFY_VERT_BOTTOM: textAttr().justifyV (TextAttr.JUST_V_BOTTOM); break; // tabs case MarkupAttr.MARKUP_TAB: flushAttr(); // No associated text text (gsTab); break; case MarkupAttr.MARKUP_TAB_DEFAULT: { TextTab oTab = new TextTab (loadNumber (sCommand), TextTab.TYPE_ALIGN_AFTER); TextTabList oTabList = new TextTabList(); if (textAttr().tabsEnable()) { oTabList = textAttr().tabs(); } oTabList.uniform (oTab); textAttr().tabs (oTabList); } break; case MarkupAttr.MARKUP_TAB_POSITION: { TextTab oTab = new TextTab (loadNumber (sCommand), pendingTab()); TextTabList oTabList = new TextTabList(); if (textAttr().tabsEnable()) { oTabList = textAttr().tabs(); } oTabList.set (oTab); textAttr().tabs (oTabList); } break; case MarkupAttr.MARKUP_TAB_ALIGN_LEFT: pendingTab (TextTab.TYPE_LEFT); break; case MarkupAttr.MARKUP_TAB_ALIGN_CENTER: pendingTab (TextTab.TYPE_CENTRE); break; case MarkupAttr.MARKUP_TAB_ALIGN_RIGHT: pendingTab (TextTab.TYPE_RIGHT); break; case MarkupAttr.MARKUP_TAB_ALIGN_DECIMAL: pendingTab (TextTab.TYPE_DECIMAL); break; case MarkupAttr.MARKUP_STYLE_ADDITIVE: mnStyleNum *= -1; break; case MarkupAttr.MARKUP_STYLE_BASED_ON: { if (mbCollectingStyle) { int nLookup = StringUtils.safeNumber (sCommand); // Look up the index TextAttr value = moStyleSheets.get(nLookup); if (value == null) { nLookup *= -1; value = moStyleSheets.get(nLookup); } if (value != null) { // We've found the style we're based on. TextAttr oStyle = value; // Define anything that hasn't shown up in our style. moCurrentAttr.addDisabled (oStyle); } } break; } case MarkupAttr.MARKUP_STYLE_REF: case MarkupAttr.MARKUP_CHARACTER_STYLE: { int nFontIndex = StringUtils.safeNumber (sCommand); if (mbCollectingStyle) { mnStyleNum = nFontIndex; } else { // Look up the index int nLookup = nFontIndex; TextAttr value = moStyleSheets.get(nLookup); boolean bAdditive = false; if (value == null) { nLookup *= -1; value = moStyleSheets.get(nLookup); bAdditive = true; } if (value != null) { TextAttr oStyle = value; if (bAdditive) { // Character attributes only -- not paragraph oStyle.isolatePara (false); moCurrentAttr.override (oStyle); } else { moCurrentAttr = oStyle; } } } break; } // Unsupported commands are simply ignored case MarkupAttr.MARKUP_NOT_SUPPORTED: case MarkupAttr.MARKUP_STYLESHEET: case MarkupAttr.MARKUP_INFO: case MarkupAttr.MARKUP_PICTURE: case MarkupAttr.MARKUP_DESTINATION_GROUP: case MarkupAttr.MARKUP_HEADER: case MarkupAttr.MARKUP_HEADER_LEFT: case MarkupAttr.MARKUP_HEADER_RIGHT: case MarkupAttr.MARKUP_HEADER_FIRST: case MarkupAttr.MARKUP_FOOTER: case MarkupAttr.MARKUP_FOOTER_LEFT: case MarkupAttr.MARKUP_FOOTER_RIGHT: case MarkupAttr.MARKUP_FOOTER_FIRST: case MarkupAttr.MARKUP_COLOUR_TABLE: break; case MarkupAttr.MARKUP_UNKNOWN: default: bCommandHandled = false; // not handled here break; } return bCommandHandled; } protected void startBlock () { if (mbCollectingStyle) { moCurrentAttr.setDefault (false); } } protected void endBlock () { if (mbCollectingStyle) { if (mnStyleNum >= 0) { if (! moCurrentAttr.typefaceEnable()) { // If we're not an additive style, make sure all // attributes are explicitly set. // If a typeface hasn't been specified, go get // the default font. String value = moFontMap.get(mnDefaultFont); if (value != null) { String sTypeface = value; moCurrentAttr.typeface (sTypeface); // Make sure that the char converter agrees with the font. charset (moFontCharset.get(mnDefaultFont)); } } // Make sure all attributes are explicitly set. moCurrentAttr.addDisabled (TextAttr.defaultFull); } moStyleSheets.put (mnStyleNum, moCurrentAttr); mnStyleNum = 0; } } protected void charset (int lCharset) { // String sCodePage; // // switch (lCharset) { // case 128: /* SHIFTJIS */ // sCodePage = gsShiftJIS; // break; // case 129: /* HANGEUL */ // sCodePage = gsKSC5601; // break; // case 134: /* GB2312 */ // sCodePage = gsGBK; // break; // case 136: /* BIG5 */ // sCodePage = gsBig5; // break; // case 161: /* GREEK */ // sCodePage = gsWindows1253; // break; //// Watson #1123411: add support for Turkish charset. // case 162: /* TURKISH */ // sCodePage = gsWindows1254; // break; //// Watson #1171468: add support for Hebrew, Thai and Vietnamese // case 163: /* VIETNAMESE */ // sCodePage = gsWindows1258; // break; // case 177: /* HEBREW */ // sCodePage = gsWindows1255; // break; //// vantive 668202: add support for Arabic, Baltic and Russian // case 178: /* ARABIC */ // sCodePage = gsWindows1256; // break; // case 186: /* BALTIC */ // sCodePage = gsWindows1257; // break; // case 204: /* RUSSIAN/CYRILLIC */ // sCodePage = gsWindows1251; // break; // case 222: /* THAI */ // sCodePage = gsWindows874; // break; // case 238: /* EASTEUROPE */ // sCodePage = gsWindows1250; // break; // default: // sCodePage = gsWindows1252; /* ANSI_CHARSET */ //// Charset "2" is the symbol charset. Eventually we'd like to have //// a special code page handler for this... // assert (lCharset == 0 || lCharset == 2); // } // jfCharConverter pNewConverter = jfCharConverter.Default().Clone (sCodePage); // CharConverter (pNewConverter); // TODO } // Inherited from MarkupIn protected void text (String sText) { if (! mbCollectingStyle) { super.text (sText); } } protected void attr (TextAttr oAttr) { if (! mbCollectingStyle) { super.attr (oAttr); } } //----------------------------------------------------------------------------- // Function: BuildFontMap // // Description: // Performs extraction of RTF font table information to construct // moFontMap. The RTF font table block's command appears as: // // { // \fonttbl // { // \f0\froman\fcharset0\fprq Tms Rmn // { // \*\falt Times New Roman // }; // } // { // \f4\froman\fcharset0\fprq Times New Roman; // } // } // // When this method is called, the '\fonttbl' command has been // read and identified. mnIndex indexes the first block open // scope character. // // The strategy used for the font name storage is to use the // alternate font specification ('\falt') if specified. Why? // Good question! It appears that specifying an alternate font // only occurs when the first choice is questionable. There is // no way to indicate/insert this information into the text // stream, so the 'no-question' font is used. // // Parameters: // oStrRtfFontMap - the RTF font map text // // Returns: Void. // //----------------------------------------------------------------------------- private void buildFontMap (String oStrRtfFontMap) { final int ciINVALID_FONT = -1; final int FT_COMMAND = 0; final int FT_NAME = 1; final int FT_UNKNOWN = 2; int eState = FT_UNKNOWN; int nIndex = 0; StringBuilder sCommand = new StringBuilder(); RTFAttr pMkAttr = (RTFAttr) markupAttr(); int iFontTableIndex = ciINVALID_FONT; int iFontCharset = 0; moFontMap.clear(); // Squeaky clean! StringBuilder cFontName = new StringBuilder(); for (; nIndex < oStrRtfFontMap.length(); nIndex++) { // This is another the finite state machine. It iterates once for each source // character. char c = oStrRtfFontMap.charAt (nIndex); if ((c == '\\') && (oStrRtfFontMap.charAt (nIndex + 1) == '\'')) { // Look for a hex character // The font name could be specified as a series of Hex characters byte b = getHexValue (oStrRtfFontMap.substring (nIndex+2, nIndex+4)); cFontName.append (b); eState = FT_NAME; nIndex += 3; continue; } if (eState == FT_UNKNOWN) { // '@' or ' ' seems to indicate we're starting a font name if ((c == '@') && (iFontTableIndex != ciINVALID_FONT)) { eState = FT_NAME; continue; } if ((c == ' ') && (cFontName.length() == 0) && (iFontTableIndex != ciINVALID_FONT)) { eState = FT_NAME; continue; } } switch (eState) { // Accumulating a font table command: Check for end of command char and // process if required. case FT_COMMAND: if (pMkAttr.isDelimiter (c)) { if (sCommand.length() > 0) { int eTag = pMkAttr.extractTag (sCommand); String finalCommand = sCommand.toString(); if (eTag == MarkupAttr.MARKUP_FONT_NAME) { // RTF font table entry - e.g. 'f0', 'f4', etc. Used for commands AND // font table declarations. Note: numbers are loaded as 1/1000ths of a point iFontTableIndex = StringUtils.safeNumber (finalCommand); } else if (eTag == MarkupAttr.MARKUP_CHARSET) { iFontCharset = StringUtils.safeNumber (finalCommand); } else if (pMkAttr.IsFontNameSpecifier (finalCommand)) { // \falt or \fname are followed by a face name if (c == pMkAttr.blockPrefix()) { // Skip any embedded Blocks nIndex = skipBlock (nIndex + 1, oStrRtfFontMap); } eState = FT_NAME; } else if (sCommand.toString().equals ("panose")) { nIndex = skipBlock (nIndex + 1, oStrRtfFontMap); eState = FT_NAME; } if (c == ' ') { // If our delimiter is a space, we probably // should start collecting a font name. // Watson #1179965 - Not necessarily, see note below. eState = FT_NAME; } sCommand.delete (0, sCommand.length()); } } else if (c != pMkAttr.commandPrefix()) { sCommand.append (c); // strip '\' character } break; // Accumulating font table name string. case FT_NAME: if (c == mcCommandPrefix) { // Watson #1179965 // If we encounter a command prefix when we think we're about to start // processing a font name, we need to switch state back to FT_COMMAND. // It's possible to end up in here when we encounter the first space // character in a font definition that looks like this: // \f42\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic); // Obviously, we don't want to parse \froman as the font name. eState = FT_COMMAND; } else if (pMkAttr.isDelimiter (c)) { if ((c != ' ') && (c != '(') && (c != ')') && (c != '_') && (cFontName.length() != 0)) { // Spaces are allowed in font names // Brackets can be in font names // Underscores are allowed. if (iFontTableIndex != ciINVALID_FONT) { moFontMap.remove (iFontTableIndex); // 1 entry per index // String sFontName (cFontName, CharConverter()); // TODO String sFontName = cFontName.toString(); moFontMap.put (iFontTableIndex, sFontName); moFontCharset.remove (iFontTableIndex); // 1 entry per index moFontCharset.put (iFontTableIndex, iFontCharset); iFontCharset = 0; iFontTableIndex = ciINVALID_FONT; } cFontName.delete (0, cFontName.length()); // Restart eState = FT_UNKNOWN; // No assumptions } else if (c == pMkAttr.blockPrefix()) { // Skip any embedded Blocks nIndex = skipBlock (nIndex + 1, oStrRtfFontMap); } else if (c != pMkAttr.commandPrefix() && c != '@') { // This cast is safe. We know RTF files are always // 7 or 8 bit characters cFontName.append (c); } } else if (c != '@') { cFontName.append (c); } break; // Unknown command: continue reading/discarding until a command is encountered case FT_UNKNOWN: if (c == mcCommandPrefix) { eState = FT_COMMAND; } break; } if (c == mcBlockSuffix) { // End of block sCommand.delete (0, sCommand.length()); iFontTableIndex = ciINVALID_FONT; iFontCharset = 0; } } } //----------------------------------------------------------------------------- // Function: ExtractFontTableData // // Description: // Extracts the text range consituting the RTF font table data. // // Parameters: // oStrSource - RTF source string. // oStrRtfFontMap - RTF font table data block (extracted from oStrSource). // // Returns: TRUE on success, FALSE otherwise. // //----------------------------------------------------------------------------- private String extractFontTableData (StringBuilder oStrSource) { String fontMap = null; boolean bValid = false; // False until proven TRUE... int nStartIndex = oStrSource.indexOf ("{\\fonttbl"); if (nStartIndex >= 0) { int iScopedBlockCount = 1; // Block open preceding '\fonttbl' command int nIndex = nStartIndex + 1; // Next char after opening '{' for (; (iScopedBlockCount > 0) && (nIndex < oStrSource.length()); ++nIndex) { // Determine the text range constituting the font table information char c = oStrSource.charAt (nIndex); if (c == mcBlockPrefix) { ++iScopedBlockCount; } else if (c == mcBlockSuffix) { --iScopedBlockCount; if (iScopedBlockCount == 0) { bValid = true; break; } } } if (bValid) { // Extract and remove the font table data string from the source fontMap = oStrSource.substring (nStartIndex, nIndex); oStrSource.delete (nStartIndex, nIndex); } } return fontMap; } private void buildColourTable (String oSource) { int nFoundAt = oSource.indexOf (gsColourtbl); if (nFoundAt < 0) { return; } int nFoundAt2 = oSource.indexOf ('}', nFoundAt); String sColourTable = oSource.substring (nFoundAt, nFoundAt2); int nPos = StringUtils.skipOver (sColourTable, gsSkip, gsColourtbl.length()); if (sColourTable.charAt (nPos) == ';') { // If the first entry in the colour table is blank, // begin the colour table with our default colour (black) moColourTable.add (new GFXColour()); } int nRed; int nGreen; int nBlue; int nOffset = 0; nFoundAt = sColourTable.indexOf (gsRed); while (nFoundAt >= 0) { nRed = getNumber (sColourTable, nFoundAt + gsRed.length()); nFoundAt = sColourTable.indexOf (gsGreen, nOffset); if (nFoundAt >= 0) { nGreen = getNumber (sColourTable, nFoundAt + gsGreen.length()); nFoundAt = sColourTable.indexOf (gsBlue, nOffset); if (nFoundAt >= 0) { nBlue = getNumber (sColourTable, nFoundAt + gsBlue.length()); moColourTable.add (new GFXColour (nRed, nGreen, nBlue, 255)); } } nOffset = nFoundAt + gsBlue.length(); nFoundAt = sColourTable.indexOf (gsRed, nOffset); } } private void buildStyleSheets (String sSource) { int nFoundAt = sSource.indexOf ("\\deff"); if (nFoundAt >= 0) { // First make sure we have the default font. It's important for knowing // how styles are defined. StringBuilder sNum = new StringBuilder(); nFoundAt += 5; while ((sSource.charAt (nFoundAt) >= '0') && (sSource.charAt (nFoundAt) <= '9')) { // Extract the numeric portion sNum.append(sSource.charAt (nFoundAt++)); } mnDefaultFont = StringUtils.safeNumber (sNum.toString()); } nFoundAt = sSource.indexOf (gsStyleSheet); if (nFoundAt < 0) { return; } mbCollectingStyle = true; mnStyleNum = 0; // Find the closing brace int nBraceLevel = 1; // position past the \stylesheet nFoundAt += gsStyleSheet.length(); int i = nFoundAt; for (; i < sSource.length(); i++) { if (sSource.charAt (i) == '{') { nBraceLevel++; } else if (sSource.charAt (i) == '}') { nBraceLevel--; } if (nBraceLevel == 0) { break; } } String sTable = sSource.substring (nFoundAt, i); setSourceText (sTable); super.translate(); setSourceText (sTable); mbCollectingStyle = false; reset(); } private int getNumber (String sStr, int nOffset) { int lValue = 0; while ((sStr.charAt (nOffset) >= '0') && (sStr.charAt (nOffset) <= '9')) { lValue = (lValue * 10) + (sStr.charAt (nOffset) - '0'); nOffset++; } return lValue; } private byte getHexValue (String sCommand) { return Byte.decode(sCommand).byteValue(); // TODO: can this throw? } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy