// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// com.hfg.xml.msofficexml.docx.wordprocessingml.HtmlToWmlConverter Maven / Gradle / Ivy

// There is a newer version: 20240423
// Show newest version
package com.hfg.xml.msofficexml.docx.wordprocessingml;

import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hfg.css.*;
import com.hfg.exception.ProgrammingException;
import com.hfg.graphics.units.Pixels;
import com.hfg.graphics.units.Points;
import com.hfg.html.*;
import com.hfg.html.attribute.HTMLColor;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.Recursion;
import com.hfg.util.StringUtil;
import com.hfg.util.io.GZIP;
import com.hfg.xml.XMLNode;
import com.hfg.xml.XMLTag;
import com.hfg.xml.msofficexml.docx.Docx;
import com.hfg.xml.msofficexml.docx.wordprocessingml.style.WmlJustification;
import com.hfg.xml.msofficexml.docx.wordprocessingml.style.WmlParagraphStyle;
import com.hfg.xml.msofficexml.docx.wordprocessingml.style.WmlStyle;
import com.hfg.xml.msofficexml.docx.wordprocessingml.style.WmlTableCellBorder;
import com.hfg.xml.msofficexml.docx.wordprocessingml.style.WmlTextDirection;
import com.hfg.xml.msofficexml.docx.wordprocessingml.style.WmlVerticalJustification;


//------------------------------------------------------------------------------
/**
 * For converting HTML into WordprocessingML.
 *
 * @author J. Alex Taylor, hairyfatguy.com
 */
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------

public class HtmlToWmlConverter
{
   // CSS to use during conversion. Assigned via setCSS(); stays null until then.
   private CSS mCSS;

   // Matches one or more digits optionally followed by a "pt" unit suffix.
   private static final Pattern PTS_SIZE_PATTERN = Pattern.compile("(\\d+)(pt)?");
   // Matches one or more digits optionally followed by a "px" unit suffix.
   private static final Pattern PX_SIZE_PATTERN  = Pattern.compile("(\\d+)(px)?");

   //##########################################################################
   // PUBLIC METHODS
   //##########################################################################

   //---------------------------------------------------------------------------
   /**
    Registers the CSS that should be consulted while converting HTML tags.

    @param inValue CSS object containing a collection of CSSRules
    @return this converter, so that calls can be chained
    */
   public HtmlToWmlConverter setCSS(CSS inValue)
   {
      this.mCSS = inValue;

      return this;
   }

   //---------------------------------------------------------------------------
   /**
    Converts an HTML tag into the corresponding WordprocessingML tag(s). The tag name is
    compared case-insensitively against the supported HTML tags and the work is delegated
    to the matching private parse method.
    <p>
    Warning: this method is still a brittle work-in-progress.
    </p>

    @param inHTMLTag the HTML tag to convert
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return the generated tags; possibly empty (col and colgroup, for instance, are currently ignored)
    @throws ProgrammingException if there is no mapping for the tag's name
    */
   public synchronized List<XMLTag> convert(HTMLTag inHTMLTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      // Look the name up once instead of once per comparison
      String tagName = inHTMLTag.getTagName();

      if (tagName.equalsIgnoreCase(HTML.BR))
      {
         wmlTags.add(new XMLTag(WmlXML.BR));
      }
      else if (tagName.equalsIgnoreCase(HTML.COL)
               || tagName.equalsIgnoreCase(HTML.COLGROUP))
      {
         // TODO: Ignore for now
      }
      else if (tagName.equalsIgnoreCase(HTML.DIV))
      {
         wmlTags.addAll(parseDivTag((Div) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.A))
      {
         wmlTags.addAll(parseLinkTag((Link) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.NOBR))
      {
         wmlTags.addAll(parseNobrTag((Nobr) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.P))
      {
         wmlTags.addAll(parsePTag((P) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.PRE))
      {
         wmlTags.addAll(parsePreTag((Pre) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.SPAN))
      {
         wmlTags.addAll(parseSpanTag((Span) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.TABLE))
      {
         wmlTags.addAll(parseTableTag((Table) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.TBODY))
      {
         wmlTags.addAll(parseTBodyTag((TBody) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.TR))
      {
         wmlTags.addAll(parseTrTag((Tr) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.TH))
      {
         wmlTags.addAll(parseThTag((Th) inHTMLTag, inDocx));
      }
      else if (tagName.equalsIgnoreCase(HTML.TD))
      {
         wmlTags.addAll(parseTdTag((Td) inHTMLTag, inDocx));
      }
      else
      {
         throw new ProgrammingException("No tag mapping for " + tagName);
      }

      // Replace any character entities (like "&quot;") with the corresponding numeric entities
      if (CollectionUtil.hasValues(wmlTags))
      {
         for (XMLTag xmlTag : wmlTags)
         {
            xmlTag.replaceCharacterEntities();
         }
      }

      return wmlTags;
   }

   //##########################################################################
   // PRIVATE METHODS
   //##########################################################################

   //---------------------------------------------------------------------------
   /**
    Converts an HTML table tag into a WmlTable. Every subtag of the table is run through
    convert() and the resulting tags are attached to the WmlTable.

    @param inTableTag the HTML table tag to convert
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return a list holding the generated WmlTable
    */
   private List<XMLTag> parseTableTag(Table inTableTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      WmlTable table = new WmlTable(inDocx);
      wmlTags.add(table);

      // TODO: Parse table style

      if (inTableTag.hasContentOrSubtags())
      {
         for (XMLNode tableSubtag : inTableTag.getXMLNodeSubtags())
         {
            List<XMLTag> subtags = convert((HTMLTag) tableSubtag, inDocx);
            if (CollectionUtil.hasValues(subtags))
            {
               for (XMLTag subtag : subtags)
               {
                  table.addSubtag(subtag);
               }
            }
         }
      }

      return wmlTags;
   }

   //---------------------------------------------------------------------------
   /**
    The TBody tag itself doesn't map to anything but any table data it contains should
    get parsed. The converted subtags are returned directly, without a wrapping tag.

    @param inTBodyTag the HTML tbody tag whose subtags should be converted
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return the tags generated for the tbody's subtags; empty if there are none
    */
   private List<XMLTag> parseTBodyTag(TBody inTBodyTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      if (inTBodyTag.hasContentOrSubtags())
      {
         for (XMLNode tbodySubtag : inTBodyTag.getXMLNodeSubtags())
         {
            List<XMLTag> subtags = convert((HTMLTag) tbodySubtag, inDocx);
            if (CollectionUtil.hasValues(subtags))
            {
               wmlTags.addAll(subtags);
            }
         }
      }

      return wmlTags;
   }

   //---------------------------------------------------------------------------
   /**
    Converts an HTML tr tag into a WmlTableRow. A row whose CSS specifies display:none is
    skipped entirely. The row's CSS is applied to the row properties and is also pushed
    down onto every table cell found beneath the row.

    @param inTrTag the HTML tr tag to convert
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return a list holding the generated WmlTableRow, or an empty list for a hidden row
    */
   private List<XMLTag> parseTrTag(Tr inTrTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inTrTag);
      boolean hasCss = CollectionUtil.hasValues(cssDeclarations);
      if (hasCss)
      {
         for (CSSDeclaration cssDeclaration : cssDeclarations)
         {
            if (cssDeclaration.getProperty() == CSSProperty.display
                  && cssDeclaration.getValue().equals("none"))
            {
               // Hidden rows produce no output
               return wmlTags;
            }
         }
      }

      WmlTableRow tr = new WmlTableRow(inDocx);
      wmlTags.add(tr);

      if (hasCss)
      {
         convertCSSToTableRowProperties(cssDeclarations, tr.getRowProperties());
      }

      if (inTrTag.hasContentOrSubtags())
      {
         for (XMLNode trSubtag : inTrTag.getXMLNodeSubtags())
         {
            List<XMLTag> subtags = convert((HTMLTag) trSubtag, inDocx);
            if (CollectionUtil.hasValues(subtags))
            {
               for (XMLTag subtag : subtags)
               {
                  tr.addSubtag(subtag);
               }
            }
         }
      }

      // Copy the table cell properties down to any cells since OfficeOpenXML is brain damaged
      if (hasCss)
      {
         List<?> tableCells = tr.getSubtagsByName(WmlXML.TABLE_CELL.getLocalName(), Recursion.ON);
         if (CollectionUtil.hasValues(tableCells))
         {
            for (Object tableCell : tableCells)
            {
               WmlTableCell tc = (WmlTableCell) tableCell;

               // Each cell needs its own properties object. Sharing one instance across
               // the cells would let add() accumulate every cell's settings in it and
               // every cell would end up pointing at that same merged object.
               WmlTableCellProperties tcPr = new WmlTableCellProperties(inDocx);
               convertCSSToTableCellProperties(cssDeclarations, tcPr);

               tcPr.add(tc.getCellProperties());
               tc.setCellProperties(tcPr);
            }
         }
      }

      return wmlTags;
   }

   //---------------------------------------------------------------------------
   /**
    Converts an HTML td tag into a WmlTableCell. The cell's CSS is applied to the cell
    properties; everything except the background color and border declarations is also
    applied to the paragraphs and text runs inside the cell. A colspan attribute is
    translated into the cell's grid span.

    @param inTdTag the HTML td tag to convert
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return a list holding the generated WmlTableCell
    @throws NumberFormatException if the colspan attribute is set but is not an integer
    */
   private List<XMLTag> parseTdTag(Td inTdTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      WmlTableCell td = new WmlTableCell(inDocx);
      wmlTags.add(td);

      WmlTextRunProperties textRunProperties = null;
      List<CSSDeclaration> textCssDeclarations = null;

      List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inTdTag);
      boolean containsCss = CollectionUtil.hasValues(cssDeclarations);
      if (containsCss)
      {
         convertCSSToTableCellProperties(cssDeclarations, td.getCellProperties());

         // Leave out bgColor and border styles before setting text run properties.
         // A filtered copy is built so the list handed back by getCSSDeclarations()
         // is never modified (it is not visible here whether that list is shared).
         textCssDeclarations = new ArrayList<CSSDeclaration>(cssDeclarations.size());
         for (CSSDeclaration cssDeclaration : cssDeclarations)
         {
            if (! cssDeclaration.getProperty().equals(CSSProperty.background_color)
                && ! cssDeclaration.getProperty().equals(CSSProperty.border))
            {
               textCssDeclarations.add(cssDeclaration);
            }
         }

         textRunProperties = new WmlTextRunProperties(inDocx);
         convertCSSToTextRunProperties(textCssDeclarations, textRunProperties);
      }


      String colSpanString = inTdTag.getColSpan();
      if (StringUtil.isSet(colSpanString))
      {
         // Tolerate surrounding whitespace in the attribute value
         td.getCellProperties().setGridSpan(Integer.parseInt(colSpanString.trim()));
      }

      if (inTdTag.hasContentOrSubtags())
      {
         WmlParagraph p = td.getParagraph();

         for (Object content : inTdTag.getContentPlusSubtagList())
         {
            if (content instanceof String)
            {
               p.addTextRun((String) content);
            }
            else if (content instanceof byte[]) // Compressed content is stored as a byte[]
            {
               p.addTextRun(GZIP.uncompressToString((byte[]) content));
            }
            else
            {
               List<XMLTag> subtags = convert((HTMLTag) content, inDocx);
               if (CollectionUtil.hasValues(subtags))
               {
                  for (XMLTag subtag : subtags)
                  {
                     if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName()))
                     {
                        // A break gets wrapped in a text run of its own
                        WmlTextRun textRun = p.addTextRun();
                        textRun.addSubtag(subtag);
                     }
                     else
                     {
                        // Text runs and everything else go straight into the paragraph
                        p.addSubtag(subtag);
                     }
                  }
               }
            }
         }

         // Copy the text run properties on the cell down to any text runs since OfficeOpenXML is brain damaged
         if (containsCss)
         {
            List<?> paragraphs = td.getSubtagsByName(WmlXML.P.getLocalName(), Recursion.ON);
            if (CollectionUtil.hasValues(paragraphs))
            {
               for (Object paragraph : paragraphs)
               {
                  convertCSSToParagraphProperties(textCssDeclarations, ((WmlParagraph) paragraph).getProperties());
               }
            }

            List<?> textRuns = td.getSubtagsByName(WmlXML.R.getLocalName(), Recursion.ON);
            if (CollectionUtil.hasValues(textRuns))
            {
               for (Object run : textRuns)
               {
                  WmlTextRun textRun = (WmlTextRun) run;

                  // Start from a copy of the cell-level properties and layer the run's own on top
                  WmlTextRunProperties parentProperties = (WmlTextRunProperties) textRunProperties.clone();
                  textRun.setProperties(parentProperties.combine(textRun.getProperties()));
               }
            }
         }
      }

      return wmlTags;
   }

   //---------------------------------------------------------------------------
   /**
    Converts an HTML th tag into a WmlTableCell. The cell's CSS is applied to the cell
    properties; everything except the background color and border declarations is also
    applied to the paragraphs and text runs inside the cell.
    <p>
    NOTE(review): unlike parseTdTag(), no colspan handling is done here - confirm whether
    that is intentional.
    </p>

    @param inThTag the HTML th tag to convert
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return a list holding the generated WmlTableCell
    */
   private List<XMLTag> parseThTag(Th inThTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      WmlTableCell th = new WmlTableCell(inDocx);
      wmlTags.add(th);

      WmlTextRunProperties textRunProperties = null;
      List<CSSDeclaration> textCssDeclarations = null;

      List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inThTag);
      boolean containsCss = CollectionUtil.hasValues(cssDeclarations);
      if (containsCss)
      {
         convertCSSToTableCellProperties(cssDeclarations, th.getCellProperties());

         // Leave out bgColor and border styles before setting text run properties.
         // A filtered copy is built so the list handed back by getCSSDeclarations()
         // is never modified (it is not visible here whether that list is shared).
         textCssDeclarations = new ArrayList<CSSDeclaration>(cssDeclarations.size());
         for (CSSDeclaration cssDeclaration : cssDeclarations)
         {
            if (! cssDeclaration.getProperty().equals(CSSProperty.background_color)
                && ! cssDeclaration.getProperty().equals(CSSProperty.border))
            {
               textCssDeclarations.add(cssDeclaration);
            }
         }

         textRunProperties = new WmlTextRunProperties(inDocx);
         convertCSSToTextRunProperties(textCssDeclarations, textRunProperties);
      }


      if (inThTag.hasContentOrSubtags())
      {
         WmlParagraph p = th.getParagraph();

         for (Object content : inThTag.getContentPlusSubtagList())
         {
            if (content instanceof String)
            {
               p.addTextRun((String) content);
            }
            else if (content instanceof byte[]) // Compressed content is stored as a byte[]
            {
               p.addTextRun(GZIP.uncompressToString((byte[]) content));
            }
            else
            {
               List<XMLTag> subtags = convert((HTMLTag) content, inDocx);
               if (CollectionUtil.hasValues(subtags))
               {
                  for (XMLTag subtag : subtags)
                  {
                     if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName()))
                     {
                        // A break gets wrapped in a text run of its own
                        WmlTextRun textRun = p.addTextRun();
                        textRun.addSubtag(subtag);
                     }
                     else
                     {
                        // Text runs and everything else go straight into the paragraph
                        p.addSubtag(subtag);
                     }
                  }
               }
            }
         }
      }

      // Copy the text run properties on the cell down to any text runs since OfficeOpenXML is brain damaged
      if (containsCss)
      {
         List<?> paragraphs = th.getSubtagsByName(WmlXML.P.getLocalName(), Recursion.ON);
         if (CollectionUtil.hasValues(paragraphs))
         {
            for (Object paragraph : paragraphs)
            {
               convertCSSToParagraphProperties(textCssDeclarations, ((WmlParagraph) paragraph).getProperties());
            }
         }

         List<?> textRuns = th.getSubtagsByName(WmlXML.R.getLocalName(), Recursion.ON);
         if (CollectionUtil.hasValues(textRuns))
         {
            for (Object run : textRuns)
            {
               WmlTextRun textRun = (WmlTextRun) run;

               // Start from a copy of the cell-level properties and layer the run's own on top
               WmlTextRunProperties parentProperties = (WmlTextRunProperties) textRunProperties.clone();
               textRun.setProperties(parentProperties.combine(textRun.getProperties()));
            }
         }
      }

      return wmlTags;
   }


   //---------------------------------------------------------------------------
   /**
    Converts an HTML pre tag into a WmlParagraph that uses a paragraph style named "pre".
    If the document does not have that style yet, it is created with the font "Courier"
    and registered with the document's styles part. The style id is also set on every
    text run placed directly in the paragraph.

    @param inPreTag the HTML pre tag to convert
    @param inDocx the Docx object that supplies the styles part and is handed to the
                  WordprocessingML objects that get created
    @return a list holding the generated WmlParagraph
    */
   private List<XMLTag> parsePreTag(Pre inPreTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      WmlParagraph p = new WmlParagraph(inDocx);
      wmlTags.add(p);

      // Look up the "pre" style, creating and registering it on first use
      WmlStyle preStyle = inDocx.getStylesPart().getStyle("pre");
      if (null == preStyle)
      {
         preStyle = new WmlParagraphStyle("pre", inDocx);
         preStyle.getTextRunProperties().setFont("Courier");
         inDocx.getStylesPart().addStyle(preStyle);
      }
      p.getProperties().setStyle(preStyle.getId());


      List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inPreTag);
      if (CollectionUtil.hasValues(cssDeclarations))
      {
         convertCSSToTextRunProperties(cssDeclarations, p.getProperties().getRunProperties());
      }

      if (inPreTag.hasContentOrSubtags())
      {
         for (Object content : inPreTag.getContentPlusSubtagList())
         {
            if (content instanceof String)
            {
               p.addTextRun((String) content);
            }
            else if (content instanceof byte[]) // Compressed content is stored as a byte[]
            {
               p.addTextRun(GZIP.uncompressToString((byte[]) content));
            }
            else
            {
               List<XMLTag> subtags = convert((HTMLTag) content, inDocx);
               if (CollectionUtil.hasValues(subtags))
               {
                  for (XMLTag subtag : subtags)
                  {
                     if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName()))
                     {
                        // A break gets wrapped in a text run of its own
                        WmlTextRun textRun = p.addTextRun();
                        textRun.addSubtag(subtag);
                     }
                     else
                     {
                        // Text runs and everything else go straight into the paragraph
                        p.addSubtag(subtag);
                     }
                  }
               }
            }
         }

         // Make sure all of the text runs use the pre style
         for (XMLNode xmlTag : p.getXMLNodeSubtags())
         {
            if (xmlTag instanceof WmlTextRun)
            {
               ((WmlTextRun) xmlTag).getProperties().setStyle(preStyle.getId());
            }
         }
      }

      return wmlTags;
   }

   //---------------------------------------------------------------------------
   /**
    Converts an HTML div tag into one or more WordprocessingML tags. A div whose CSS
    specifies display:none produces nothing. Text, text runs and breaks are collected in a
    WmlParagraph; a nested paragraph is emitted as-is and becomes the paragraph that
    subsequent content is appended to; any other converted tag is emitted as-is and
    causes a fresh paragraph to be started for the content that follows it.

    @param inDivTag the HTML div tag to convert
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return the generated tags, or an empty list for a hidden div
    */
   private List<XMLTag> parseDivTag(Div inDivTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inDivTag);
      boolean hasCss = CollectionUtil.hasValues(cssDeclarations);
      if (hasCss)
      {
         for (CSSDeclaration cssDeclaration : cssDeclarations)
         {
            if (cssDeclaration.getProperty() == CSSProperty.display
                  && cssDeclaration.getValue().equals("none"))
            {
               // Hidden divs produce no output
               return wmlTags;
            }
         }
      }

      WmlParagraph p = new WmlParagraph(inDocx);
      wmlTags.add(p);

      if (hasCss)
      {
         convertCSSToTextRunProperties(cssDeclarations, p.getProperties().getRunProperties());
      }

      if (inDivTag.hasContentOrSubtags())
      {
         // Set once a non-paragraph tag has been emitted; the next inline content
         // then has to open a new paragraph.
         boolean containerClosed = false;

         for (Object content : inDivTag.getContentPlusSubtagList())
         {
            if (content instanceof String
                || content instanceof byte[])
            {
               if (containerClosed)
               {
                  p = new WmlParagraph(inDocx);
                  wmlTags.add(p);
                  containerClosed = false;
               }

               if (content instanceof String)
               {
                  p.addTextRun((String) content);
               }
               else // Compressed content is stored as a byte[]
               {
                  p.addTextRun(GZIP.uncompressToString((byte[]) content));
               }
            }
            else
            {
               List<XMLTag> subtags = convert((HTMLTag) content, inDocx);
               if (CollectionUtil.hasValues(subtags))
               {
                  for (XMLTag subtag : subtags)
                  {
                     String subtagName = subtag.getTagName();
                     boolean isTextRun = subtagName.equalsIgnoreCase(WmlXML.R.getLocalName());

                     if (isTextRun
                         || subtagName.equalsIgnoreCase(WmlXML.BR.getLocalName()))
                     {
                        if (containerClosed)
                        {
                           p = new WmlParagraph(inDocx);
                           wmlTags.add(p);
                           containerClosed = false;
                        }

                        if (isTextRun)
                        {
                           p.addSubtag(subtag);
                        }
                        else
                        {
                           // A break gets wrapped in a text run of its own
                           WmlTextRun textRun = p.addTextRun();
                           textRun.addSubtag(subtag);
                        }
                     }
                     else if (subtagName.equalsIgnoreCase(WmlXML.P.getLocalName()))
                     {
                        // A nested paragraph becomes the current paragraph
                        wmlTags.add(subtag);
                        p = (WmlParagraph) subtag;
                     }
                     else
                     {
                        wmlTags.add(subtag);

                        containerClosed = true;
                     }
                  }
               }
            }
         }
      }

      return wmlTags;
   }


   //---------------------------------------------------------------------------
   /**
    Converts an HTML p tag into one or more WordprocessingML tags. Text, text runs and
    breaks are collected in a WmlParagraph; a nested paragraph is emitted as-is and
    becomes the paragraph that subsequent content is appended to; any other converted tag
    is emitted as-is and causes a fresh paragraph to be started for the content that
    follows it.

    @param inPTag the HTML p tag to convert
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return the generated tags, starting with the paragraph created for the p tag
    */
   private List<XMLTag> parsePTag(P inPTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      WmlParagraph p = new WmlParagraph(inDocx);
      wmlTags.add(p);


      List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inPTag);
      if (CollectionUtil.hasValues(cssDeclarations))
      {
         convertCSSToTextRunProperties(cssDeclarations, p.getProperties().getRunProperties());
      }

      if (inPTag.hasContentOrSubtags())
      {
         // Set once a non-paragraph tag has been emitted; the next inline content
         // then has to open a new paragraph.
         boolean containerClosed = false;

         for (Object content : inPTag.getContentPlusSubtagList())
         {
            if (content instanceof String
                || content instanceof byte[])
            {
               if (containerClosed)
               {
                  p = new WmlParagraph(inDocx);
                  wmlTags.add(p);
                  containerClosed = false;
               }

               if (content instanceof String)
               {
                  p.addTextRun((String) content);
               }
               else // Compressed content is stored as a byte[]
               {
                  p.addTextRun(GZIP.uncompressToString((byte[]) content));
               }
            }
            else
            {
               List<XMLTag> subtags = convert((HTMLTag) content, inDocx);
               if (CollectionUtil.hasValues(subtags))
               {
                  for (XMLTag subtag : subtags)
                  {
                     String subtagName = subtag.getTagName();
                     boolean isTextRun = subtagName.equalsIgnoreCase(WmlXML.R.getLocalName());

                     if (isTextRun
                         || subtagName.equalsIgnoreCase(WmlXML.BR.getLocalName()))
                     {
                        if (containerClosed)
                        {
                           p = new WmlParagraph(inDocx);
                           wmlTags.add(p);
                           containerClosed = false;
                        }

                        if (isTextRun)
                        {
                           p.addSubtag(subtag);
                        }
                        else
                        {
                           // A break gets wrapped in a text run of its own
                           WmlTextRun textRun = p.addTextRun();
                           textRun.addSubtag(subtag);
                        }
                     }
                     else if (subtagName.equalsIgnoreCase(WmlXML.P.getLocalName()))
                     {
                        // A nested paragraph becomes the current paragraph
                        wmlTags.add(subtag);
                        p = (WmlParagraph) subtag;
                     }
                     else
                     {
                        wmlTags.add(subtag);
                        containerClosed = true;
                     }
                  }
               }
            }
         }
      }

      return wmlTags;
   }


   //---------------------------------------------------------------------------
   /**
    Converts an HTML span tag into one or more WmlTextRuns. A span whose CSS specifies
    display:none produces nothing. Because text runs cannot be nested, a text run coming
    from a nested tag is emitted as a sibling (after combining a copy of the current
    run's properties with its own) and the content that follows it is placed in a new
    text run.
    <p>
    NOTE(review): the span's CSS is only applied to the first text run; runs started
    after a nested run do not receive it - confirm whether that is intended.
    </p>

    @param inSpanTag the HTML span tag to convert
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return the generated text runs, or an empty list for a hidden span
    */
   private List<XMLTag> parseSpanTag(Span inSpanTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inSpanTag);
      boolean hasCss = CollectionUtil.hasValues(cssDeclarations);
      if (hasCss)
      {
         for (CSSDeclaration cssDeclaration : cssDeclarations)
         {
            if (cssDeclaration.getProperty() == CSSProperty.display
                  && cssDeclaration.getValue().equals("none"))
            {
               // Hidden spans produce no output
               return wmlTags;
            }
         }
      }


      WmlTextRun textRun = new WmlTextRun(inDocx);
      wmlTags.add(textRun);

      if (hasCss)
      {
         convertCSSToTextRunProperties(cssDeclarations, textRun.getProperties());
      }

      if (inSpanTag.hasContentOrSubtags())
      {
         // Set once a sibling run has been emitted; later content then goes into a new run
         boolean containerClosed = false;

         for (Object content : inSpanTag.getContentPlusSubtagList())
         {
            if (content instanceof String)
            {
               if (containerClosed)
               {
                  textRun = new WmlTextRun(inDocx);
                  wmlTags.add(textRun);
               }

               textRun.addText((String) content);
            }
            else if (content instanceof byte[]) // Compressed content is stored as a byte[]
            {
               if (containerClosed)
               {
                  textRun = new WmlTextRun(inDocx);
                  // The new run has to be part of the result or its text is lost
                  wmlTags.add(textRun);
               }

               textRun.addText(GZIP.uncompressToString((byte[]) content));
            }
            else
            {
               List<XMLTag> subtags = convert((HTMLTag) content, inDocx);
               if (CollectionUtil.hasValues(subtags))
               {
                  for (XMLTag subtag : subtags)
                  {
                     String subtagName = subtag.getTagName();

                     if (subtagName.equalsIgnoreCase(WmlXML.R.getLocalName()))
                     {
                        // Word won't allow nesting of text runs
                        containerClosed = true;
                        WmlTextRun runSubtag = (WmlTextRun) subtag;
                        if (textRun.hasProperties())
                        {
                           // Start from a copy of the current run's properties and layer the nested run's on top
                           WmlTextRunProperties parentProperties = (WmlTextRunProperties) textRun.getProperties().clone();
                           runSubtag.setProperties(parentProperties.combine(runSubtag.getProperties()));
                        }
                        wmlTags.add(runSubtag);
                     }
                     else if (subtagName.equalsIgnoreCase(WmlXML.BR.getLocalName()))
                     {
                        // A break opens a new run unless the current run is still empty
                        if (containerClosed
                              || textRun.hasContentOrSubtags())
                        {
                           containerClosed = true;
                           textRun = new WmlTextRun(inDocx);
                           wmlTags.add(textRun);
                        }
                        textRun.addSubtag(subtag);
                     }
                     else
                     {
                        if (containerClosed)
                        {
                           textRun = new WmlTextRun(inDocx);
                           wmlTags.add(textRun);
                        }
                        textRun.addSubtag(subtag);
                     }
                  }
               }
            }
         }
      }

      return wmlTags;
   }

   //---------------------------------------------------------------------------
   /**
    Converts an HTML link (A) tag into WmlHyperlinks pointing at the link's URL. Every
    piece of content (text, compressed text, or tag produced by converting a subtag)
    is placed in a WmlHyperlink of its own. For a converted paragraph only its text runs
    are moved into the hyperlink. A link without content produces nothing.

    @param inLinkTag the HTML link tag to convert
    @param inDocx the Docx object handed to the WordprocessingML objects that get created
    @return the generated hyperlinks; empty if the link has no content
    @throws RuntimeException wrapping any checked exception raised during the conversion
                             (such as a malformed URL); unchecked exceptions pass through
                             unchanged
    */
   private List<XMLTag> parseLinkTag(Link inLinkTag, Docx inDocx)
   {
      List<XMLTag> wmlTags = new ArrayList<XMLTag>(5);

      try
      {
         if (inLinkTag.hasContentOrSubtags())
         {
            for (Object content : inLinkTag.getContentPlusSubtagList())
            {
               if (content instanceof String)
               {
                  WmlHyperlink link = new WmlHyperlink(new URL(inLinkTag.getURL()), inDocx);
                  wmlTags.add(link);

                  link.addTextRun((String) content);
               }
               else if (content instanceof byte[]) // Compressed content is stored as a byte[]
               {
                  WmlHyperlink link = new WmlHyperlink(new URL(inLinkTag.getURL()), inDocx);
                  wmlTags.add(link);

                  link.addTextRun(GZIP.uncompressToString((byte[]) content));
               }
               else
               {
                  List<XMLTag> subtags = convert((HTMLTag) content, inDocx);
                  if (CollectionUtil.hasValues(subtags))
                  {
                     for (XMLTag subtag : subtags)
                     {
                        boolean isParagraph = subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName());

                        WmlHyperlink link = new WmlHyperlink(new URL(inLinkTag.getURL()), inDocx);
                        wmlTags.add(link);

                        if (isParagraph)
                        {
                           // Only the paragraph's text runs are moved into the hyperlink
                           for (Object runTag : subtag.getSubtagsByName(WmlXML.R))
                           {
                              link.addSubtag((XMLNode) runTag);
                           }
                        }
                        else
                        {
                           link.addSubtag(subtag);
                        }
                     }
                  }
               }
            }
         }
      }
      catch (RuntimeException e)
      {
         // Already unchecked (e.g. from a nested convert() call) - don't bury it in another wrapper
         throw e;
      }
      catch (Exception e)
      {
         throw new RuntimeException(e);
      }

      return wmlTags;
   }

   //---------------------------------------------------------------------------
   /**
    * Converts the contents of a Nobr tag into WordprocessingML tags.
    * The Nobr tag itself doesn't map to anything, but any data it contains gets parsed.
    *
    * @param inNobrTag the Nobr tag whose content and subtags should be converted
    * @param inDocx    the document handed to newly created text runs and to convert()
    * @return the generated tags in the order they were produced; empty if the tag
    *         has no content or subtags
    */
   private List parseNobrTag(Nobr inNobrTag, Docx inDocx)
   {
      List wmlTags =  new ArrayList(5);

      if (inNobrTag.hasContentOrSubtags())
      {
         WmlTextRun textRun = null;
         // NOTE(review): containerClosed is never set to false in this method, so every
         // piece of content currently starts its own text run - confirm that is intended.
         boolean containerClosed = true;
         for (Object content : inNobrTag.getContentPlusSubtagList())
         {
            if (content instanceof String)
            {
               if (containerClosed)
               {
                  textRun = new WmlTextRun(inDocx);
                  wmlTags.add(textRun);
               }

               textRun.addText((String)content);
            }
            else if (content instanceof byte[]) // Compressed content is stored as a byte[]
            {
               if (containerClosed)
               {
                  textRun = new WmlTextRun(inDocx);
                  // The new run has to be registered with the result list;
                  // otherwise the uncompressed text never reaches the output.
                  wmlTags.add(textRun);
               }

               textRun.addText(GZIP.uncompressToString((byte[]) content));
            }
            else
            {
               List subtags = convert((HTMLTag) content, inDocx);
               if (CollectionUtil.hasValues(subtags))
               {
                  for (XMLTag subtag: subtags)
                  {
                     if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName()))
                     {
                        // Word won't allow nesting of text runs
                        containerClosed = true;
                        WmlTextRun runSubtag = (WmlTextRun) subtag;
                        // There may be no preceding run yet (the first piece of content can be
                        // a tag), in which case there are no properties to combine.
                        if (textRun != null
                            && textRun.hasProperties())
                        {
                           WmlTextRunProperties parentProperties = (WmlTextRunProperties) textRun.getProperties().clone();
                           runSubtag.setProperties(parentProperties.combine(runSubtag.getProperties()));
                        }
                        wmlTags.add(runSubtag);
                     }
                     else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName()))
                     {
                        // A break is placed in a fresh run unless the current run is open and still empty.
                        if (containerClosed
                              || textRun.hasContentOrSubtags())
                        {
                           containerClosed = true;
                           textRun = new WmlTextRun(inDocx);
                           wmlTags.add(textRun);
                        }
                        textRun.addSubtag(subtag);
                     }
                     else if (containerClosed)
                     {
                        // Any other tag is wrapped in a new run when no run is open.
                        textRun = new WmlTextRun(inDocx);
                        wmlTags.add(textRun);
                        textRun.addSubtag(subtag);
                     }
                     else
                     {
                        textRun.addSubtag(subtag);
                     }
                  }
               }
            }
         }
      }

      return wmlTags;
   }

   //---------------------------------------------------------------------------
   /**
    * Returns the in-line and CSS-defined styling for the specified tag.
    * Declarations reported by mCSS (for the print media type) are added first,
    * followed by the declarations parsed from the tag's own style string.
    *
    * @param inHTMLTag the HTML tag whose styling should be gathered
    * @return the gathered declarations; empty if there are none
    */
   private List getCSSDeclarations(HTMLTag inHTMLTag)
   {
      List gathered = new ArrayList(20);

      // Declarations from mCSS are only available when it has been set.
      if (mCSS != null)
      {
         List sheetDeclarations = mCSS.getCSSDeclarationsForHTMLTag(inHTMLTag, CSSMediaType.print);
         if (CollectionUtil.hasValues(sheetDeclarations))
         {
            gathered.addAll(sheetDeclarations);
         }
      }

      // In-line styling is appended after the declarations from mCSS.
      String inlineStyle = inHTMLTag.getStyle();
      if (StringUtil.isSet(inlineStyle))
      {
         List inlineDeclarations = parseCssStyleString(inlineStyle);
         if (CollectionUtil.hasValues(inlineDeclarations))
         {
            gathered.addAll(inlineDeclarations);
         }
      }

      return gathered;
   }

   //---------------------------------------------------------------------------
   /**
    * Applies the supported CSS declarations to the specified paragraph properties.
    * Only text-align with a value of center, left, or right (case-insensitive) is translated;
    * everything else is ignored.
    *
    * @param inCSSDeclarations     the CSS declarations to examine
    * @param inParagraphProperties the paragraph properties to update
    */
   private void convertCSSToParagraphProperties(List inCSSDeclarations, WmlParagraphProperties inParagraphProperties)
   {
      for (CSSDeclaration declaration : inCSSDeclarations)
      {
         if (declaration.getProperty() != CSSProperty.text_align)
         {
            continue;
         }

         String alignment = declaration.getValue();

         WmlJustification mapped;
         if (alignment.equalsIgnoreCase("center"))
         {
            mapped = WmlJustification.center;
         }
         else if (alignment.equalsIgnoreCase("left"))
         {
            mapped = WmlJustification.left;
         }
         else if (alignment.equalsIgnoreCase("right"))
         {
            mapped = WmlJustification.right;
         }
         else
         {
            // Unrecognized alignment values leave the justification untouched.
            mapped = null;
         }

         if (mapped != null)
         {
            inParagraphProperties.setJustification(mapped);
         }
      }
   }

   //---------------------------------------------------------------------------
   /**
    * Applies the supported CSS declarations to the specified text run properties.
    * Handled properties: color, background-color, font-weight (bold only),
    * font-style (italic only), font-size (values ending in "pt" that match
    * PTS_SIZE_PATTERN), and border. Other declarations are ignored.
    *
    * @param inCSSDeclarations   the CSS declarations to examine
    * @param inTextRunProperties the text run properties to update
    */
   private void convertCSSToTextRunProperties(List inCSSDeclarations, WmlTextRunProperties inTextRunProperties)
   {
      for (CSSDeclaration declaration : inCSSDeclarations)
      {
         CSSProperty property = declaration.getProperty();

         if (property == CSSProperty.color)
         {
            HTMLColor textColor = HTMLColor.valueOf(declaration.getValue());
            if (textColor != null)
            {
               inTextRunProperties.setColor(textColor);
            }
         }
         else if (property == CSSProperty.background_color)
         {
            HTMLColor fillColor = HTMLColor.valueOf(declaration.getValue());
            if (fillColor != null)
            {
               inTextRunProperties.getShading().setFill(fillColor);
            }
         }
         else if (property == CSSProperty.font_weight)
         {
            if (declaration.getValue().equalsIgnoreCase("bold"))
            {
               inTextRunProperties.setBold();
            }
         }
         else if (property == CSSProperty.font_style)
         {
            if (declaration.getValue().equalsIgnoreCase("italic"))
            {
               inTextRunProperties.setItalics();
            }
         }
         else if (property == CSSProperty.font_size)
         {
            // Only point sizes are converted; other units (such as 'em') are skipped.
            String sizeValue = declaration.getValue();
            if (sizeValue.endsWith("pt"))
            {
               Matcher pointsMatcher = PTS_SIZE_PATTERN.matcher(sizeValue);
               if (pointsMatcher.matches())
               {
                  int points = Integer.parseInt(pointsMatcher.group(1));
                  inTextRunProperties.setSize(new Points(points));
               }
            }
         }
         else if (property == CSSProperty.border)
         {
            // NOTE(review): the return value is discarded; presumably getBorder() configures
            // the border from the declaration as a side effect - confirm.
            inTextRunProperties.getBorder(declaration);
         }
      }
   }

   //---------------------------------------------------------------------------
   /**
    * Applies the supported CSS declarations to the specified table properties.
    * Currently only margin declarations are used; they are passed to the table's cell margins.
    * TODO (carried over from the original code): presumably more properties remain to be mapped.
    *
    * @param inCSSDeclarations the CSS declarations to examine
    * @param inTableProperties the table properties to update
    */
   private void convertCSSToTableProperties(List inCSSDeclarations, WmlTableProperties inTableProperties)
   {
      for (CSSDeclaration declaration : inCSSDeclarations)
      {
         if (declaration.getProperty() != CSSProperty.margin)
         {
            continue;
         }

         inTableProperties.getTableCellMargins().addMargins(declaration);
      }
   }


   //---------------------------------------------------------------------------
   /**
    * Applies the supported CSS declarations to the specified table cell properties.
    * Handled properties: background-color, vertical-align (middle, top, bottom),
    * border / border-top / border-bottom / border-left / border-right (unless the value is "none"),
    * width (values ending in "px" that match PX_SIZE_PATTERN), and transform
    * (only the exact value "rotate(-90deg)"). Other declarations are ignored.
    *
    * @param inCSSDeclarations     the CSS declarations to examine
    * @param inTableCellProperties the table cell properties to update
    */
   private void convertCSSToTableCellProperties(List inCSSDeclarations, WmlTableCellProperties inTableCellProperties)
   {
      for (CSSDeclaration declaration : inCSSDeclarations)
      {
         CSSProperty property = declaration.getProperty();

         if (property == CSSProperty.background_color)
         {
            HTMLColor fillColor = HTMLColor.valueOf(declaration.getValue());
            if (fillColor != null)
            {
               inTableCellProperties.getShading().setFill(fillColor);
            }
         }
         else if (property == CSSProperty.vertical_align)
         {
            String alignment = declaration.getValue();

            WmlVerticalJustification mapped = null;
            if (alignment.equalsIgnoreCase("middle"))
            {
               mapped = WmlVerticalJustification.center;
            }
            else if (alignment.equalsIgnoreCase("top"))
            {
               mapped = WmlVerticalJustification.top;
            }
            else if (alignment.equalsIgnoreCase("bottom"))
            {
               mapped = WmlVerticalJustification.bottom;
            }

            if (mapped != null)
            {
               inTableCellProperties.setVerticalJustification(mapped);
            }
         }
         else if (property == CSSProperty.border
                  || property == CSSProperty.border_top
                  || property == CSSProperty.border_bottom
                  || property == CSSProperty.border_left
                  || property == CSSProperty.border_right)
         {
            // A value of "none" adds no border.
            if (! declaration.getValue().equalsIgnoreCase("none"))
            {
               inTableCellProperties.getBorders().addBorder(new WmlTableCellBorder(declaration));
            }
         }
         else if (property == CSSProperty.width)
         {
            // Only pixel widths are converted; other units (such as 'em') are skipped.
            String widthValue = declaration.getValue();
            if (widthValue.endsWith("px"))
            {
               Matcher pixelMatcher = PX_SIZE_PATTERN.matcher(widthValue);
               if (pixelMatcher.matches())
               {
                  int pixels = Integer.parseInt(pixelMatcher.group(1));
                  inTableCellProperties.setWidth(new Pixels(pixels));
               }
            }
         }
         else if (property == CSSProperty.transform)
         {
            // The comparison is exact (case- and whitespace-sensitive).
            if (declaration.getValue().equals("rotate(-90deg)"))
            {
               inTableCellProperties.setTextDirection(WmlTextDirection.btLr);
            }
         }
      }
   }

   //---------------------------------------------------------------------------
   /**
    * Applies the supported CSS declarations to the specified table row properties.
    * Only a height whose value ends in "px" and matches PX_SIZE_PATTERN is translated;
    * it is set as the row's exact height. Other declarations are ignored.
    *
    * @param inCSSDeclarations    the CSS declarations to examine
    * @param inTableRowProperties the table row properties to update
    */
   private void convertCSSToTableRowProperties(List inCSSDeclarations, WmlTableRowProperties inTableRowProperties)
   {
      for (CSSDeclaration declaration : inCSSDeclarations)
      {
         if (declaration.getProperty() != CSSProperty.height)
         {
            continue;
         }

         // Only pixel heights are converted; other units (such as 'em') are skipped.
         String heightValue = declaration.getValue();
         if (! heightValue.endsWith("px"))
         {
            continue;
         }

         Matcher pixelMatcher = PX_SIZE_PATTERN.matcher(heightValue);
         if (pixelMatcher.matches())
         {
            int pixels = Integer.parseInt(pixelMatcher.group(1));
            inTableRowProperties.setExactHeight(new Pixels(pixels));
         }
      }
   }

   //---------------------------------------------------------------------------
   /**
    * Parses a CSS style string (semicolon-separated "property: value" pairs)
    * into a list of CSS declarations.
    * Pieces that contain no colon (for example blank text after a trailing semicolon or
    * between two consecutive semicolons) are skipped, as are pieces for which
    * CSSProperty.valueOf() returns null.
    *
    * @param inCssStyleString the style string to parse; null yields an empty list
    * @return the parsed declarations in the order they appear; never null
    */
   private List parseCssStyleString(String inCssStyleString)
   {
      List cssDeclarations = new ArrayList(20);

      if (inCssStyleString == null)
      {
         return cssDeclarations;
      }

      String[] pieces = inCssStyleString.split(";");
      for (String piece : pieces)
      {
         int colonIdx = piece.indexOf(":");
         if (colonIdx < 0)
         {
            // Without a colon there is no property/value pair; substring(0, -1) would throw.
            continue;
         }

         CSSProperty property  = CSSProperty.valueOf(piece.substring(0, colonIdx).trim());
         if (property != null)
         {
            String value = piece.substring(colonIdx + 1).trim();
            cssDeclarations.add(new CSSDeclaration(property, value));
         }
      }

      return cssDeclarations;
   }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy