// All Downloads are FREE. Search and download functionalities use the official Maven repository.
//
// com.hfg.xml.XMLBasedDoc Maven / Gradle / Ivy
//
// There is a newer version: 20240423
// Show newest version
package com.hfg.xml;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hfg.exception.ProgrammingException;
import com.hfg.util.StringUtil;
import com.hfg.xml.parser.XMLTagReader;


//------------------------------------------------------------------------------
/**
 Abstract base XML / HTML document.

 @author J. Alex Taylor, hairyfatguy.com
 */
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------

public abstract class XMLBasedDoc implements Cloneable
{
   private static final XMLSpec DEFAULT_XMLSPEC = XMLSpec.v1_0;
   private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");

   // Matches an XML declaration at the start of the text and captures the value of its
   // encoding pseudo-attribute. The match is confined to the declaration itself ([^>]*?)
   // so that an 'encoding=' attribute appearing later on the same line is never captured.
   private static final Pattern XML_HEADER_PATTERN = Pattern.compile("^\\s*<\\?xml\\s[^>]*?encoding\\s*=\\s*['\"]([^'\"\\s]+)['\"]", Pattern.CASE_INSENSITIVE);

   // Maximum number of bytes / chars examined when looking for the XML declaration.
   // Also used as the mark() read-ahead limit, so it must never be exceeded before reset().
   private static final int HEADER_READ_LIMIT = 1024;

   protected static final String  NL = System.getProperty("line.separator");

   private XMLNode mRootNode;
   private String  mName;
   private XMLSpec mSpec       = DEFAULT_XMLSPEC;
   private Charset mEncoding   = DEFAULT_CHARSET;
   private boolean mByteOrderMarkPresent;
   private boolean mStandalone = true;
   private Doctype mDoctype;
   private List mTopLevelComments;
   // Was the document constructed from objects or read in from a pre-existing file?
   private boolean mConstructedContent = true;

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates an empty document with no root node.
    */
   public XMLBasedDoc()
   {

   }

   //---------------------------------------------------------------------------
   /**
    Creates a document around the specified root node.
    @param inRootNode The node to use as the root of the document.
    */
   public XMLBasedDoc(XMLNode inRootNode)
   {
      setRootNode(inRootNode);
   }

   //---------------------------------------------------------------------------
   /**
    The preferred way to read XML from a file. The encoding is determined from
    the file's byte order mark or XML declaration before the content is decoded.
    If the file is null or does not exist, nothing is read and the document is left empty.

    @param inFile The XML file to read.
    @throws XMLException if the tag reader rejects the content of the file
    @throws IOException if the file cannot be read
    */
   public XMLBasedDoc(File inFile)
      throws XMLException, IOException
   {
      if (inFile != null
          && inFile.exists())
      {
         try
         {
            mEncoding = determineEncoding(inFile);

            setName(inFile.getName());

            // Now read in the XML.
            BufferedReader reader = null;
            try
            {
               reader = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), mEncoding));

               readContent(reader, mByteOrderMarkPresent);
            }
            finally
            {
               if (reader != null) reader.close();
            }
         }
         catch (XMLException e)
         {
            throw new XMLException("The file " + StringUtil.singleQuote(inFile.getPath()) + " doesn't appear to be in proper XML format!", e);
         }
         catch (IOException e)
         {
            throw new IOException("Problem encountered while reading file " + StringUtil.singleQuote(inFile.getPath()) + "!", e);
         }
      }
   }

   //---------------------------------------------------------------------------
   /**
    Reads XML from a reader that has already been set up with the proper Charset.
    The reader is closed once parsing has been attempted.

    @param inReader The reader supplying the XML text.
    @throws XMLException wrapping any IOException raised while reading
    */
   public XMLBasedDoc(BufferedReader inReader)
   {
      try
      {
         mEncoding = determineEncoding(inReader);

         // Now read in the XML.
         try
         {
            // A detected byte order mark is deliberately not skipped here (unchanged behavior).
            readContent(inReader, false);
         }
         finally
         {
            inReader.close();
         }
      }
      catch (IOException e)
      {
         throw new XMLException(e);
      }
   }

   //---------------------------------------------------------------------------
   /**
    The preferred way to read XML from a stream. The encoding is determined from
    the raw bytes (byte order mark or XML declaration) before the stream is decoded,
    in the same way as for the File constructor. The stream is closed once parsing
    has been attempted.

    @param inStream The stream supplying the XML bytes.
    @throws XMLException wrapping any IOException raised while reading
    */
   public XMLBasedDoc(BufferedInputStream inStream)
   {
      try
      {
         try
         {
            // Sniff the encoding from the bytes first. Wrapping the stream in a
            // platform-default reader beforehand would decode non-default encodings incorrectly.
            mEncoding = determineEncoding(inStream);

            BufferedReader reader = new BufferedReader(new InputStreamReader(inStream, mEncoding));

            readContent(reader, mByteOrderMarkPresent);
         }
         finally
         {
            if (inStream != null) inStream.close();
         }
      }
      catch (IOException e)
      {
         throw new XMLException(e);
      }
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################


   //---------------------------------------------------------------------------
   /**
    Sets the name of the document.
    @param inValue The new name.
    @return this document, to allow method chaining
    */
   public XMLBasedDoc setName(String inValue)
   {
      mName = inValue;
      return this;
   }


   //---------------------------------------------------------------------------
   /**
    @return The name of the document (the file name when read via the File constructor).
    */
   public String name()
   {
      return mName;
   }

   //---------------------------------------------------------------------------
   /**
    Creates a copy of the document. The root node is cloned; all other fields
    (including the doctype and the top-level comment list) are shared with the original.

    @return The cloned document.
    */
   @Override
   public XMLBasedDoc clone()
   {
      XMLBasedDoc cloneObj;
      try
      {
         cloneObj = (XMLBasedDoc) super.clone();
      }
      catch (CloneNotSupportedException e)
      {
         throw new ProgrammingException(e);
      }

      if (mRootNode != null)
      {
         cloneObj.mRootNode = (XMLNode) mRootNode.clone();
      }

      return cloneObj;
   }

   //---------------------------------------------------------------------------
   /**
    Sets the XML specification version for the document.
    @param inValue The specification version.
    @return this document, to allow method chaining
    */
   public XMLBasedDoc setSpec(XMLSpec inValue)
   {
      mSpec = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    Sets the Charset used when the document is written to a file, stream, or String.
    @param inValue The Charset to use.
    @return this document, to allow method chaining
    */
   public XMLBasedDoc setEncoding(Charset inValue)
   {
      mEncoding = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return The Charset detected when the document was read or the one set via setEncoding().
    */
   public Charset getEncoding()
   {
      return mEncoding;
   }

   //---------------------------------------------------------------------------
   /**
    Sets the standalone flag for the document.
    @param inValue The new value of the flag.
    @return this document, to allow method chaining
    */
   public XMLBasedDoc setIsStandalone(boolean inValue)
   {
      mStandalone = inValue;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    Sets the doctype that is written as part of the document header.
    @param inValue The doctype (may be null for none).
    @return this document, to allow method chaining
    */
   public XMLBasedDoc setDoctype(Doctype inValue)
   {
      mDoctype = inValue;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    @return The doctype of the document or null if none has been set.
    */
   public Doctype getDoctype()
   {
      return mDoctype;
   }

   //---------------------------------------------------------------------------
   /**
    @return The top-level comments reported by the tag reader when the document was parsed,
            or null if the document was not parsed from existing content.
    */
   public List getTopLevelComments()
   {
      return mTopLevelComments;
   }

   //---------------------------------------------------------------------------
   /**
    Sets the root node of the document.
    @param inRootNode The new root node.
    */
   public void setRootNode(XMLNode inRootNode)
   {
      mRootNode = inRootNode;
   }

   //---------------------------------------------------------------------------
   /**
    @return The root node of the document or null if there is none.
    */
   public XMLNode getRootNode()
   {
      return mRootNode;
   }

   //---------------------------------------------------------------------------
   /**
    Generates the XML for the document as a String. The text is passed through the
    document's Charset and decoded with that same Charset, so the result does not
    depend on the platform default encoding.

    @return The XML String for the document.
    */
   public String toXML()
   {
      ByteArrayOutputStream outStream = new ByteArrayOutputStream();
      try
      {
         BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(outStream, mEncoding));
         toXML(bufferedWriter);
         bufferedWriter.close();
      }
      catch (IOException e)
      {
         throw new XMLException(e);
      }

      // Decode with the Charset that was used for encoding, not the platform default.
      return new String(outStream.toByteArray(), mEncoding);
   }

   //---------------------------------------------------------------------------
   /**
    The preferred way to save XML to a file. Properly encodes the XML based on
    the specified Charset.

    @param inFile the File to which the XML should be written
    */
   public void toXML(File inFile)
   {
      checkPermissions(inFile);

      try
      {
         Writer writer = null;
         try
         {
            writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(inFile), mEncoding));
            toXML(writer);
         }
         finally
         {
            if (writer != null) writer.close();
         }
      }
      catch (IOException e)
      {
         throw new XMLException(e);
      }
   }

   //---------------------------------------------------------------------------
   /**
    Writes the XML document to the specified OutputStream using the document's Charset.
    Flushes but does not close the specified OutputStream after writing.

    @param inStream The OutputStream to which the XML will be written
    */
   public void toXML(OutputStream inStream)
   {
      Writer writer = new OutputStreamWriter(inStream, mEncoding);

      toXML(writer);
   }

   //---------------------------------------------------------------------------
   /**
    Writes the XML document to the specified Writer. Note that the caller
    needs to have specified the Charset for the Writer if other than the default encoding is desired.
    Flushes but does not close the specified Writer after writing.

    @param inWriter The Writer to which the XML will be written
    @throws XMLException wrapping any IOException raised while writing
    */
   public void toXML(Writer inWriter)
   {
      prepareForOutput();

      try
      {
         BufferedWriter bufferedWriter = asBufferedWriter(inWriter);

         bufferedWriter.write(getHeader());
         if (mRootNode != null) mRootNode.toXML(bufferedWriter);
         bufferedWriter.flush();
      }
      catch (IOException e)
      {
         // XMLException (rather than a bare RuntimeException) for consistency with the other output methods.
         throw new XMLException(e);
      }
   }

   //---------------------------------------------------------------------------
   /**
    Generates an indented form of the XML for the document as a String. The text is
    passed through the document's Charset and decoded with that same Charset.

    @param inInitialIndentLevel The size of the initial indent
    @param inIndentSize The number of spaces incremented for ea. indent level
    @return The indented XML String for the document.
    */
   public String toIndentedXML(int inInitialIndentLevel, int inIndentSize)
   {
      ByteArrayOutputStream outStream = new ByteArrayOutputStream();
      try
      {
         toIndentedXML(outStream, inInitialIndentLevel, inIndentSize);
      }
      catch (Exception e)
      {
         throw new XMLException(e);
      }

      // Decode with the Charset that was used for encoding, not the platform default.
      return new String(outStream.toByteArray(), mEncoding);
   }

   //---------------------------------------------------------------------------
   /**
    Writes an indented form of the XML document to the specified OutputStream.
    Flushes but does not close the specified OutputStream after writing.

    @param inStream   The OutputStream to which the XML will be written
    @param inInitialIndentLevel The size of the initial indent
    @param inIndentSize The number of spaces incremented for ea. indent level
    */
   public void toIndentedXML(OutputStream inStream, int inInitialIndentLevel, int inIndentSize)
   {
      // Delegate to the Writer form so that prepareForOutput() is invoked on this path as well.
      Writer writer = new OutputStreamWriter(inStream, mEncoding);

      toIndentedXML(writer, inInitialIndentLevel, inIndentSize);
   }

   //---------------------------------------------------------------------------
   /**
    Writes an indented form of the XML document to the specified Writer. Note that the caller
    needs to have specified the Charset for the Writer if other than the default encoding is desired.
    Flushes but does not close the specified Writer after writing.

    @param inWriter The Writer to which the XML will be written
    @param inInitialIndentLevel The size of the initial indent
    @param inIndentSize The number of spaces incremented for ea. indent level
    @throws XMLException wrapping any IOException raised while writing
    */
   public void toIndentedXML(Writer inWriter, int inInitialIndentLevel, int inIndentSize)
   {
      prepareForOutput();

      try
      {
         BufferedWriter bufferedWriter = asBufferedWriter(inWriter);

         bufferedWriter.write(getHeader());
         if (mRootNode != null) mRootNode.toIndentedXML(bufferedWriter, inInitialIndentLevel, inIndentSize);
         bufferedWriter.flush();
      }
      catch (IOException e)
      {
         throw new XMLException(e);
      }
   }


   //---------------------------------------------------------------------------
   /**
    The preferred way to save XML to a file. Properly encodes the XML based on
    the specified Charset.

    @param inFile The target output file
    @param inInitialIndentLevel The size of the initial indent
    @param inIndentSize The number of spaces incremented for ea. indent level
    */
   public void toIndentedXML(File inFile, int inInitialIndentLevel, int inIndentSize)
   {
      checkPermissions(inFile);

      try
      {
         Writer writer = null;
         try
         {
            writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(inFile), mEncoding));
            toIndentedXML(writer, inInitialIndentLevel, inIndentSize);
         }
         finally
         {
            if (writer != null) writer.close();
         }
      }
      catch (IOException e)
      {
         throw new XMLException(e);
      }
   }

   //--------------------------------------------------------------------------
   /**
    Builds the text written ahead of the root node: a line separator followed by
    the doctype (if one has been set) and another line separator.

    @return The header text for the document.
    */
   public String getHeader()
   {
      // NOTE(review): the original code appended an empty string literal at this point and
      // the spec / standalone fields are never read anywhere in this class. Presumably an
      // XML declaration literal was lost here - TODO confirm against the upstream source
      // and restore it. The current output is left unchanged.
      StringBuilder buffer = new StringBuilder();
      buffer.append(NL);

      Doctype doctype = getDoctype();
      if (doctype != null)
      {
         buffer.append(doctype.toString());
         buffer.append(NL);
      }

      return buffer.toString();
   }

   //--------------------------------------------------------------------------
   /**
    Asks the root node to replace its character entities. Does nothing if the
    document has no root node.
    */
   public void replaceCharacterEntities()
   {
      if (mRootNode != null)
      {
         mRootNode.replaceCharacterEntities();
      }
   }

   //###########################################################################
   // PROTECTED METHODS
   //###########################################################################

   //--------------------------------------------------------------------------
   /**
    Hook invoked before the document is written to a Writer. Does nothing by default.
    */
   protected void  prepareForOutput()
   {

   }

   //--------------------------------------------------------------------------
   /**
    @return true if the document was built from objects, false if it was parsed from existing content.
    */
   protected boolean isConstructedContent()
   {
      return mConstructedContent;
   }

   //--------------------------------------------------------------------------
   /**
    @return The tag reader used by the constructors to parse existing content.
    */
   protected XMLTagReader getTagReader()
   {
      return new XMLTagReader();
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   // Parses the content supplied by the reader and stores the resulting root node,
   // doctype, and top-level comments. Shared by all of the reading constructors so
   // that they record the same information.
   private void readContent(BufferedReader inReader, boolean inSkipByteOrderMark)
         throws IOException
   {
      if (inSkipByteOrderMark)
      {
         inReader.read(); // Skip the byte order mark
      }

      XMLTagReader tagReader = getTagReader();
      tagReader.parse(inReader);
      setRootNode(tagReader.getRootNode());
      setDoctype(tagReader.getDoctype());
      mTopLevelComments = tagReader.getTopLevelComments();

      mConstructedContent = false;
   }

   //---------------------------------------------------------------------------
   // Returns the specified Writer itself if it is already buffered, otherwise a buffered wrapper.
   private static BufferedWriter asBufferedWriter(Writer inWriter)
   {
      if (inWriter instanceof BufferedWriter)
      {
         return (BufferedWriter) inWriter;
      }

      return new BufferedWriter(inWriter);
   }

   //---------------------------------------------------------------------------
   // Returns the name of the encoding indicated by a leading byte order mark
   // (UTF-16BE, UTF-16LE, or UTF-8) or null if the bytes don't start with one.
   private static String getByteOrderMarkEncoding(byte[] inBytes)
   {
      if (inBytes.length < 2)
      {
         return null;
      }

      int byte0 = 0xff & inBytes[0];
      int byte1 = 0xff & inBytes[1];

      if (byte0 == 0xFE && byte1 == 0xFF)
      {
         return "UTF-16BE";
      }

      if (byte0 == 0xFF && byte1 == 0xFE)
      {
         return "UTF-16LE";
      }

      if (inBytes.length >= 3
          && byte0 == 0xEF && byte1 == 0xBB && (0xff & inBytes[2]) == 0xBF)
      {
         return "UTF-8";
      }

      return null;
   }

   //---------------------------------------------------------------------------
   // Returns the Charset named by the encoding pseudo-attribute of the XML declaration
   // at the start of the specified text or null if no such declaration is found.
   private static Charset getDeclaredEncoding(String inXmlHeader)
   {
      if (StringUtil.isSet(inXmlHeader))
      {
         Matcher m = XML_HEADER_PATTERN.matcher(inXmlHeader);
         if (m.find())
         {
            return Charset.forName(m.group(1));
         }
      }

      return null;
   }

   //---------------------------------------------------------------------------
   // See:  http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
   // Opens the file just long enough to determine its encoding.
   private Charset determineEncoding(File inFile)
         throws IOException
   {
      Charset encoding = DEFAULT_CHARSET;

      BufferedInputStream stream = null;
      try
      {
         // Determine the XML file's encoding scheme.
         stream = new BufferedInputStream(new FileInputStream(inFile));

         encoding = determineEncoding(stream);
      }
      finally
      {
         if (stream != null) stream.close();
      }

      return encoding;
   }

   //---------------------------------------------------------------------------
   // See:  http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
   // Determines the encoding from the leading bytes of the stream: first from a byte
   // order mark, then from the XML declaration, and finally falling back to the default.
   // Records whether a byte order mark was found. The stream position is restored before returning.
   private Charset determineEncoding(BufferedInputStream inStream)
         throws IOException
   {
      Charset encoding = null;
      mByteOrderMarkPresent = false;

      inStream.mark(4);
      try
      {
         // Is there a BOM (Byte Order Mark)?
         byte[] bytes = new byte[4];
         if (inStream.read(bytes) == 4)
         {
            String encodingString = getByteOrderMarkEncoding(bytes);
            if (encodingString != null)
            {
               mByteOrderMarkPresent = true;
               encoding = Charset.forName(encodingString);
            }
         }
      }
      finally
      {
         inStream.reset();
      }

      if (null == encoding)
      {
         // See if the first non-blank line is an XML declaration line that specifies the encoding
         inStream.mark(HEADER_READ_LIMIT);
         try
         {
            int theChar;
            int charCount = 0;
            StringBuilder buffer = new StringBuilder();
            // The count is tested before reading so that no more than the marked limit is ever consumed.
            while (charCount < HEADER_READ_LIMIT
                   && (theChar = inStream.read()) != -1)
            {
               charCount++;
               if (theChar == '\n'
                     || theChar == '\r')
               {
                  if (buffer.toString().trim().length() > 0)
                  {
                     break;
                  }

                  // Only whitespace so far. Start over with the next line.
                  buffer.setLength(0);
                  continue;
               }

               buffer.append((char) theChar);
            }

            encoding = getDeclaredEncoding(buffer.toString().trim());
         }
         finally
         {
            inStream.reset();
         }
      }

      if (null == encoding)
      {
         encoding = DEFAULT_CHARSET;
      }

      return encoding;
   }

   //---------------------------------------------------------------------------
   // See:  http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
   // Determines the encoding from the leading chars of the reader: first from what looks
   // like a byte order mark, then from an XML declaration on the first line, and finally
   // falling back to the default. The reader position is restored before returning.
   private Charset determineEncoding(BufferedReader inReader)
         throws IOException
   {
      Charset encoding = null;
      mByteOrderMarkPresent = false;

      inReader.mark(4);
      try
      {
         // Is there a BOM (Byte Order Mark)?
         char[] chars = new char[4];
         if (inReader.read(chars) == 4)
         {
            // NOTE(review): the chars have already been decoded by the reader and are converted
            // back to bytes with the platform default charset, so this check is only a heuristic
            // whose outcome depends on the platform. Left as it was - TODO confirm the intent.
            String encodingString = getByteOrderMarkEncoding(new String(chars).getBytes());
            if (encodingString != null)
            {
               mByteOrderMarkPresent = true;
               encoding = Charset.forName(encodingString);
            }
         }
      }
      finally
      {
         inReader.reset();
      }

      if (null == encoding)
      {
         // See if the first line is an XML declaration line that specifies the encoding
         inReader.mark(HEADER_READ_LIMIT);
         try
         {
            // Read at most the marked number of chars from the first line. Using readLine()
            // here could read past the mark on a long first line (making reset() fail) and
            // returns null for an empty reader.
            int theChar;
            int charCount = 0;
            StringBuilder buffer = new StringBuilder();
            while (charCount < HEADER_READ_LIMIT
                   && (theChar = inReader.read()) != -1)
            {
               charCount++;
               if (theChar == '\n'
                     || theChar == '\r')
               {
                  break;
               }

               buffer.append((char) theChar);
            }

            encoding = getDeclaredEncoding(buffer.toString().trim());
         }
         finally
         {
            inReader.reset();
         }
      }

      if (null == encoding)
      {
         encoding = DEFAULT_CHARSET;
      }

      return encoding;
   }

   //--------------------------------------------------------------------------
   // Verifies that the specified file can be written, creating its parent
   // directory if necessary. Throws a RuntimeException if it can't be.
   private void checkPermissions(File inFile)
   {
      if (null == inFile)
      {
         throw new RuntimeException("The specified file was null!");
      }

      if (inFile.exists())
      {
         if (! inFile.canWrite())
         {
            throw new RuntimeException("No write permissions for " + StringUtil.singleQuote(inFile.getAbsolutePath()) + "!");
         }

         return;
      }

      File parentDir = inFile.getParentFile();
      if (null == parentDir)
      {
         // No parent directory in the path, so there is nothing to check up front.
         return;
      }

      if (parentDir.exists())
      {
         if (! parentDir.canWrite())
         {
            throw new RuntimeException("No write permissions for dir " + StringUtil.singleQuote(parentDir.getAbsolutePath()) + "!");
         }
      }
      else if (! parentDir.mkdirs())
      {
         throw new RuntimeException("Could not create dir " + StringUtil.singleQuote(parentDir.getAbsolutePath()) + "!");
      }
   }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy