com.hfg.xml.XMLDoc Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.xml;
import com.hfg.exception.ProgrammingException;
import com.hfg.util.StringUtil;
import com.hfg.xml.parser.XMLTagReader;
import java.io.*;
import java.nio.charset.Charset;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import org.xml.sax.SAXException;
//------------------------------------------------------------------------------
/**
XMLDoc is a generic XML document container.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
// https://en.wikipedia.org/wiki/Root_element
public class XMLDoc implements XMLizable
{
// Root node of the document; null until supplied via constructor/setter or parsed from input.
private XMLNode mRootNode;
// Optional document name; the File constructor sets it to the file's name.
private String mName;
// XML specification version. Settable via setSpec(); not read anywhere else in the visible code.
private XMLSpec mSpec = DEFAULT_XMLSPEC;
// Charset used when reading from / writing to byte streams and files.
private Charset mEncoding = DEFAULT_CHARSET;
// Set by determineEncoding(BufferedInputStream): true if the input started with a byte order mark.
private boolean mByteOrderMarkPresent;
// Standalone flag. Settable via setIsStandalone(); not read anywhere else in the visible code.
private boolean mStandalone = true;
// Optional DOCTYPE; when set it is appended to the text returned by getHeader().
private Doctype mDoctype;
// Comments found outside the root node; only populated by the BufferedInputStream constructor.
// NOTE(review): raw List - the element type is not visible here; confirm and parameterize.
private List mTopLevelComments;
// Defaults used to initialize mSpec and mEncoding above.
private static final XMLSpec DEFAULT_XMLSPEC = XMLSpec.v1_0;
private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");
// Matches a leading XML declaration (case-insensitively) and captures the quoted encoding name in group 1.
private static final Pattern XML_HEADER_PATTERN = Pattern.compile("^\\s*<\\?xml .+encoding=[\\'\"](\\S+)[\\'\"]", Pattern.CASE_INSENSITIVE);
// Platform line separator, used when building the header.
protected static final String NL = System.getProperty("line.separator");
//###########################################################################
// CONSTRUCTORS
//###########################################################################
//---------------------------------------------------------------------------
/**
Creates an empty document: no root node, no name, no doctype, and the
default spec and encoding from the field initializers.
*/
public XMLDoc()
{
}
//---------------------------------------------------------------------------
/**
Creates a document around an existing root node.
@param inRootNode the node to use as the document's root (passed straight to setRootNode())
*/
public XMLDoc(XMLNode inRootNode)
{
// NOTE(review): setRootNode() is public and overridable, so a subclass override runs before the subclass is constructed.
setRootNode(inRootNode);
}
//---------------------------------------------------------------------------
/**
 The preferred way to read XML from a file.
 <p>
 The file's encoding is determined first (byte order mark, then the encoding
 named in the XML declaration, then the default charset) and the file is then
 parsed using that encoding. The document name is set to the file's name.
 The root node, doctype and top-level comments found by the parser are stored.
 </p>
 <p>
 If inFile is null or does not exist, nothing is read and the document is left empty.
 </p>
 @param inFile The XML file to read.
 @throws XMLException if the file content could not be parsed as XML
 @throws IOException if the file could not be read
 */
public XMLDoc(File inFile)
   throws XMLException, IOException
{
   if (inFile != null
       && inFile.exists())
   {
      try
      {
         mEncoding = determineEncoding(inFile);

         setName(inFile.getName());

         // Now read in the XML.
         BufferedReader reader = null;
         try
         {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), mEncoding));

            if (mByteOrderMarkPresent)
            {
               reader.read(); // Skip the byte order mark (decoded as a single character)
            }

            XMLTagReader tagReader = new XMLTagReader();
            tagReader.parse(reader);

            mRootNode = tagReader.getRootNode();
            setDoctype(tagReader.getDoctype());
            // Keep the top-level comments, exactly as the stream-based constructor does.
            mTopLevelComments = tagReader.getTopLevelComments();
         }
         finally
         {
            if (reader != null) reader.close();
         }
      }
      catch (XMLException e)
      {
         throw new XMLException("The file " + StringUtil.singleQuote(inFile.getPath()) + " doesn't appear to be in proper XML format!", e);
      }
      catch (IOException e)
      {
         throw new IOException("Problem encountered while reading file " + StringUtil.singleQuote(inFile.getPath()) + "!", e);
      }
   }
}
//---------------------------------------------------------------------------
/**
 The preferred way to read XML from a stream.
 <p>
 The stream's encoding is determined first (byte order mark, then the encoding
 named in the XML declaration, then the default charset) and the content is
 then parsed using that encoding. The root node, doctype and top-level
 comments found by the parser are stored.
 </p>
 <p>
 Note that the stream is closed once parsing has finished (or failed).
 </p>
 @param inStream the stream containing the XML to read
 @throws XMLException if an I/O problem occurs while reading the stream
 */
public XMLDoc(BufferedInputStream inStream)
{
   try
   {
      mEncoding = determineEncoding(inStream);

      // Now read in the XML.
      BufferedReader reader = null;
      try
      {
         reader = new BufferedReader(new InputStreamReader(inStream, mEncoding));

         if (mByteOrderMarkPresent)
         {
            // determineEncoding() rewinds the stream, so the byte order mark is still
            // at the front. Skip it (it decodes to a single character), as the
            // File-based constructor does, so the parser never sees it.
            reader.read();
         }

         XMLTagReader tagReader = new XMLTagReader();
         tagReader.parse(reader);

         mRootNode = tagReader.getRootNode();
         setDoctype(tagReader.getDoctype());
         mTopLevelComments = tagReader.getTopLevelComments();
      }
      finally
      {
         if (reader != null) reader.close();
      }
   }
   catch (IOException e)
   {
      throw new XMLException(e);
   }
}
//###########################################################################
// PUBLIC METHODS
//###########################################################################
//---------------------------------------------------------------------------
/**
Sets the name associated with this document.
@param inValue the name to store
@return this XMLDoc, to allow method chaining
*/
public XMLDoc setName(String inValue)
{
mName = inValue;
return this;
}
//---------------------------------------------------------------------------
/**
@return the name associated with this document, or null if none has been set
*/
public String name()
{
return mName;
}
//---------------------------------------------------------------------------
/**
 Creates a copy of this document. The root node (if any) is cloned; all other
 fields, including the doctype and the top-level comment list, are copied by
 reference via super.clone() and are therefore shared with the original.
 @return the copy
 @throws ProgrammingException if super.clone() reports that cloning is not supported
 */
@Override
public XMLDoc clone()
{
   final XMLDoc copy;
   try
   {
      copy = (XMLDoc) super.clone();
   }
   catch (CloneNotSupportedException e)
   {
      throw new ProgrammingException(e);
   }

   // Nothing further to copy if there is no root node.
   if (null == mRootNode)
   {
      return copy;
   }

   copy.mRootNode = (XMLNode) mRootNode.clone();
   return copy;
}
//---------------------------------------------------------------------------
/**
Sets the XML specification version for this document.
@param inValue the spec to store
@return this XMLDoc, to allow method chaining
*/
public XMLDoc setSpec(XMLSpec inValue)
{
mSpec = inValue;
return this;
}
//---------------------------------------------------------------------------
/**
Sets the Charset used when this document is written to byte streams or files.
@param inValue the Charset to use
@return this XMLDoc, to allow method chaining
*/
public XMLDoc setEncoding(Charset inValue)
{
mEncoding = inValue;
return this;
}
//---------------------------------------------------------------------------
/**
@return the Charset currently associated with this document (either the default,
the one detected while reading, or the one given to setEncoding())
*/
public Charset getEncoding()
{
return mEncoding;
}
//---------------------------------------------------------------------------
/**
Sets the standalone flag for this document.
@param inValue the new value of the standalone flag
@return this XMLDoc, to allow method chaining
*/
public XMLDoc setIsStandalone(boolean inValue)
{
mStandalone = inValue;
return this;
}
//--------------------------------------------------------------------------
/**
Sets the doctype for this document. When non-null it is included in the text returned by getHeader().
@param inValue the Doctype to store (may be null)
@return this XMLDoc, to allow method chaining
*/
public XMLDoc setDoctype(Doctype inValue)
{
mDoctype = inValue;
return this;
}
//--------------------------------------------------------------------------
/**
@return the doctype of this document, or null if none has been set
*/
public Doctype getDoctype()
{
return mDoctype;
}
//---------------------------------------------------------------------------
/**
Returns the internal list of top-level comments directly (no copy is made).
@return the top-level comments collected while parsing, or null if none were stored
*/
public List getTopLevelComments()
{
return mTopLevelComments;
}
//---------------------------------------------------------------------------
/**
Sets the root node of this document.
@param inRootNode the node to use as the root (may be null)
*/
public void setRootNode(XMLNode inRootNode)
{
mRootNode = inRootNode;
}
//---------------------------------------------------------------------------
/**
@return the root node of this document, or null if none has been set
*/
public XMLNode getRootNode()
{
return mRootNode;
}
//---------------------------------------------------------------------------
/**
 Serializes the document (header plus root node) to a String.
 <p>
 The XML is first encoded to bytes using the document's Charset and then
 decoded with that same Charset, so characters the Charset cannot represent
 are affected exactly as they would be when writing to a file.
 </p>
 @return The XML String encoded in the scheme specified for the XMLDoc.
 @throws XMLException if an I/O problem occurs while writing
 */
public String toXML()
{
   ByteArrayOutputStream outStream = new ByteArrayOutputStream();
   try
   {
      BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(outStream, mEncoding));
      toXML(bufferedWriter);
      bufferedWriter.close();
   }
   catch (IOException e)
   {
      throw new XMLException(e);
   }

   // Decode with the Charset that was used for encoding. ByteArrayOutputStream.toString()
   // would use the platform default charset and garble non-ASCII content whenever
   // the two differ.
   return new String(outStream.toByteArray(), mEncoding);
}
//---------------------------------------------------------------------------
/**
 The preferred way to save XML to a file. Properly encodes the XML based on
 the specified Charset. Write permissions are checked up front via
 checkPermissions(), and the file is always closed afterwards.
 @param inFile the File to which the XML should be written
 @throws XMLException if an I/O problem occurs while writing
 */
public void toXML(File inFile)
{
   checkPermissions(inFile);

   try
   {
      Writer fileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(inFile), mEncoding));
      try
      {
         toXML(fileWriter);
      }
      finally
      {
         fileWriter.close();
      }
   }
   catch (IOException e)
   {
      throw new XMLException(e);
   }
}
//---------------------------------------------------------------------------
/**
 Writes the XML document to the specified OutputStream, encoded using the
 document's Charset. Delegates to toXML(Writer), which flushes after writing;
 the stream is not closed here.
 @param inStream the OutputStream to which the XML will be written
 */
public void toXML(OutputStream inStream)
{
   toXML(new OutputStreamWriter(inStream, mEncoding));
}
//---------------------------------------------------------------------------
/**
 Writes the XML document to the specified Writer. Note that the caller
 needs to have specified the Charset for the Writer if other than the default encoding is desired.
 <p>
 The Writer is wrapped in a BufferedWriter unless it already is one. It is
 flushed after writing but not closed.
 </p>
 @param inWriter the Writer to which the XML will be written
 @throws XMLException if an I/O problem occurs while writing (consistent with the
         other toXML() / toIndentedXML() methods of this class)
 */
public void toXML(Writer inWriter)
{
   try
   {
      BufferedWriter bufferedWriter;
      if (inWriter instanceof BufferedWriter)
      {
         bufferedWriter = (BufferedWriter) inWriter;
      }
      else
      {
         bufferedWriter = new BufferedWriter(inWriter);
      }

      bufferedWriter.write(getHeader());

      if (mRootNode != null) mRootNode.toXML(bufferedWriter);

      bufferedWriter.flush();
   }
   catch (IOException e)
   {
      // Report I/O failures as XMLException, like every other output method here.
      throw new XMLException(e);
   }
}
//---------------------------------------------------------------------------
/**
 Serializes an indented form of the document (header plus root node) to a String.
 <p>
 The XML is first encoded to bytes using the document's Charset and then
 decoded with that same Charset.
 </p>
 @param inInitialIndentLevel The size of the initial indent
 @param inIndentSize The number of spaces incremented for ea. indent level
 @return the indented XML String
 @throws XMLException if any problem occurs while writing
 */
public String toIndentedXML(int inInitialIndentLevel, int inIndentSize)
{
   ByteArrayOutputStream outStream = new ByteArrayOutputStream();
   try
   {
      toIndentedXML(outStream, inInitialIndentLevel, inIndentSize);
      outStream.close();
   }
   catch (Exception e)
   {
      throw new XMLException(e);
   }

   // Decode with the Charset that was used for encoding. ByteArrayOutputStream.toString()
   // would use the platform default charset and garble non-ASCII content whenever
   // the two differ.
   return new String(outStream.toByteArray(), mEncoding);
}
//---------------------------------------------------------------------------
/**
 Writes an indented form of the XML document to the specified OutputStream,
 encoded using the document's Charset.
 Flushes but does not close the specified OutputStream after writing.
 @param inStream The OutputStream to which the XML will be written
 @param inInitialIndentLevel The size of the initial indent
 @param inIndentSize The number of spaces incremented for ea. indent level
 @throws XMLException if an I/O problem occurs while writing
 */
public void toIndentedXML(OutputStream inStream, int inInitialIndentLevel, int inIndentSize)
{
   try
   {
      OutputStreamWriter encodedWriter = new OutputStreamWriter(inStream, mEncoding);

      encodedWriter.write(getHeader());

      if (null != mRootNode)
      {
         mRootNode.toIndentedXML(encodedWriter, inInitialIndentLevel, inIndentSize);
      }

      encodedWriter.flush();
   }
   catch (IOException e)
   {
      throw new XMLException(e);
   }
}
//---------------------------------------------------------------------------
/**
 Writes an indented form of the XML document to the specified Writer. Note that the caller
 needs to have specified the Charset for the Writer if other than the default encoding is desired.
 The Writer is wrapped in a BufferedWriter unless it already is one; it is
 flushed after writing but not closed.
 @param inWriter The Writer to which the XML will be written
 @param inInitialIndentLevel The size of the initial indent
 @param inIndentSize The number of spaces incremented for ea. indent level
 @throws XMLException if an I/O problem occurs while writing
 */
public void toIndentedXML(Writer inWriter, int inInitialIndentLevel, int inIndentSize)
{
   try
   {
      BufferedWriter out = (inWriter instanceof BufferedWriter
                            ? (BufferedWriter) inWriter
                            : new BufferedWriter(inWriter));

      out.write(getHeader());

      if (null != mRootNode)
      {
         mRootNode.toIndentedXML(out, inInitialIndentLevel, inIndentSize);
      }

      out.flush();
   }
   catch (IOException e)
   {
      throw new XMLException(e);
   }
}
//---------------------------------------------------------------------------
/**
 The preferred way to save indented XML to a file. Properly encodes the XML
 based on the specified Charset. Write permissions are checked up front via
 checkPermissions(), and the file is always closed afterwards.
 @param inFile The target output file
 @param inInitialIndentLevel The size of the initial indent
 @param inIndentSize The number of spaces incremented for ea. indent level
 @throws XMLException if an I/O problem occurs while writing
 */
public void toIndentedXML(File inFile, int inInitialIndentLevel, int inIndentSize)
{
   checkPermissions(inFile);

   try
   {
      Writer fileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(inFile), mEncoding));
      try
      {
         toIndentedXML(fileWriter, inInitialIndentLevel, inIndentSize);
      }
      finally
      {
         fileWriter.close();
      }
   }
   catch (IOException e)
   {
      throw new XMLException(e);
   }
}
//--------------------------------------------------------------------------
/**
 Builds the text that precedes the root node when the document is written:
 the XML declaration (version, encoding and standalone flag) followed by the
 doctype, if one has been set. Each is terminated with the platform line separator.
 <p>
 Emitting the encoding in the declaration is what allows the document to be
 read back with the right Charset when it is written in something other than
 the default encoding.
 </p>
 @return the header text
 */
public String getHeader()
{
   StringBuilder buffer = new StringBuilder();

   // NOTE(review): assumes XMLSpec.toString() yields the bare version number (e.g. "1.0") - confirm.
   buffer.append("<?xml version='");
   buffer.append(mSpec != null ? mSpec : DEFAULT_XMLSPEC);
   buffer.append("'");

   if (mEncoding != null)
   {
      buffer.append(" encoding='");
      buffer.append(mEncoding.name());
      buffer.append("'");
   }

   buffer.append(" standalone='");
   buffer.append(mStandalone ? "yes" : "no");
   buffer.append("'?>");
   buffer.append(NL);

   if (getDoctype() != null)
   {
      buffer.append(getDoctype().toString());
      buffer.append(NL);
   }

   return buffer.toString();
}
//--------------------------------------------------------------------------
/**
 Delegates to the root node's replaceCharacterEntities(). Does nothing if the
 document has no root node.
 */
public void replaceCharacterEntities()
{
   // An empty document is a valid state (see the no-arg constructor), so guard
   // against a missing root node just like the output methods do.
   if (mRootNode != null)
   {
      mRootNode.replaceCharacterEntities();
   }
}
//###########################################################################
// PRIVATE METHODS
//###########################################################################
//---------------------------------------------------------------------------
/**
 Determines the encoding of the specified XML file by opening it and
 delegating to determineEncoding(BufferedInputStream). The file is closed
 again before returning.
 See: http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
 @param inFile the XML file to examine
 @return the detected Charset
 @throws IOException if the file cannot be opened or read
 */
private Charset determineEncoding(File inFile)
   throws IOException
{
   BufferedInputStream fileStream = new BufferedInputStream(new FileInputStream(inFile));
   try
   {
      return determineEncoding(fileStream);
   }
   finally
   {
      fileStream.close();
   }
}
//---------------------------------------------------------------------------
/**
 Determines the encoding of the XML content in the specified stream.
 See: http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
 <p>
 Detection order:
 </p>
 <ol>
 <li>a byte order mark (UTF-16BE, UTF-16LE or UTF-8) at the start of the stream,</li>
 <li>the encoding named in an XML declaration on the first non-blank line,</li>
 <li>the default Charset.</li>
 </ol>
 <p>
 As a side effect, mByteOrderMarkPresent records whether a byte order mark was
 found. The stream is rewound to its starting position before returning, so a
 byte order mark (if any) is still the first thing in the stream.
 </p>
 @param inStream the stream to examine
 @return the detected Charset (never null)
 @throws IOException if the stream cannot be read or rewound
 @throws java.nio.charset.IllegalCharsetNameException if the declared encoding name is not a legal charset name
 @throws java.nio.charset.UnsupportedCharsetException if the declared encoding is not supported by this JVM
 */
private Charset determineEncoding(BufferedInputStream inStream)
   throws IOException
{
   Charset encoding = detectBomEncoding(inStream);
   mByteOrderMarkPresent = (encoding != null);

   if (null == encoding)
   {
      encoding = detectDeclaredEncoding(inStream);
   }

   return (encoding != null ? encoding : DEFAULT_CHARSET);
}

//---------------------------------------------------------------------------
// Returns the Charset indicated by a byte order mark at the start of the stream,
// or null if there is none. The stream is rewound before returning.
private static Charset detectBomEncoding(BufferedInputStream inStream)
   throws IOException
{
   final int probeSize = 4;

   inStream.mark(probeSize);
   try
   {
      byte[] bytes = new byte[probeSize];
      if (inStream.read(bytes) != probeSize)
      {
         return null;
      }

      int b0 = 0xff & bytes[0];
      int b1 = 0xff & bytes[1];
      int b2 = 0xff & bytes[2];

      if (b0 == 0xFE && b1 == 0xFF)
      {
         return Charset.forName("UTF-16BE");
      }
      else if (b0 == 0xFF && b1 == 0xFE)
      {
         return Charset.forName("UTF-16LE");
      }
      else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF)
      {
         return Charset.forName("UTF-8");
      }

      return null;
   }
   finally
   {
      inStream.reset();
   }
}

//---------------------------------------------------------------------------
// Returns the Charset named in an XML declaration on the first non-blank line of the
// stream, or null if no such declaration is found. The stream is rewound before returning.
private static Charset detectDeclaredEncoding(BufferedInputStream inStream)
   throws IOException
{
   final int readLimit = 1024;

   inStream.mark(readLimit);
   try
   {
      StringBuilder buffer = new StringBuilder();
      int byteCount = 0;
      int theByte;

      // Check the count BEFORE reading: consuming more than readLimit bytes
      // past the mark would allow the mark to be invalidated, making reset() fail.
      while (byteCount < readLimit
             && (theByte = inStream.read()) != -1)
      {
         byteCount++;

         if (theByte == '\n'
             || theByte == '\r')
         {
            if (buffer.toString().trim().length() > 0)
            {
               break; // End of the first non-blank line
            }

            buffer.setLength(0); // Only blank content so far; start over
         }

         // The declaration itself is plain ASCII, so a byte-to-char cast is sufficient here.
         buffer.append((char) theByte);
      }

      String xmlHeader = buffer.toString().trim();
      if (xmlHeader.length() > 0)
      {
         Matcher m = XML_HEADER_PATTERN.matcher(xmlHeader);
         if (m.find())
         {
            return Charset.forName(m.group(1));
         }
      }

      return null;
   }
   finally
   {
      inStream.reset();
   }
}
//--------------------------------------------------------------------------
/**
 Verifies that the specified file can be written.
 <ul>
 <li>If the file exists, it must be writable.</li>
 <li>Otherwise, if it has a parent directory that exists, that directory must be writable.</li>
 <li>Otherwise, if it has a parent directory that doesn't exist, the directory (and any missing ancestors) is created.</li>
 </ul>
 @param inFile the intended output file
 @throws RuntimeException if inFile is null, not writable, or its parent directory
         is not writable or could not be created
 */
private void checkPermissions(File inFile)
{
   if (null == inFile)
   {
      throw new RuntimeException("The specified file was null!");
   }

   if (inFile.exists())
   {
      if (! inFile.canWrite())
      {
         throw new RuntimeException("No write permissions for " + StringUtil.singleQuote(inFile.getAbsolutePath()) + "!");
      }
      return;
   }

   File parentDir = inFile.getParentFile();
   if (null == parentDir)
   {
      // No parent component in the path; nothing further to check.
      return;
   }

   if (parentDir.exists())
   {
      if (! parentDir.canWrite())
      {
         throw new RuntimeException("No write permissions for dir " + StringUtil.singleQuote(parentDir.getAbsolutePath()) + "!");
      }
   }
   else if (! parentDir.mkdirs())
   {
      throw new RuntimeException("Could not create dir " + StringUtil.singleQuote(parentDir.getAbsolutePath()) + "!");
   }
}
}