All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.enhydra.xml.io.HTMLFormatter Maven / Gradle / Ivy

The newest version!
/*
 * Enhydra Java Application Server Project
 * 
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
 * the License for the specific terms governing rights and limitations
 * under the License.
 * 
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * $Id: HTMLFormatter.java,v 1.7 2005/01/26 08:29:24 jkjome Exp $
 */

package org.enhydra.xml.io;

import java.io.IOException;

import org.enhydra.xml.dom.DOMAccess;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;

/*
 * FIXME:
 *  - Need to check for optional attributes.
 *  - Give control over newline in OutputOptions or maybe use a PrintWriter???
 *  - Need option to include HTML 4.0 entity references.
 *  - script and style child special handling.
 *  - Need to output HTML header.
 */


/**
 * Formatter for outputting a HTML DOM as a HTML text document.
 */
final class HTMLFormatter extends BaseDOMFormatter implements Formatter {
    /**
     * Default XML encoding.
     */
    private static final String DEFAULT_XML_ENCODING = "ISO-8859-1";

    /**
     * Table use to optimized checking for characters that should be
     * represented as entity references.
     */
    private static final boolean[] fEntityQuickCheck
        = new boolean[MAX_ENTITY_QUICK_CHECK_CHAR+1];

    /**
     * Should SPAN ID attributes be dropped?
     */
    private final boolean fDropSpanIds;

    /**
     * Flag that indicates the ID attribute should be dropped for the current
     * element.
     */
    private boolean fDropThisId;
    
    /**
     * Flag that indicates wether to use all named entites for all HTML 4.0 character
     * entities or not.
     */
    private boolean fUseHTML4Entities;

    /**
     * Nesting count for elements that don't have their content formatted.
     * This is done for  script and style elements.  The contents of these
     * elements are outputted as-is.  Its not legal for them to nest, but a
     * count is used to keep code from being confused by a broken DOM.
     */
    private int fNoFormatNestCount;

    /**
     * Indicates then a text has just been handled
     */
    private boolean fHandleText = false;

    /**
     * Indicates the next Sibling is a Text node
     */
    private boolean fNextSiblingText = false;

    /**
     * Static constructor.
     */
    static {
        for (char ch = 0; ch <= MAX_ENTITY_QUICK_CHECK_CHAR; ch++) {
            fEntityQuickCheck[ch] = (HTMLEntities.charToEntity(ch) != null);
        }
    }

    /**
     * Constructor.
     */
    public HTMLFormatter(Node node,
                         OutputOptions outputOptions,
                         boolean forPreFormatting) {
        super(node, outputOptions, forPreFormatting, DEFAULT_XML_ENCODING, fEntityQuickCheck);
        fDropSpanIds = fOptions.getDropHtmlSpanIds();
        fUseHTML4Entities = fOptions.getUseHTML4Entities();
    }

    /**
     * Get the default OutputOptions for a document formatter with this
     * formatter.  The encoding will not be set, which signals to use the
     * default encoding.
     */
    static OutputOptions getDefaultOutputOptions() {
        return new OutputOptions();  // Nothing special
    }

    /**
     * @see BaseDOMFormatter#getCharacterEntity
     */
    protected final String getCharacterEntity(char textChar) {
        if (fUseHTML4Entities) { 
            return HTMLEntities.charToEntity4(textChar); 
        } else { 
            return HTMLEntities.charToEntity(textChar);
        }
    }

    /**
     * Determine if an attribute's value should be printer. Those that don't
     * normally have values only get them if one was explictly supplied.
     */
    private boolean printableAttrValue(Attr attr) {
        return (!(HTMLElements.isBooleanAttr(attr.getName())));
    }

    /**
     * Output the DOCTYPE declaration, if the information is available.
     */
    private void outputDocType(Document document) throws IOException {
        //FIXME: Don't currently have a way of getting doctype from the parser
        //to here, the only way is via the outputOptions override.
        if ((fPublicId != null) || (fSystemId != null)) {
            fOut.write("');
            writeln();
        }
    }

    /**
     * Handler called for Document nodes.
     * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleDocument
     */
    public void handleDocument(Document document) throws IOException {
        if (!fOptions.getOmitDocType()) {
            outputDocType(document);
        }
        fTraverser.processChildren(document);
    }

    /**
     * Handler called for Document nodes; should never be called.
     * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleDocumentType
     */
    public void handleDocumentType(DocumentType documentType) throws IOException {
        throw new XMLIOError("Unexpected call to handleDocumentType");
    }

    /**
     * Handler called for DocumentFragment nodes; just process children.
     * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleDocumentFragment
     */
    public void handleDocumentFragment(DocumentFragment documentFragment) {
        fTraverser.processChildren(documentFragment);
    }

    /**
     * Handler called for Attr nodes.
     * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleAttr
     */
    public void handleAttr(Attr attr) throws IOException {
        String name = attr.getName();
        if (!(fDropThisId && name.equals("id"))) {
            fOut.write(' ');
            fOut.write(name);
            if (printableAttrValue(attr)) {
                writeAttributeValue(attr);
            }
        }
    }

    /**
     * Write an element open tag.  The hasChildren option is ignored.
     */
    protected final void writeOpenTag(Element element, 
                                      String tagName,
                                      boolean hasChildren) throws IOException {
        String formattedTag = null;
        if (fPrettyPrinting) {
            if (fNextSiblingText) {
                fOut.write('\n');
            }
            fNextSiblingText = (element.getNextSibling() instanceof Text);
        } // end of if ()
        
        if (fUsePreFormattedElements && (element instanceof PreFormattedText)) {
            formattedTag = ((PreFormattedText)element).getPreFormattedText();
        }
        if (formattedTag != null) {
            fOut.write(formattedTag);
            fPreFormattedElementCount++;
        } else {
            if (fPrettyPrinting && !(element.getPreviousSibling() instanceof Text)) {
                printIndent();                
            } // end of if ()
            
            fDropThisId = fDropSpanIds && tagName.equals("SPAN");
            fOut.write('<');
            fOut.write(tagName);
            fTraverser.processAttributes(element);
            fOut.write('>');
            fDynamicFormattedElementCount++;
            if (fPrettyPrinting && !(element.getFirstChild() instanceof Text)) {
                fOut.write('\n');
            } // end of if ()
        }
    }

    /**
     * Write an element close tag.
     */
    private void writeCloseTag(String tagName) throws IOException {
        // Output end tag when legal.
        if (fHandleText) {
            fHandleText = false;
        } else {
            printIndent();             
        } // end of else
        
        if (HTMLElements.hasCloseTag(tagName)) {
            fOut.write("');
        }
        if (fPrettyPrinting && !fNextSiblingText) {
            fOut.write('\n');
        } // end of if ()
    }

    /**
     * Handler called for Element nodes.
     * 

* This optionally corrects problem cases for browsers: *

    *
  • ID attributes are dropped from SPAN tags. This cause Internet * Explorer 4.0 to get confused on keep-alive connections. *
* @see org.enhydra.xml.dom.DOMTraversal.Handler#handleElement */ public void handleElement(Element element) throws IOException { String tagName = element.getTagName(); // Start Barracuda Kludge ====== // check the element to see if it contains a // attribute "visdom". // (org.enhydra.barracuda.core.comp.BComponent.VISIBILITY_MARKER) // This controls DOM visibility. If this value exists and does not // match to true, don't print this particular node. // Note: This should be made generic, but for now... Attr attr = DOMAccess.accessAttribute(fDocument, element, null, "visdom"); if (attr != null && !(Boolean.valueOf(attr.getValue()).booleanValue())) return; // End Barracuda Kludge ====== // HTML version doesn't care if it has childrne. writeOpenTag(element, tagName, false); // Output childern and close boolean isScriptStyle = HTMLElements.isScriptStyle(element); if (isScriptStyle) { fNoFormatNestCount++; } fTraverser.processChildren(element); if (isScriptStyle) { fNoFormatNestCount--; } writeCloseTag(tagName); } /** * Handler called for ProcessingInstruction nodes. * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleProcessingInstruction */ public void handleProcessingInstruction(ProcessingInstruction pi) throws IOException { throw new XMLIOError("Unexpected call to handleProcessingInstruction"); } /** * Handler called for CDATASection nodes. * Non-standard extension: outputs data as-is. * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleCDATASection */ public void handleCDATASection(CDATASection cdata) throws IOException { fOut.write(cdata.getData()); } /** * Handler called for Text nodes. * @see org.enhydra.xml.dom.DOMTraversal.Handler#handleText */ public final void handleText(Text text) throws IOException { fHandleText = true; if (fNoFormatNestCount > 0) { fOut.write(text.getData()); } else { super.handleText(text); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy