All downloads are free. Search and download functionality uses the official Maven repository.

com.day.cq.rewriter.htmlparser.DocumentHandlerToSAXAdapter Maven / Gradle / Ivy

/*
 * Copyright 1997-2008 Day Management AG
 * Barfuesserplatz 6, 4001 Basel, Switzerland
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information of
 * Day Management AG, ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Day.
 */
package com.day.cq.rewriter.htmlparser;

import java.io.IOException;
import java.util.Iterator;

import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import com.day.cq.rewriter.pipeline.AttributesImpl;

/**
 * Adapter that forwards {@link DocumentHandler} callbacks to a SAX
 * {@link ContentHandler}.
 * <p>
 * Every {@link SAXException} raised by the wrapped content handler is
 * converted into an {@link IOException} (see {@link #handle(SAXException)}).
 * <p>
 * A single attributes object is cleared and reused for every start element,
 * so the attributes handed to the content handler are only valid for the
 * duration of that {@code startElement} call.
 * NOTE(review): because of this shared mutable state the adapter is presumably
 * not meant for concurrent use - confirm against callers.
 *
 * @deprecated This is replaced by the Apache Sling Html parsing.
 */
@Deprecated
public class DocumentHandlerToSAXAdapter implements DocumentHandler {

    /** Namespace passed along with the {@link #QUOTES_ATTR} attribute. */
    public static final String NAMESPACE = "http://com.day/cq/rewriter";

    /**
     * Name of the marker attribute (with an empty value) that is added to a
     * start element which was closed with an ending slash.
     */
    public static final String END_SLASH_ATTR = "endSlash";

    /**
     * Name of the attribute carrying the quote characters of all attributes
     * of an element, one character per attribute in iteration order.
     */
    public static final String QUOTES_ATTR = "quotes";

    /** Reused for every start element; cleared at the start of each call. */
    private final AttributesImpl atts = new AttributesImpl();

    /** Receiver of the generated SAX events. */
    private final ContentHandler contentHandler;

    /** Set once {@link #onStart()} has been called; guards {@link #onEnd()}. */
    private boolean gotStart = false;

    /**
     * Creates an adapter that sends all events to the given handler.
     *
     * @param handler The SAX content handler receiving the events
     */
    public DocumentHandlerToSAXAdapter(final ContentHandler handler) {
        this.contentHandler = handler;
    }


    /**
     * Called by HtmlParser if character data and tags are to be output for which no
     * special handling is necessary. The data is passed unchanged to the
     * content handler.
     *
     * @param buffer Character data
     * @param offset Offset where character data starts
     * @param length The length of the character data
     * @throws IOException If the content handler signals a SAX error
     *
     * @see com.day.cq.rewriter.htmlparser.DocumentHandler#characters(char[], int, int)
     */
    public void characters(char[] buffer, int offset, int length)
    throws IOException {
        try {
            this.contentHandler.characters(buffer, offset, length);
        } catch (SAXException e) {
            throw handle(e);
        }
    }

    /**
     * Called by HtmlParser for the start element of a tag that requires special handling.
     * <p>
     * Copies all attributes of the tag into the SAX attributes, records the
     * quote character of each attribute in the additional
     * {@link #QUOTES_ATTR} attribute (only if the tag has attributes) and
     * adds the {@link #END_SLASH_ATTR} marker attribute if the tag was closed
     * with an ending slash. The element is then reported to the content
     * handler with an empty namespace URI and the tag name as both local and
     * qualified name.
     *
     * @param tagName Tag name
     * @param attributes List of attributes
     * @param buffer Contains the whole tag including attributes (not used here)
     * @param offset Offset where the character data starts (not used here)
     * @param length Length of the character data (not used here)
     * @param endSlash Flag indicating whether the element is closed with an ending slash (xhtml-compliant)
     * @throws IOException If the content handler signals a SAX error
     */
    public void onStartElement(String tagName,
                               AttributeList attributes,
                               char[] buffer, int offset, int length, boolean endSlash)
    throws IOException {
        this.atts.clear();

        final char[] quotes = new char[attributes.attributeCount()];
        int index = 0;
        final Iterator<String> names = attributes.attributeNames();
        while (names.hasNext()) {
            final String name = names.next();
            // the value may be null; it is handed over as is
            this.atts.addCDATAAttribute(name, attributes.getValue(name));
            quotes[index] = attributes.getQuoteChar(name);
            index++;
        }
        if (index > 0) {
            this.atts.addCDATAAttribute(NAMESPACE, QUOTES_ATTR, new String(quotes));
        }
        try {
            if (endSlash) {
                // just tell the contentHandler via attribute that an end slash is needed
                // --> see #20065 - Rewriter: Does Not Rewrite Preserving Doctype
                this.atts.addCDATAAttribute(END_SLASH_ATTR, "");
            }
            this.contentHandler.startElement("", tagName, tagName, this.atts);
        } catch (SAXException e) {
            throw handle(e);
        }
    }

    /**
     * Called by HtmlParser for the end element of a tag that requires special handling.
     * The end of the element is reported to the content handler with an empty
     * namespace URI and the tag name as both local and qualified name.
     *
     * @param tagName Tag name
     * @param buffer Contains the whole tag including attributes (not used here)
     * @param offset Offset where the character data starts (not used here)
     * @param length Length of the character data (not used here)
     * @throws IOException If the content handler signals a SAX error
     */
    public void onEndElement(String tagName, char[] buffer, int offset, int length)
    throws IOException {
        try {
            this.contentHandler.endElement("", tagName, tagName);
        } catch (SAXException e) {
            throw handle(e);
        }
    }

    /**
     * Ends the SAX document, but only if {@link #onStart()} has been called
     * before.
     *
     * @throws IOException If the content handler signals a SAX error
     * @see com.day.cq.rewriter.htmlparser.DocumentHandler#onEnd()
     */
    public void onEnd() throws IOException {
        if (!this.gotStart) {
            return;
        }
        try {
            this.contentHandler.endDocument();
        } catch (SAXException e) {
            throw handle(e);
        }
    }

    /**
     * Starts the SAX document and remembers that the start has been seen.
     * The flag is set before the content handler is invoked, so it stays set
     * even if starting the document fails.
     *
     * @throws IOException If the content handler signals a SAX error
     * @see com.day.cq.rewriter.htmlparser.DocumentHandler#onStart()
     */
    public void onStart() throws IOException {
        this.gotStart = true;
        try {
            this.contentHandler.startDocument();
        } catch (SAXException e) {
            throw handle(e);
        }
    }

    /**
     * Converts a SAX exception into an I/O exception.
     * <p>
     * If the SAX exception was caused by an {@link IOException}, that
     * original exception is returned. Otherwise a new {@link IOException}
     * with the SAX exception as its cause is created.
     *
     * @param se The SAX exception to convert
     * @return The I/O exception to be thrown by the caller
     */
    protected final IOException handle(SAXException se) {
        // instanceof is false for null, so no separate null check is needed
        final Throwable cause = se.getCause();
        if (cause instanceof IOException) {
            return (IOException) cause;
        }
        final IOException ioe = new IOException("Unable to parse document");
        ioe.initCause(se);
        return ioe;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy