All Downloads are FREE. Search and download functionalities are using the official Maven repository.

test.it.unimi.dsi.parser.callback.BulletParserCallbackContentHandler Maven / Gradle / Ivy

package it.unimi.dsi.parser.callback;

/*
 * DSI utilities
 *
 * Copyright (C) 2010-2017 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see .
 *
 */

import it.unimi.dsi.fastutil.objects.Reference2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ReferenceOpenHashSet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.parser.Attribute;
import it.unimi.dsi.parser.BulletParser;
import it.unimi.dsi.parser.Element;

import java.util.Iterator;

import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;

/** An adapter from callbacks of the bullet parser to standard
 * SAX content handler. Can be used to run, eg., tagsoup
 * and the bullet parser against a page and check that the
 * actual callback invocations are the same.
 */

public class BulletParserCallbackContentHandler extends DefaultHandler {
	/** The delegated callback. */
	private final Callback callback;

	/** The corresponding parser. */
	private final BulletParser parser;

	/** The element enclosing the current CDATA section, or {@code null}
	 * if we're not in a CDATA section. */
	private Element inCdata;

	/** The set of tags enclosing CDATA sections. */
	private final ReferenceSet cdataElements = new ReferenceOpenHashSet<>(new Element[] { Element.SCRIPT, Element.STYLE });

	/** The map used to fake an attribute map. */
	private final Reference2ObjectOpenHashMap attrMap = new Reference2ObjectOpenHashMap<>();

	public BulletParserCallbackContentHandler(final BulletParser parser, final Callback callback) {
		this.parser = parser;
		this.callback = callback;
	}

	@Override
	public void endDocument() {
		callback.endDocument();
	}

	@Override
	public void endElement(final String uri, final String localName, final String qName) {
	    Element element = parser.factory.getElement(new MutableString(localName));
		if (cdataElements.contains(element) && element == inCdata) inCdata = null;
		callback.endElement(element);
	}

	@Override
	public void characters(final char[] ch, final int start, final int length) {
		if (inCdata != null) callback.cdata(inCdata, ch, start, length);
		else callback.characters(ch, start, length, false);
	}

	@Override
	public void ignorableWhitespace(final char[] ch, final int start, final int length) {
		callback.characters(ch, start, length, false);
	}

	@Override
	public void startDocument() {
		callback.startDocument();
	}

	@Override
	public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) {
		attrMap.clear();
		String value;
		Attribute attribute;

		for (Iterator i = parser.parsedAttributes.iterator(); i.hasNext();) {
			attribute = i.next();
			value = attributes.getValue(attribute.toString());
			if (value != null)
				attrMap.put(attribute, new MutableString(value));
		}

		Element element = parser.factory.getElement(new MutableString(localName));
		if (cdataElements.contains(element)) inCdata = element;
		callback.startElement(element, attrMap);
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy