test.it.unimi.dsi.parser.callback.BulletParserCallbackContentHandler Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of dsiutils Show documentation
Show all versions of dsiutils Show documentation
The DSI utilities are a mishmash of classes accumulated during the last twenty years in projects developed at the DSI (Dipartimento di Scienze dell'Informazione, i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e., Informatics Department), of the Universita` degli Studi di Milano.
package it.unimi.dsi.parser.callback;
/*
* DSI utilities
*
* Copyright (C) 2010-2018 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see .
*
*/
import it.unimi.dsi.fastutil.objects.Reference2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ReferenceOpenHashSet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.parser.Attribute;
import it.unimi.dsi.parser.BulletParser;
import it.unimi.dsi.parser.Element;
import java.util.Iterator;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
/** An adapter from callbacks of the bullet parser to standard
* SAX content handler. Can be used to run, eg., tagsoup
* and the bullet parser against a page and check that the
* actual callback invocations are the same.
*/
public class BulletParserCallbackContentHandler extends DefaultHandler {
/** The delegated callback. */
private final Callback callback;
/** The corresponding parser. */
private final BulletParser parser;
/** The element enclosing the current CDATA section, or {@code null}
* if we're not in a CDATA section. */
private Element inCdata;
/** The set of tags enclosing CDATA sections. */
private final ReferenceSet cdataElements = new ReferenceOpenHashSet<>(new Element[] { Element.SCRIPT, Element.STYLE });
/** The map used to fake an attribute map. */
private final Reference2ObjectOpenHashMap attrMap = new Reference2ObjectOpenHashMap<>();
public BulletParserCallbackContentHandler(final BulletParser parser, final Callback callback) {
this.parser = parser;
this.callback = callback;
}
@Override
public void endDocument() {
callback.endDocument();
}
@Override
public void endElement(final String uri, final String localName, final String qName) {
Element element = parser.factory.getElement(new MutableString(localName));
if (cdataElements.contains(element) && element == inCdata) inCdata = null;
callback.endElement(element);
}
@Override
public void characters(final char[] ch, final int start, final int length) {
if (inCdata != null) callback.cdata(inCdata, ch, start, length);
else callback.characters(ch, start, length, false);
}
@Override
public void ignorableWhitespace(final char[] ch, final int start, final int length) {
callback.characters(ch, start, length, false);
}
@Override
public void startDocument() {
callback.startDocument();
}
@Override
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) {
attrMap.clear();
String value;
Attribute attribute;
for (Iterator i = parser.parsedAttributes.iterator(); i.hasNext();) {
attribute = i.next();
value = attributes.getValue(attribute.toString());
if (value != null)
attrMap.put(attribute, new MutableString(value));
}
Element element = parser.factory.getElement(new MutableString(localName));
if (cdataElements.contains(element)) inCdata = element;
callback.startElement(element, attrMap);
}
}