org.exist.util.HtmlToXmlParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of exist-core Show documentation
Show all versions of exist-core Show documentation
eXist-db NoSQL Database Core
/*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
* [email protected]
* http://www.exist-db.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.exist.util;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.evolvedbinary.j8fu.Either;
import org.xml.sax.*;
import java.util.Map;
import java.util.Optional;
/**
* @author Adam Retter
*/
public class HtmlToXmlParser {
private final static Logger LOG = LogManager.getLogger(HtmlToXmlParser.class);
public static final String HTML_TO_XML_PARSER_ELEMENT = "html-to-xml";
public static final String HTML_TO_XML_PARSER_CLASS_ATTRIBUTE = "class";
public static final String HTML_TO_XML_PARSER_PROPERTIES_ELEMENT = "properties";
public static final String HTML_TO_XML_PARSER_FEATURES_ELEMENT = "features";
public static final String HTML_TO_XML_PARSER_PROPERTY = "parser.html-to-xml-parser";
public static final String HTML_TO_XML_PARSER_PROPERTIES_PROPERTY = "parser.html-to-xml-parser.properties";
public static final String HTML_TO_XML_PARSER_FEATURES_PROPERTY = "parser.html-to-xml-parser.features";
public static final String PARSER_ELEMENT_NAME = "parser";
/**
* Returns the Configured HTML to XML parser
*
* @param config The configuration which specifies the classname of the parser to use
*
* @return If the configuration specifies a valid HTML to XML Parser and it
* is available on the classpath, then the result of instantiating it will be
* returned, otherwise {@link Optional#EMPTY}
*/
public static Optional> getHtmlToXmlParser(final Configuration config) {
final Optional parserClassName =
Optional.ofNullable((String)config.getProperty(HTML_TO_XML_PARSER_PROPERTY));
// instantiate the parser
final Optional> inst = parserClassName.map(cn -> {
Either result;
try {
final Class clazz = Class.forName(cn);
if (XMLReader.class.isAssignableFrom(clazz)) {
final XMLReader reader = (XMLReader) clazz.newInstance();
final Map features = (Map)config.getProperty(HTML_TO_XML_PARSER_FEATURES_PROPERTY);
if(features != null) {
for(final Map.Entry feature : features.entrySet()) {
reader.setFeature(feature.getKey(), feature.getValue());
}
}
final Map properties = (Map)config.getProperty(HTML_TO_XML_PARSER_PROPERTIES_PROPERTY);
if(properties != null) {
for(final Map.Entry property : properties.entrySet()) {
reader.setProperty(property.getKey(), property.getValue());
}
}
result = Either.Right(reader);
} else {
result = Either.Left(new ClassCastException(cn + " does not implement org.xml.sax.XMLReader"));
}
} catch(final ClassNotFoundException | InstantiationException | IllegalAccessException | SAXNotRecognizedException | SAXNotSupportedException e) {
result = Either.Left(e);
}
return result;
});
return inst;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy