All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sun.syndication.io.WireFeedInput Maven / Gradle / Ivy

There is a newer version: 3.1.0-incubating
Show newest version
/*
 * Copyright 2004 Sun Microsystems, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package com.sun.syndication.io;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;

import org.jdom2.Document;
import org.jdom2.JDOMException;
import org.jdom2.input.DOMBuilder;
import org.jdom2.input.JDOMParseException;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;

import com.sun.syndication.feed.WireFeed;
import com.sun.syndication.io.impl.FeedParsers;
import com.sun.syndication.io.impl.XmlFixerReader;

/**
 * Parses an XML document (File, InputStream, Reader, W3C SAX InputSource, W3C DOM Document or JDom DOcument)
 * into an WireFeed (RSS/Atom).
 * 

* It accepts all flavors of RSS (0.90, 0.91, 0.92, 0.93, 0.94, 1.0 and 2.0) and * Atom 0.3 feeds. Parsers are plugable (they must implement the WireFeedParser interface). *

* The WireFeedInput useds liberal parsers. *

* @author Alejandro Abdelnur * */ public class WireFeedInput { private static Map clMap = new WeakHashMap(); private static FeedParsers getFeedParsers() { synchronized(WireFeedInput.class) { FeedParsers parsers = (FeedParsers) clMap.get(Thread.currentThread().getContextClassLoader()); if (parsers == null) { parsers = new FeedParsers(); clMap.put(Thread.currentThread().getContextClassLoader(), parsers); } return parsers; } } private static final InputSource EMPTY_INPUTSOURCE = new InputSource(new ByteArrayInputStream(new byte[0])); private static final EntityResolver RESOLVER = new EmptyEntityResolver(); private static class EmptyEntityResolver implements EntityResolver { public InputSource resolveEntity(String publicId, String systemId) { if(systemId != null && systemId.endsWith(".dtd")) return EMPTY_INPUTSOURCE; return null; } } private boolean _validate; private boolean _xmlHealerOn; /** * Returns the list of supported input feed types. *

* @see WireFeed for details on the format of these strings. *

* @return a list of String elements with the supported input feed types. * */ public static List getSupportedFeedTypes() { return getFeedParsers().getSupportedFeedTypes(); } /** * Creates a WireFeedInput instance with input validation turned off. *

* */ public WireFeedInput() { this (false); } /** * Creates a WireFeedInput instance. *

* @param validate indicates if the input should be validated. NOT IMPLEMENTED YET (validation does not happen) * */ public WireFeedInput(boolean validate) { _validate = false; // TODO FIX THIS THINGY _xmlHealerOn = true; } /** * Enables XML healing in the WiredFeedInput instance. *

* Healing trims leading chars from the stream (empty spaces and comments) until the XML prolog. *

* Healing resolves HTML entities (from literal to code number) in the reader. *

* The healing is done only with the build(File) and build(Reader) signatures. *

* By default is TRUE. *

* @param heals TRUE enables stream healing, FALSE disables it. * */ public void setXmlHealerOn(boolean heals) { _xmlHealerOn = heals; } /** * Indicates if the WiredFeedInput instance will XML heal (if necessary) the character stream. *

* Healing trims leading chars from the stream (empty spaces and comments) until the XML prolog. *

* Healing resolves HTML entities (from literal to code number) in the reader. *

* The healing is done only with the build(File) and build(Reader) signatures. *

* By default is TRUE. *

* @return TRUE if healing is enabled, FALSE if not. * */ public boolean getXmlHealerOn() { return _xmlHealerOn; } /** * Builds an WireFeed (RSS or Atom) from a file. *

* NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom2.Document)'. *

* @param file file to read to create the WireFeed. * @return the WireFeed read from the file. * @throws FileNotFoundException thrown if the file could not be found. * @throws IOException thrown if there is problem reading the file. * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers. * @throws FeedException if the feed could not be parsed * */ public WireFeed build(File file) throws FileNotFoundException,IOException,IllegalArgumentException,FeedException { WireFeed feed; Reader reader = new FileReader(file); if (_xmlHealerOn) { reader = new XmlFixerReader(reader); } feed = build(reader); reader.close(); return feed; } /** * Builds an WireFeed (RSS or Atom) from an Reader. *

* NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom2.Document)'. *

* @param reader Reader to read to create the WireFeed. * @return the WireFeed read from the Reader. * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers. * @throws FeedException if the feed could not be parsed * */ public WireFeed build(Reader reader) throws IllegalArgumentException,FeedException { SAXBuilder saxBuilder = createSAXBuilder(); try { if (_xmlHealerOn) { reader = new XmlFixerReader(reader); } Document document = saxBuilder.build(reader); return build(document); } catch (JDOMParseException ex) { throw new ParsingFeedException("Invalid XML: " + ex.getMessage(), ex); } catch (IllegalArgumentException ex) { throw ex; } catch (Exception ex) { throw new ParsingFeedException("Invalid XML",ex); } } /** * Builds an WireFeed (RSS or Atom) from an W3C SAX InputSource. *

* NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom2.Document)'. *

* @param is W3C SAX InputSource to read to create the WireFeed. * @return the WireFeed read from the W3C SAX InputSource. * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers. * @throws FeedException if the feed could not be parsed * */ public WireFeed build(InputSource is) throws IllegalArgumentException,FeedException { SAXBuilder saxBuilder = createSAXBuilder(); try { Document document = saxBuilder.build(is); return build(document); } catch (JDOMParseException ex) { throw new ParsingFeedException("Invalid XML: " + ex.getMessage(), ex); } catch (IllegalArgumentException ex) { throw ex; } catch (Exception ex) { throw new ParsingFeedException("Invalid XML",ex); } } /** * Builds an WireFeed (RSS or Atom) from an W3C DOM document. *

* NOTE: This method delages to the 'AsbtractFeed WireFeedInput#build(org.jdom2.Document)'. *

* @param document W3C DOM document to read to create the WireFeed. * @return the WireFeed read from the W3C DOM document. * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers. * @throws FeedException if the feed could not be parsed * */ public WireFeed build(org.w3c.dom.Document document) throws IllegalArgumentException,FeedException { DOMBuilder domBuilder = new DOMBuilder(); try { Document jdomDoc = domBuilder.build(document); return build(jdomDoc); } catch (IllegalArgumentException ex) { throw ex; } catch (Exception ex) { throw new ParsingFeedException("Invalid XML",ex); } } /** * Builds an WireFeed (RSS or Atom) from an JDOM document. *

* NOTE: All other build methods delegate to this method. *

* @param document JDOM document to read to create the WireFeed. * @return the WireFeed read from the JDOM document. * @throws IllegalArgumentException thrown if feed type could not be understood by any of the underlying parsers. * @throws FeedException if the feed could not be parsed * */ public WireFeed build(Document document) throws IllegalArgumentException,FeedException { WireFeedParser parser = getFeedParsers().getParserFor(document); if (parser==null) { throw new IllegalArgumentException("Invalid document"); } return parser.parse(document, _validate); } /** * Creates and sets up a org.jdom2.input.SAXBuilder for parsing. * * @return a new org.jdom2.input.SAXBuilder object */ protected SAXBuilder createSAXBuilder() { SAXBuilder saxBuilder = new SAXBuilder(_validate); saxBuilder.setEntityResolver(RESOLVER); // // This code is needed to fix the security problem outlined in http://www.securityfocus.com/archive/1/297714 // // Unfortunately there isn't an easy way to check if an XML parser supports a particular feature, so // we need to set it and catch the exception if it fails. We also need to subclass the JDom SAXBuilder // class in order to get access to the underlying SAX parser - otherwise the features don't get set until // we are already building the document, by which time it's too late to fix the problem. // // Crimson is one parser which is known not to support these features. try { XMLReader parser = saxBuilder.createParser(); try { parser.setFeature("http://xml.org/sax/features/external-general-entities", false); saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); } catch (SAXNotRecognizedException e) { // ignore } catch (SAXNotSupportedException e) { // ignore } try { parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false); saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false); } catch (SAXNotRecognizedException e) { // ignore } catch (SAXNotSupportedException e) { // ignore } try { parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); saxBuilder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); } catch (SAXNotRecognizedException e) { // ignore } catch (SAXNotSupportedException e) { // ignore } } catch (JDOMException e) { throw new IllegalStateException("JDOM could not create a SAX parser"); } saxBuilder.setExpandEntities(false); return saxBuilder; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy