All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sun.syndication.fetcher.impl.HttpURLFeedFetcher Maven / Gradle / Ivy

/*
 * Copyright 2004 Sun Microsystems, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package com.sun.syndication.fetcher.impl;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPInputStream;

import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.fetcher.FetcherEvent;
import com.sun.syndication.fetcher.FetcherException;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.SyndFeedInput;
import com.sun.syndication.io.XmlReader;

/**
 * 

Class to retrieve syndication files via HTTP.

* *

If passed a {@link com.sun.syndication.fetcher.impl.FeedFetcherCache} in the * constructor it will use conditional gets to only retrieve modified content.

* *

The class uses the Accept-Encoding: gzip header to retrieve gzipped feeds where * supported by the server.

* *

Simple usage: *

 * 	// create the cache
 *	FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getFeedInfoCache();
 *	// retrieve the feed the first time
 *	// any subsequent request will use conditional gets and only
 *	// retrieve the resource if it has changed
 *	SyndFeed feed = new HttpURLFeedFetcher(feedInfoCache).retrieveFeed(feedUrl);
 *
* *

* * @see http://fishbowl.pastiche.org/2002/10/21/http_conditional_get_for_rss_hackers * @see http://diveintomark.org/archives/2003/07/21/atom_aggregator_behavior_http_level * @see http://bobwyman.pubsub.com/main/2004/09/using_rfc3229_w.html * @author Nick Lothian */ public class HttpURLFeedFetcher extends AbstractFeedFetcher { static final int POLL_EVENT = 1; static final int RETRIEVE_EVENT = 2; static final int UNCHANGED_EVENT = 3; private FeedFetcherCache feedInfoCache; /** * Constructor to use HttpURLFeedFetcher without caching of feeds * */ public HttpURLFeedFetcher() { super(); } /** * Constructor to enable HttpURLFeedFetcher to cache feeds * * @param feedCache - an instance of the FeedFetcherCache interface */ public HttpURLFeedFetcher(FeedFetcherCache feedInfoCache) { this(); setFeedInfoCache(feedInfoCache); } /** * Retrieve a feed over HTTP * * @param feedUrl A non-null URL of a RSS/Atom feed to retrieve * @return A {@link com.sun.syndication.feed.synd.SyndFeed} object * @throws IllegalArgumentException if the URL is null; * @throws IOException if a TCP error occurs * @throws FeedException if the feed is not valid * @throws FetcherException if a HTTP error occurred */ public SyndFeed retrieveFeed(URL feedUrl) throws IllegalArgumentException, IOException, FeedException, FetcherException { if (feedUrl == null) { throw new IllegalArgumentException("null is not a valid URL"); } URLConnection connection = feedUrl.openConnection(); if (!(connection instanceof HttpURLConnection)) { throw new IllegalArgumentException(feedUrl.toExternalForm() + " is not a valid HTTP Url"); } HttpURLConnection httpConnection = (HttpURLConnection)connection; // httpConnection.setInstanceFollowRedirects(true); // this is true by default, but can be changed on a claswide basis FeedFetcherCache cache = getFeedInfoCache(); if (cache != null) { SyndFeedInfo syndFeedInfo = cache.getFeedInfo(feedUrl); setRequestHeaders(connection, syndFeedInfo); httpConnection.connect(); try { fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection); if (syndFeedInfo == null) { // this is a feed that hasn't been retrieved syndFeedInfo = new SyndFeedInfo(); retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection); } else { // check the response code int responseCode = httpConnection.getResponseCode(); if (responseCode != HttpURLConnection.HTTP_NOT_MODIFIED) { // the response code is not 304 NOT MODIFIED // This is either because the feed server // does not support condition gets // or because the feed hasn't changed retrieveAndCacheFeed(feedUrl, syndFeedInfo, httpConnection); } else { // the feed does not need retrieving fireEvent(FetcherEvent.EVENT_TYPE_FEED_UNCHANGED, connection); } } return syndFeedInfo.getSyndFeed(); } finally { httpConnection.disconnect(); } } else { fireEvent(FetcherEvent.EVENT_TYPE_FEED_POLLED, connection); InputStream inputStream = null; setRequestHeaders(connection, null); httpConnection.connect(); try { inputStream = httpConnection.getInputStream(); return getSyndFeedFromStream(inputStream, connection); } catch (java.io.IOException e) { handleErrorCodes(((HttpURLConnection)connection).getResponseCode()); } finally { if (inputStream != null) { inputStream.close(); } httpConnection.disconnect(); } // we will never actually get to this line return null; } } protected void retrieveAndCacheFeed(URL feedUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, FeedException, FetcherException, IOException { handleErrorCodes(connection.getResponseCode()); resetFeedInfo(feedUrl, syndFeedInfo, connection); FeedFetcherCache cache = getFeedInfoCache(); // resetting feed info in the cache // could be needed for some implementations // of FeedFetcherCache (eg, distributed HashTables) if (cache != null) { cache.setFeedInfo(feedUrl, syndFeedInfo); } } protected void resetFeedInfo(URL orignalUrl, SyndFeedInfo syndFeedInfo, HttpURLConnection connection) throws IllegalArgumentException, IOException, FeedException { // need to always set the URL because this may have changed due to 3xx redirects syndFeedInfo.setUrl(connection.getURL()); // the ID is a persistant value that should stay the same even if the URL for the // feed changes (eg, by 3xx redirects) syndFeedInfo.setId(orignalUrl.toString()); // This will be 0 if the server doesn't support or isn't setting the last modified header syndFeedInfo.setLastModified(new Long(connection.getLastModified())); // This will be null if the server doesn't support or isn't setting the ETag header syndFeedInfo.setETag(connection.getHeaderField("ETag")); // get the contents InputStream inputStream = null; try { inputStream = connection.getInputStream(); SyndFeed syndFeed = getSyndFeedFromStream(inputStream, connection); String imHeader = connection.getHeaderField("IM"); if (isUsingDeltaEncoding() && (imHeader!= null && imHeader.indexOf("feed") >= 0)) { FeedFetcherCache cache = getFeedInfoCache(); if (cache != null && connection.getResponseCode() == 226) { // client is setup to use http delta encoding and the server supports it and has returned a delta encoded response // This response only includes new items SyndFeedInfo cachedInfo = cache.getFeedInfo(orignalUrl); if (cachedInfo != null) { SyndFeed cachedFeed = cachedInfo.getSyndFeed(); // set the new feed to be the orginal feed plus the new items syndFeed = combineFeeds(cachedFeed, syndFeed); } } } syndFeedInfo.setSyndFeed(syndFeed); } finally { if (inputStream != null) { inputStream.close(); } } } /** *

Set appropriate HTTP headers, including conditional get and gzip encoding headers

* * @param connection A URLConnection * @param syndFeedInfo The SyndFeedInfo for the feed to be retrieved. May be null */ protected void setRequestHeaders(URLConnection connection, SyndFeedInfo syndFeedInfo) { if (syndFeedInfo != null) { // set the headers to get feed only if modified // we support the use of both last modified and eTag headers if (syndFeedInfo.getLastModified() != null) { Object lastModified = syndFeedInfo.getLastModified(); if (lastModified instanceof Long) { connection.setIfModifiedSince(((Long)syndFeedInfo.getLastModified()).longValue()); } } if (syndFeedInfo.getETag() != null) { connection.setRequestProperty("If-None-Match", syndFeedInfo.getETag()); } } // header to retrieve feed gzipped connection.setRequestProperty("Accept-Encoding", "gzip"); // set the user agent connection.addRequestProperty("User-Agent", getUserAgent()); if (isUsingDeltaEncoding()) { connection.addRequestProperty("A-IM", "feed"); } } private SyndFeed readSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException { BufferedInputStream is; if ("gzip".equalsIgnoreCase(connection.getContentEncoding())) { // handle gzip encoded content is = new BufferedInputStream(new GZIPInputStream(inputStream)); } else { is = new BufferedInputStream(inputStream); } //InputStreamReader reader = new InputStreamReader(is, ResponseHandler.getCharacterEncoding(connection)); //SyndFeedInput input = new SyndFeedInput(); XmlReader reader = null; if (connection.getHeaderField("Content-Type") != null) { reader = new XmlReader(is, connection.getHeaderField("Content-Type"), true); } else { reader = new XmlReader(is, true); } SyndFeedInput syndFeedInput = new SyndFeedInput(); syndFeedInput.setPreserveWireFeed(isPreserveWireFeed()); return syndFeedInput.build(reader); } private SyndFeed getSyndFeedFromStream(InputStream inputStream, URLConnection connection) throws IOException, IllegalArgumentException, FeedException { SyndFeed feed = readSyndFeedFromStream(inputStream, connection); fireEvent(FetcherEvent.EVENT_TYPE_FEED_RETRIEVED, connection, feed); return feed; } /** * @return The FeedFetcherCache used by this fetcher (Could be null) */ public synchronized FeedFetcherCache getFeedInfoCache() { return feedInfoCache; } /** * @param cache The cache to be used by this fetcher (pass null to stop using a cache) */ public synchronized void setFeedInfoCache(FeedFetcherCache cache) { feedInfoCache = cache; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy