All downloads are free. Search and download functionality uses the official Maven repository.

it.tidalwave.semantic.rss.RssFeedReader Maven / Gradle / Ivy

/***********************************************************************************************************************
 *
 * blueBill Core - open source birding
 * Copyright (C) 2009-2011 by Tidalwave s.a.s. (http://www.tidalwave.it)
 *
 ***********************************************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations under the License.
 *
 ***********************************************************************************************************************
 *
 * WWW: http://bluebill.tidalwave.it
 * SCM: https://java.net/hg/bluebill~core-src
 *
 **********************************************************************************************************************/
package it.tidalwave.semantic.rss;

import javax.annotation.Nonnull;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.io.InputStream;
import org.jaxen.XPath;
import it.tidalwave.util.Id;
import it.tidalwave.xml.XPathProvider;
import it.tidalwave.xml.XmlParser;
import it.tidalwave.xml.dom4j.Dom4jXmlParser;
import it.tidalwave.xml.jaxen.JaxenXPathProvider;
import it.tidalwave.semantic.document.Document;
import static it.tidalwave.semantic.rss.RssVocabulary.*;

/***********************************************************************************************************************
 *
 * @author  Fabrizio Giudici
 * @version $Id$
 *
 **********************************************************************************************************************/
public class RssFeedReader
  {
    /*******************************************************************************************************************
     *
     * Parses an RSS document from the given stream and converts it into a {@link RssFeed}.
     *
     * The channel is looked up at {@code /rss/channel}; each {@code item} child of the channel becomes a
     * {@link Document} whose id is built from the item's {@code link}. The feed id is built from the channel's
     * {@code link}. Dates are parsed with the pattern {@code EEE, d MMM yyyy HH:mm:ss Z} using English names.
     *
     * {@code pubDate} and {@code lastBuildDate} are optional elements in RSS 2.0: when one of them is missing or blank
     * the corresponding property is simply not set, instead of failing the whole feed. A date that is present but
     * malformed is still reported as an error.
     *
     * This method contains no call that closes the stream.
     *
     * @param  is                        the stream to read the RSS document from
     * @return                           the feed, with its messages
     * @throws IllegalArgumentException  if the document has no {@code /rss/channel} element
     * @throws Exception                 if the document cannot be parsed, an XPath cannot be evaluated or a date
     *                                   that is present is malformed
     *
     ******************************************************************************************************************/
    @Nonnull
    public RssFeed readMessages (final @Nonnull InputStream is)
      throws Exception
      {
        final XmlParser xmlParser = new Dom4jXmlParser();
        final Object document = xmlParser.parse(is);
        final XPathProvider xPathProvider = new JaxenXPathProvider();

        // Channel-level expressions; the relative ones are evaluated against the channel node
        // (title, link and description are reused for the items as well).
        final XPath channelExpr = xPathProvider.createXPath(document, "/rss/channel");
        final XPath titleExpr = xPathProvider.createXPath(document, "title");
        final XPath linkExpr = xPathProvider.createXPath(document, "link");
        final XPath descriptionExpr = xPathProvider.createXPath(document, "description");
        final XPath languageExpr = xPathProvider.createXPath(document, "language");
        final XPath copyrightExpr = xPathProvider.createXPath(document, "copyright");
        final XPath lastBuildDateExpr = xPathProvider.createXPath(document, "lastBuildDate");
        final XPath generatorExpr = xPathProvider.createXPath(document, "generator");
        final XPath itemExpr = xPathProvider.createXPath(document, "item");

        // Item-level expressions; the prefixed ones need their namespace bound explicitly.
        final XPath pubDateExpr = xPathProvider.createXPath(document, "pubDate");
        final XPath dcCreatorExpr = xPathProvider.createXPath(document, "dc:creator");
        dcCreatorExpr.addNamespace("dc", "http://purl.org/dc/elements/1.1/");
        final XPath guidExpr = xPathProvider.createXPath(document, "guid");
        final XPath contentExpr = xPathProvider.createXPath(document, "content:encoded");
        contentExpr.addNamespace("content", "http://purl.org/rss/1.0/modules/content/");

        final Object channel = channelExpr.selectSingleNode(document);

        if (channel == null)
          {
            // Fail explicitly: since blank dates are now tolerated, a non-RSS document must not end up as an
            // empty feed.
            throw new IllegalArgumentException("Not an RSS document: missing /rss/channel element");
          }

        final List<?> itemNodes = itemExpr.selectNodes(channel);

        // Kept as a raw list, as in the original code: the type expected by the MESSAGES key is not visible here.
        final List messages = new ArrayList();

        // Tue, 29 Jun 2010 09:44:00 -0500
        final DateFormat df = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.ENGLISH);

        for (final Object item : itemNodes)
          {
            final String itemLink = linkExpr.stringValueOf(item);
            final String pubDate = pubDateExpr.stringValueOf(item);

            Document message = new Document(new Id(itemLink)).with(TITLE, titleExpr.stringValueOf(item));

            if (hasText(pubDate)) // pubDate is optional in RSS 2.0
              {
                message = message.with(PUB_DATE, df.parse(pubDate.trim()));
              }

            messages.add(message.with(DC_CREATOR, dcCreatorExpr.stringValueOf(item))
                                .with(LINK, itemLink)
                                .with(GUID, guidExpr.stringValueOf(item))
                                .with(DESCRIPTION, descriptionExpr.stringValueOf(item))
                                .with(CONTENT, contentExpr.stringValueOf(item)));
          }

        final String channelLink = linkExpr.stringValueOf(channel);
        final String lastBuildDate = lastBuildDateExpr.stringValueOf(channel);

        RssFeed rssFeed = new RssFeed(new Id(channelLink)).with(TITLE, titleExpr.stringValueOf(channel))
                                                          .with(LINK, channelLink)
                                                          .with(DESCRIPTION, descriptionExpr.stringValueOf(channel))
                                                          .with(LANGUAGE, languageExpr.stringValueOf(channel))
                                                          .with(COPYRIGHT, copyrightExpr.stringValueOf(channel));

        if (hasText(lastBuildDate)) // lastBuildDate is optional in RSS 2.0
          {
            rssFeed = rssFeed.with(LAST_BUILD_DATE, df.parse(lastBuildDate.trim()));
          }

        return rssFeed.with(GENERATOR, generatorExpr.stringValueOf(channel))
                      .with(MESSAGES, messages);
      }

    /*******************************************************************************************************************
     *
     * Tells whether a string extracted from the document carries any non-whitespace character.
     *
     * @param  text  the string to check (may be null)
     * @return       true if the string is not null and not blank
     *
     ******************************************************************************************************************/
    private static boolean hasText (final String text)
      {
        return (text != null) && (text.trim().length() > 0);
      }
  }