All downloads are free. The search and download features use the official Maven repository.

org.mule.module.rss.transformers.RssParser Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) MuleSoft, Inc.  All rights reserved.  http://www.mulesoft.com
 * The software in this package is published under the terms of the CPAL v1.0
 * license, a copy of which has been included with this distribution in the
 * LICENSE.txt file.
 */

package org.mule.module.rss.transformers;


import static org.jdom2.Namespace.NO_NAMESPACE;

import com.rometools.rome.feed.WireFeed;
import com.rometools.rome.feed.rss.Content;
import com.rometools.rome.feed.rss.Description;
import com.rometools.rome.feed.rss.Guid;
import com.rometools.rome.feed.rss.Item;
import com.rometools.rome.feed.rss.Source;
import com.rometools.rome.io.FeedException;
import com.rometools.rome.io.WireFeedParser;

import static com.rometools.rome.io.impl.DateParser.parseDate;

import com.rometools.rome.io.impl.RSS20Parser;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;

/**
 * RSS 2.0 parser that additionally recognizes item elements declared under an XML namespace.
 * <p>
 * {@link RSS20Parser} handles the elements defined by the RSS standards, but it does not look for
 * them under a namespace, even though namespaces have been part of RSS since version 1.0.
 * This parser therefore lets the parent parse each item first and then, for every item property
 * the parent left unset, looks for an element with the expected name under each namespace
 * declared on the feed's root element (and finally under no namespace at all).
 * <p>
 * The parser keeps no per-document state: the namespaces to try are derived from the root element
 * handed to {@link #parseItem(Element, Element, Locale)} on every call.
 */
public class RssParser extends RSS20Parser implements WireFeedParser
{

    /**
     * @return the feed type handled by this parser, {@code "rss_2.0"}
     */
    @Override
    public String getType()
    {
        return "rss_2.0";
    }

    /**
     * Accepts every document without inspecting it.
     *
     * @param document the document offered to this parser (not examined)
     * @return always {@code true}
     */
    @Override
    public boolean isMyType(Document document)
    {
        return true;
    }

    /**
     * @return {@link Namespace#NO_NAMESPACE}
     */
    @Override
    protected Namespace getRSSNamespace()
    {
        return NO_NAMESPACE;
    }

    /**
     * Parses the feed rooted at the document's root element.
     *
     * @param document the XML document holding the feed
     * @param validate accepted for interface compatibility; this override does not act on it
     * @param locale   locale forwarded to the channel parsing
     * @return the feed produced by {@code parseChannel}
     * @throws IllegalArgumentException declared by the overridden method
     * @throws FeedException            declared by the overridden method
     */
    @Override
    public WireFeed parse(final Document document, final boolean validate, final Locale locale) throws IllegalArgumentException, FeedException
    {
        return parseChannel(document.getRootElement(), locale);
    }

    /**
     * Parses one item with the parent parser and then fills in every property the parent left
     * unset from same-named child elements that live under one of the feed's namespaces.
     *
     * @param rssRoot     root element of the feed; its additional namespaces are the ones searched
     * @param itemElement the item element being parsed
     * @param locale      locale used when parsing dates
     * @return the parsed item
     */
    @Override
    public Item parseItem(final Element rssRoot, final Element itemElement, final Locale locale)
    {
        final Item item = super.parseItem(rssRoot, itemElement, locale);
        // Computed per call from the argument rather than cached in a field, so this method
        // works without a preceding parse(...) and never sees another document's namespaces.
        final List<Namespace> namespaces = candidateNamespaces(rssRoot);

        parseTitle(item, itemElement, namespaces);
        parseLink(item, itemElement, namespaces);
        parseDescription(item, itemElement, namespaces);
        parseAuthor(item, itemElement, namespaces);
        parseComments(item, itemElement, namespaces);
        parseSource(item, itemElement, namespaces);
        parseGuid(item, itemElement, namespaces);
        parsePubDate(item, itemElement, namespaces, locale);
        parseExpirationDate(item, itemElement, namespaces, locale);
        parseContent(item, itemElement, namespaces);
        parseCategories(item, itemElement, namespaces);

        return item;
    }

    /**
     * Builds the ordered list of namespaces to try: the root's additional namespaces first,
     * then {@link Namespace#NO_NAMESPACE}. A {@code null} root yields only the latter.
     */
    private static List<Namespace> candidateNamespaces(final Element rssRoot)
    {
        final List<Namespace> namespaces = new ArrayList<>();
        if (rssRoot != null)
        {
            namespaces.addAll(rssRoot.getAdditionalNamespaces());
        }
        namespaces.add(NO_NAMESPACE);
        return namespaces;
    }

    /**
     * Returns the first child named {@code childName} found under any of the given namespaces
     * (tried in list order), or {@code null} when none of them has such a child.
     */
    private static Element parseChildWithNamespaces(final Element parentElement, final String childName, final List<Namespace> namespaces)
    {
        for (final Namespace namespace : namespaces)
        {
            final Element element = parentElement.getChild(childName, namespace);
            if (element != null)
            {
                return element;
            }
        }
        return null;
    }

    /**
     * Returns the children named {@code childName} of the first namespace (in list order) that
     * has any, or an empty list when no namespace has such children.
     */
    private static List<Element> parseChildrenWithNamespaces(final Element parentElement, final String childName, final List<Namespace> namespaces)
    {
        for (final Namespace namespace : namespaces)
        {
            final List<Element> elements = parentElement.getChildren(childName, namespace);
            if (!elements.isEmpty())
            {
                return elements;
            }
        }
        return new ArrayList<>();
    }

    /** Sets the title from a namespaced {@code title} element when the item has none yet. */
    private void parseTitle(final Item item, final Element itemElement, final List<Namespace> namespaces)
    {
        if (item.getTitle() != null)
        {
            return;
        }
        final Element title = parseChildWithNamespaces(itemElement, "title", namespaces);
        if (title != null)
        {
            item.setTitle(title.getText());
        }
    }

    /** Sets both link and URI from a namespaced {@code link} element when the item has no link yet. */
    private void parseLink(final Item item, final Element itemElement, final List<Namespace> namespaces)
    {
        if (item.getLink() != null)
        {
            return;
        }
        final Element link = parseChildWithNamespaces(itemElement, "link", namespaces);
        if (link != null)
        {
            item.setLink(link.getText());
            item.setUri(link.getText());
        }
    }

    /**
     * Sets the description from a namespaced {@code description} element when the item has none
     * yet; the element's {@code type} attribute, if present, becomes the description type.
     */
    private void parseDescription(final Item item, final Element itemElement, final List<Namespace> namespaces)
    {
        if (item.getDescription() != null)
        {
            return;
        }
        final Element description = parseChildWithNamespaces(itemElement, "description", namespaces);
        if (description != null)
        {
            final Description descriptionBean = new Description();
            descriptionBean.setValue(description.getText());
            item.setDescription(descriptionBean);
            final String type = description.getAttributeValue("type");
            if (type != null)
            {
                item.getDescription().setType(type);
            }
        }
    }

    /** Sets the author from a namespaced {@code author} element when the item has none yet. */
    private void parseAuthor(final Item item, final Element itemElement, final List<Namespace> namespaces)
    {
        if (item.getAuthor() != null)
        {
            return;
        }
        final Element author = parseChildWithNamespaces(itemElement, "author", namespaces);
        if (author != null)
        {
            item.setAuthor(author.getText());
        }
    }

    /** Sets the comments value from a namespaced {@code comments} element when the item has none yet. */
    private void parseComments(final Item item, final Element itemElement, final List<Namespace> namespaces)
    {
        if (item.getComments() != null)
        {
            return;
        }
        final Element comments = parseChildWithNamespaces(itemElement, "comments", namespaces);
        if (comments != null)
        {
            item.setComments(comments.getText());
        }
    }

    /**
     * Sets the source from a namespaced {@code source} element when the item has none yet,
     * taking the value from the element text and the URL from its {@code url} attribute.
     */
    private void parseSource(final Item item, final Element itemElement, final List<Namespace> namespaces)
    {
        if (item.getSource() != null)
        {
            return;
        }
        final Element source = parseChildWithNamespaces(itemElement, "source", namespaces);
        if (source != null)
        {
            final Source sourceBean = new Source();
            sourceBean.setValue(source.getText());
            sourceBean.setUrl(source.getAttributeValue("url"));
            item.setSource(sourceBean);
        }
    }

    /**
     * Sets the GUID from a namespaced {@code guid} element when the item has none yet. The
     * permalink flag is only touched when the {@code isPermaLink} attribute is present.
     */
    private void parseGuid(final Item item, final Element itemElement, final List<Namespace> namespaces)
    {
        if (item.getGuid() != null)
        {
            return;
        }
        final Element guid = parseChildWithNamespaces(itemElement, "guid", namespaces);
        if (guid != null)
        {
            final Guid guidBean = new Guid();
            final String isPermaLink = guid.getAttributeValue("isPermaLink");
            if (isPermaLink != null)
            {
                guidBean.setPermaLink(isPermaLink.equalsIgnoreCase("true"));
            }
            guidBean.setValue(guid.getText());
            item.setGuid(guidBean);
        }
    }

    /** Sets the publication date from a namespaced {@code pubDate} element when the item has none yet. */
    private void parsePubDate(final Item item, final Element itemElement, final List<Namespace> namespaces, final Locale locale)
    {
        if (item.getPubDate() != null)
        {
            return;
        }
        final Element pubDate = parseChildWithNamespaces(itemElement, "pubDate", namespaces);
        if (pubDate != null)
        {
            item.setPubDate(parseDate(pubDate.getText(), locale));
        }
    }

    /** Sets the expiration date from a namespaced {@code expirationDate} element when the item has none yet. */
    private void parseExpirationDate(final Item item, final Element itemElement, final List<Namespace> namespaces, final Locale locale)
    {
        if (item.getExpirationDate() != null)
        {
            return;
        }
        final Element expirationDate = parseChildWithNamespaces(itemElement, "expirationDate", namespaces);
        if (expirationDate != null)
        {
            item.setExpirationDate(parseDate(expirationDate.getText(), locale));
        }
    }

    /**
     * Sets the content from a namespaced {@code encoded} element when the item has none yet;
     * the content type is set to {@link Content#HTML}.
     */
    private void parseContent(final Item item, final Element itemElement, final List<Namespace> namespaces)
    {
        if (item.getContent() != null)
        {
            return;
        }
        final Element encoded = parseChildWithNamespaces(itemElement, "encoded", namespaces);
        if (encoded != null)
        {
            final Content content = new Content();
            content.setType(Content.HTML);
            content.setValue(encoded.getText());
            item.setContent(content);
        }
    }

    /**
     * When the item's category list is present but empty, replaces it with the result of the
     * inherited category parsing applied to the namespaced {@code category} elements.
     */
    private void parseCategories(final Item item, final Element itemElement, final List<Namespace> namespaces)
    {
        if (item.getCategories() != null && item.getCategories().isEmpty())
        {
            final List<Element> categories = parseChildrenWithNamespaces(itemElement, "category", namespaces);
            item.setCategories(parseCategories(categories));
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy