// com.sun.syndication.io.impl.Atom10Parser Maven / Gradle / Ivy
/*
* Copyright 2004 Sun Microsystems, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.sun.syndication.io.impl;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.output.XMLOutputter;
import com.sun.syndication.feed.WireFeed;
import com.sun.syndication.feed.atom.Category;
import com.sun.syndication.feed.atom.Content;
import com.sun.syndication.feed.atom.Entry;
import com.sun.syndication.feed.atom.Feed;
import com.sun.syndication.feed.atom.Generator;
import com.sun.syndication.feed.atom.Link;
import com.sun.syndication.feed.atom.Person;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.WireFeedInput;
import com.sun.syndication.io.WireFeedOutput;
import java.io.IOException;
import java.io.Reader;
import java.net.MalformedURLException;
import java.util.regex.Pattern;
import org.jdom2.Attribute;
import org.jdom2.JDOMException;
import org.jdom2.Parent;
import org.jdom2.input.SAXBuilder;
/**
* Parser for Atom 1.0
* @author Dave Johnson
*/
public class Atom10Parser extends BaseWireFeedParser {
/** Namespace URI that identifies Atom 1.0 elements. */
private static final String ATOM_10_URI = "http://www.w3.org/2005/Atom";
/** JDOM namespace object created from {@link #ATOM_10_URI}. */
private static final Namespace ATOM_10_NS = Namespace.getNamespace(ATOM_10_URI);
/**
 * Class-wide switch consulted by resolveURI(): while it is false (the
 * initial value) resolveURI() returns its url argument unchanged.
 */
private static boolean resolveURIs = false;
/**
 * Sets the static flag that enables relative URI resolution. Because the
 * flag is static, the setting is shared by every parser instance.
 * @param resolveURIs new value of the flag
 */
public static void setResolveURIs(boolean resolveURIs) {
Atom10Parser.resolveURIs = resolveURIs;
}
/**
 * Returns the static flag that enables relative URI resolution.
 * @return current value of the flag
 */
public static boolean getResolveURIs() {
return resolveURIs;
}
/** Creates a parser whose type string is "atom_1.0". */
public Atom10Parser() {
this("atom_1.0");
}
/**
 * Creates a parser with the given type string; the type and the Atom 1.0
 * namespace are handed to the superclass constructor.
 * @param type type string passed on to the superclass
 */
protected Atom10Parser(String type) {
super(type, ATOM_10_NS);
}
/**
 * Returns the namespace this parser passes to its Atom element lookups.
 * @return the Atom 1.0 namespace in this implementation
 */
protected Namespace getAtomNamespace() {
return ATOM_10_NS;
}
/**
 * Tells whether the document's root element lives in the namespace
 * returned by getAtomNamespace().
 * @param document document to inspect
 * @return true when the root element's namespace equals the Atom namespace
 */
public boolean isMyType(Document document) {
    final Namespace rootNamespace = document.getRootElement().getNamespace();
    if (rootNamespace == null) {
        return false;
    }
    return rootNamespace.equals(getAtomNamespace());
}
/**
 * Parses a document into a WireFeed, optionally calling validateFeed() first.
 * @param document document whose root element is handed to parseFeed()
 * @param validate when true, validateFeed() runs before parsing
 * @return the feed produced by parseFeed()
 * @throws IllegalArgumentException declared in the signature
 * @throws FeedException propagated from validateFeed() or parseFeed()
 */
public WireFeed parse(Document document, boolean validate)
        throws IllegalArgumentException, FeedException {
    if (validate) {
        validateFeed(document);
    }
    return parseFeed(document.getRootElement());
}
/**
 * Validation hook called by parse() when its validate flag is true.
 * The body is currently empty, so nothing is checked.
 * @param document document that would be validated
 * @throws FeedException declared in the signature; not thrown by this empty body
 */
protected void validateFeed(Document document) throws FeedException {
// TBD
// Here we have to validate the feed against a schema or similar;
// it is not yet clear how to do that.
// One possibility would be to produce an output and attempt to parse it again
// with validation turned on.
// Otherwise we will have to check the document elements by hand.
}
/**
 * Turns the feed root element into a Feed: metadata first, then xml:base,
 * modules, entries and finally foreign markup.
 * @param eFeed root element of the feed
 * @return the populated feed
 * @throws FeedException if determining the base URI fails for any reason
 */
protected WireFeed parseFeed(Element eFeed) throws FeedException {
    final String baseURI;
    try {
        baseURI = findBaseURI(eFeed);
    } catch (Exception e) {
        throw new FeedException("ERROR while finding base URI of feed", e);
    }

    final Feed feed = parseFeedMetadata(baseURI, eFeed);

    final String xmlBase = eFeed.getAttributeValue("base", Namespace.XML_NAMESPACE);
    if (xmlBase != null) {
        feed.setXmlBase(xmlBase);
    }

    feed.setModules(parseFeedModules(eFeed));

    // Entries are only set when the feed actually contains some.
    final List entryElements = eFeed.getChildren("entry", getAtomNamespace());
    if (!entryElements.isEmpty()) {
        feed.setEntries(parseEntries(feed, baseURI, entryElements));
    }

    // Kept last, exactly as before, so it sees the element after all other parsing.
    final List foreignMarkup = extractForeignMarkup(eFeed, feed, getAtomNamespace());
    if (!foreignMarkup.isEmpty()) {
        feed.setForeignMarkup(foreignMarkup);
    }
    return feed;
}
/**
 * Reads the feed-level metadata children (title, links, categories,
 * authors, contributors, subtitle, id, generator, rights, icon, logo,
 * updated) of an element into a new Feed of this parser's type.
 * parseEntry() also uses it for the atom:source element of an entry.
 * @param baseURI base URI handed to the link, category and person parsers
 * @param eFeed element whose children are read
 * @return the new feed; optional fields are only set when the child exists
 */
private Feed parseFeedMetadata(String baseURI, Element eFeed) {
    final Namespace atomNs = getAtomNamespace();
    final Feed result = new Feed(getType());

    final Element titleElement = eFeed.getChild("title", atomNs);
    if (titleElement != null) {
        final Content title = new Content();
        title.setValue(parseTextConstructToString(titleElement));
        title.setType(getAttributeValue(titleElement, "type"));
        result.setTitleEx(title);
    }

    // Links and categories are always set, even when the parsers return null.
    final List linkElements = eFeed.getChildren("link", atomNs);
    result.setAlternateLinks(parseAlternateLinks(result, null, baseURI, linkElements));
    result.setOtherLinks(parseOtherLinks(result, null, baseURI, linkElements));

    final List categoryElements = eFeed.getChildren("category", atomNs);
    result.setCategories(parseCategories(baseURI, categoryElements));

    final List authorElements = eFeed.getChildren("author", atomNs);
    if (!authorElements.isEmpty()) {
        result.setAuthors(parsePersons(baseURI, authorElements));
    }

    final List contributorElements = eFeed.getChildren("contributor", atomNs);
    if (!contributorElements.isEmpty()) {
        result.setContributors(parsePersons(baseURI, contributorElements));
    }

    final Element subtitleElement = eFeed.getChild("subtitle", atomNs);
    if (subtitleElement != null) {
        final Content subtitle = new Content();
        subtitle.setValue(parseTextConstructToString(subtitleElement));
        subtitle.setType(getAttributeValue(subtitleElement, "type"));
        result.setSubtitle(subtitle);
    }

    final Element idElement = eFeed.getChild("id", atomNs);
    if (idElement != null) {
        result.setId(idElement.getText());
    }

    final Element generatorElement = eFeed.getChild("generator", atomNs);
    if (generatorElement != null) {
        final Generator generator = new Generator();
        generator.setValue(generatorElement.getText());
        final String generatorUri = getAttributeValue(generatorElement, "uri");
        if (generatorUri != null) {
            generator.setUrl(generatorUri);
        }
        final String generatorVersion = getAttributeValue(generatorElement, "version");
        if (generatorVersion != null) {
            generator.setVersion(generatorVersion);
        }
        result.setGenerator(generator);
    }

    final Element rightsElement = eFeed.getChild("rights", atomNs);
    if (rightsElement != null) {
        result.setRights(parseTextConstructToString(rightsElement));
    }

    final Element iconElement = eFeed.getChild("icon", atomNs);
    if (iconElement != null) {
        result.setIcon(iconElement.getText());
    }

    final Element logoElement = eFeed.getChild("logo", atomNs);
    if (logoElement != null) {
        result.setLogo(logoElement.getText());
    }

    final Element updatedElement = eFeed.getChild("updated", atomNs);
    if (updatedElement != null) {
        result.setUpdated(DateParser.parseDate(updatedElement.getText()));
    }

    return result;
}
/**
 * Copies the rel, type, href, title, hreflang and length attributes of a
 * link element into a new Link. Attributes that are absent are not set.
 * The feed and entry parameters are not used by this method.
 * @param feed unused
 * @param entry unused
 * @param baseURI base URI used when resolving a relative href
 * @param eLink link element to read
 * @return the new link
 */
private Link parseLink(Feed feed , Entry entry, String baseURI, Element eLink) {
    final Link result = new Link();

    final String rel = getAttributeValue(eLink, "rel");
    if (rel != null) {
        result.setRel(rel);
    }

    final String type = getAttributeValue(eLink, "type");
    if (type != null) {
        result.setType(type);
    }

    final String href = getAttributeValue(eLink, "href");
    if (href != null) {
        result.setHref(href);
        // The resolved form is only recorded for relative hrefs.
        if (isRelativeURI(href)) {
            result.setHrefResolved(resolveURI(baseURI, eLink, href));
        }
    }

    final String title = getAttributeValue(eLink, "title");
    if (title != null) {
        result.setTitle(title);
    }

    final String hreflang = getAttributeValue(eLink, "hreflang");
    if (hreflang != null) {
        result.setHreflang(hreflang);
    }

    final String lengthText = getAttributeValue(eLink, "length");
    if (lengthText != null) {
        // A null result from the number parser leaves the length untouched.
        final Long length = NumberParser.parseLong(lengthText);
        if (length != null) {
            result.setLength(length.longValue());
        }
    }

    return result;
}
// List(Elements) -> List(Link)
private List parseAlternateLinks(Feed feed, Entry entry, String baseURI, List eLinks) {
List links = new ArrayList();
for (int i=0;i0) ? links : null;
}
private List parseOtherLinks(Feed feed, Entry entry, String baseURI, List eLinks) {
List links = new ArrayList();
for (int i=0;i0) ? links : null;
}
/**
 * Builds a Person from the name, uri and email children of a person
 * element and attaches the modules returned by parsePersonModules().
 * @param baseURI base URI used when resolving a relative person uri
 * @param ePerson person element (an author or contributor)
 * @return the new person
 */
private Person parsePerson(String baseURI, Element ePerson) {
    final Person result = new Person();

    final Element nameElement = ePerson.getChild("name", getAtomNamespace());
    if (nameElement != null) {
        result.setName(nameElement.getText());
    }

    final Element uriElement = ePerson.getChild("uri", getAtomNamespace());
    if (uriElement != null) {
        final String uri = uriElement.getText();
        result.setUri(uri);
        // The resolved form is only recorded for relative URIs.
        if (isRelativeURI(uri)) {
            result.setUriResolved(resolveURI(baseURI, ePerson, uri));
        }
    }

    final Element emailElement = ePerson.getChild("email", getAtomNamespace());
    if (emailElement != null) {
        result.setEmail(emailElement.getText());
    }

    result.setModules(parsePersonModules(ePerson));
    return result;
}
// List(Elements) -> List(Persons)
private List parsePersons(String baseURI, List ePersons) {
List persons = new ArrayList();
for (int i=0;i0) ? persons : null;
}
/**
 * Builds a Content object from an element: the value comes from
 * parseTextConstructToString(), src and type from the attributes of the
 * same name (either may be null when the attribute is absent).
 * @param e element to convert
 * @return the new content object
 */
private Content parseContent(Element e) {
    // Extract the text first, as before, then read the attributes.
    final String text = parseTextConstructToString(e);
    final String source = getAttributeValue(e, "src");
    final String mediaType = getAttributeValue(e, "type");

    final Content result = new Content();
    result.setSrc(source);
    result.setType(mediaType);
    result.setValue(text);
    return result;
}
/**
 * Returns the string value of a text construct element. The type
 * attribute decides the form, with Content.TEXT assumed when it is absent:
 * for Content.XHTML, or any type containing "/xml" or "+xml", the child
 * nodes are serialized as markup; for every other type the element's
 * text is returned verbatim.
 * Side effect: direct child elements in the Atom namespace are moved to
 * no namespace before serialization.
 * @param e text construct element
 * @return serialized child markup or the element text
 */
private String parseTextConstructToString(Element e) {
    final String declaredType = getAttributeValue(e, "type");
    final String type = (declaredType == null) ? Content.TEXT : declaredType;

    final boolean xmlPayload = type.equals(Content.XHTML)
            || type.contains("/xml")
            || type.contains("+xml");
    if (!xmlPayload) {
        // Everything else comes in verbatim
        return e.getText();
    }

    // XHTML content needs special handling
    final List children = e.getContent();
    for (Iterator nodes = children.iterator(); nodes.hasNext();) {
        final org.jdom2.Content node = (org.jdom2.Content) nodes.next();
        if (!(node instanceof Element)) {
            continue;
        }
        final Element child = (Element) node;
        if (child.getNamespace().equals(getAtomNamespace())) {
            child.setNamespace(Namespace.NO_NAMESPACE);
        }
    }
    return new XMLOutputter().outputString(children);
}
// List(Elements) -> List(Entries)
protected List parseEntries(Feed feed, String baseURI, List eEntries) {
List entries = new ArrayList();
for (int i=0;i0) ? entries : null;
}
/**
 * Builds an Entry from an entry element: xml:base, title, links, authors,
 * contributors, id, updated, published, summary, content, rights,
 * categories, source, modules and foreign markup.
 * The rights element is read with parseTextConstructToString(), the same
 * way the feed-level rights element is read, so XHTML rights keep their
 * markup instead of being reduced to the element's direct text.
 * @param feed feed the entry belongs to, passed through to the link parsers
 * @param eEntry entry element to read
 * @param baseURI base URI handed to the link, person and category parsers
 * @return the new entry; optional fields are only set when the child exists
 */
protected Entry parseEntry(Feed feed, Element eEntry, String baseURI) {
    Entry entry = new Entry();

    String xmlBase = eEntry.getAttributeValue("base", Namespace.XML_NAMESPACE);
    if (xmlBase != null) {
        entry.setXmlBase(xmlBase);
    }

    Element e = eEntry.getChild("title", getAtomNamespace());
    if (e != null) {
        Content c = new Content();
        c.setValue(parseTextConstructToString(e));
        c.setType(getAttributeValue(e, "type"));
        entry.setTitleEx(c);
    }

    // Links are always set, even when the parsers return null.
    List eList = eEntry.getChildren("link", getAtomNamespace());
    entry.setAlternateLinks(parseAlternateLinks(feed, entry, baseURI, eList));
    entry.setOtherLinks(parseOtherLinks(feed, entry, baseURI, eList));

    eList = eEntry.getChildren("author", getAtomNamespace());
    if (eList.size() > 0) {
        entry.setAuthors(parsePersons(baseURI, eList));
    }

    eList = eEntry.getChildren("contributor", getAtomNamespace());
    if (eList.size() > 0) {
        entry.setContributors(parsePersons(baseURI, eList));
    }

    e = eEntry.getChild("id", getAtomNamespace());
    if (e != null) {
        entry.setId(e.getText());
    }

    e = eEntry.getChild("updated", getAtomNamespace());
    if (e != null) {
        entry.setUpdated(DateParser.parseDate(e.getText()));
    }

    e = eEntry.getChild("published", getAtomNamespace());
    if (e != null) {
        entry.setPublished(DateParser.parseDate(e.getText()));
    }

    e = eEntry.getChild("summary", getAtomNamespace());
    if (e != null) {
        entry.setSummary(parseContent(e));
    }

    e = eEntry.getChild("content", getAtomNamespace());
    if (e != null) {
        List contents = new ArrayList();
        contents.add(parseContent(e));
        entry.setContents(contents);
    }

    e = eEntry.getChild("rights", getAtomNamespace());
    if (e != null) {
        // Same extraction as the feed-level rights element.
        entry.setRights(parseTextConstructToString(e));
    }

    List cList = eEntry.getChildren("category", getAtomNamespace());
    entry.setCategories(parseCategories(baseURI, cList));

    // The source element carries feed metadata, so it is parsed as such.
    e = eEntry.getChild("source", getAtomNamespace());
    if (e != null) {
        entry.setSource(parseFeedMetadata(baseURI, e));
    }

    entry.setModules(parseItemModules(eEntry));

    // Kept last, exactly as before, so it sees the element after all other parsing.
    List foreignMarkup = extractForeignMarkup(eEntry, entry, getAtomNamespace());
    if (foreignMarkup.size() > 0) {
        entry.setForeignMarkup(foreignMarkup);
    }
    return entry;
}
private List parseCategories(String baseURI, List eCategories) {
List cats = new ArrayList();
for (int i=0;i0) ? cats : null;
}
/**
 * Copies the term, scheme and label attributes of a category element into
 * a new Category. Attributes that are absent are not set.
 * @param baseURI base URI used when resolving a relative scheme
 * @param eCategory category element to read
 * @return the new category
 */
private Category parseCategory(String baseURI, Element eCategory) {
    final Category result = new Category();

    final String term = getAttributeValue(eCategory, "term");
    if (term != null) {
        result.setTerm(term);
    }

    final String scheme = getAttributeValue(eCategory, "scheme");
    if (scheme != null) {
        result.setScheme(scheme);
        // The resolved form is only recorded for relative schemes.
        if (isRelativeURI(scheme)) {
            result.setSchemeResolved(resolveURI(baseURI, eCategory, scheme));
        }
    }

    final String label = getAttributeValue(eCategory, "label");
    if (label != null) {
        result.setLabel(label);
    }

    return result;
}
// Fix for issue #34 "valid IRI href attributes are stripped for atom:link":
// URIs that did not start with http were being treated as relative URIs.
// A URI is considered absolute when it starts with a scheme followed by a
// colon. The scheme follows RFC 3986 section 3.1: a letter followed by any
// number of letters, digits, "+", "-" or ".", in either case. An empty
// scheme (a leading colon) is therefore NOT absolute.
static final Pattern absoluteURIPattern =
        Pattern.compile("^[a-zA-Z][a-zA-Z0-9+.\\-]*:.*$");
/**
 * Returns true if URI is absolute, i.e. starts with a scheme and a colon.
 * @param uri URI to test, must not be null
 * @return true for e.g. "http://host/x", "HTTP://HOST", "svn+ssh://host",
 *         false for e.g. "/x", "../x" or ":x"
 */
public static boolean isAbsoluteURI(String uri) {
    return absoluteURIPattern.matcher(uri).find();
}
/**
 * Returns true if URI is relative, i.e. not absolute.
 * @param uri URI to test, must not be null
 * @return the negation of {@link #isAbsoluteURI(String)}
 */
public static boolean isRelativeURI(String uri) {
    return !isAbsoluteURI(uri);
}
/**
 * Resolve URI via base URL and parent element.
 * Resolves a relative URI considering xml:base attributes found while
 * walking up from the parent element, and finally baseURI.
 * Returns url unchanged when the static resolveURIs flag is off or when
 * url is already absolute. "." and "./" are treated as the empty string.
 * @param baseURI Base URI used to fetch the XML document, may be null
 * @param parent Parent element from which to consider xml:base
 * @param url URL to be resolved
 * @return the resolved URI, or url itself when nothing is resolved
 */
public static String resolveURI(String baseURI, Parent parent, String url) {
    if (!resolveURIs) {
        return url;
    }
    if (isRelativeURI(url)) {
        url = (!".".equals(url) && !"./".equals(url)) ? url : "";

        if (url.startsWith("/") && baseURI != null) {
            // Host relative URI: keep only the part of the base before its
            // first path slash. When the base has no path slash at all
            // (e.g. "http://host") the whole base is used; previously the
            // base was left null in that case.
            String base = baseURI;
            int slashslash = baseURI.indexOf("//");
            int nextslash = baseURI.indexOf("/", slashslash + 2);
            if (nextslash != -1) {
                base = baseURI.substring(0, nextslash);
            }
            return formURI(base, url);
        }

        // Relative URI with parent
        if (parent instanceof Element) {

            // Do we have an xml:base?
            String xmlbase = ((Element) parent).getAttributeValue(
                    "base", Namespace.XML_NAMESPACE);
            if (xmlbase != null && xmlbase.trim().length() > 0) {
                if (isAbsoluteURI(xmlbase)) {
                    // Absolute xml:base, so form URI right now
                    if (url.startsWith("/")) {
                        // Host relative URI
                        int slashslash = xmlbase.indexOf("//");
                        int nextslash = xmlbase.indexOf("/", slashslash + 2);
                        if (nextslash != -1) {
                            xmlbase = xmlbase.substring(0, nextslash);
                        }
                        return formURI(xmlbase, url);
                    }
                    if (!xmlbase.endsWith("/")) {
                        // Base URI is filename, strip it off. Only a slash
                        // located after the "//" of the authority marks a
                        // path; this keeps "http://host" intact and avoids
                        // substring(0, -1) when there is no slash at all.
                        int lastSlash = xmlbase.lastIndexOf("/");
                        int authority = xmlbase.indexOf("//");
                        if (lastSlash > authority + 1) {
                            xmlbase = xmlbase.substring(0, lastSlash);
                        }
                    }
                    return formURI(xmlbase, url);
                } else {
                    // Relative xml:base, so walk up tree
                    return resolveURI(baseURI, parent.getParent(),
                            stripTrailingSlash(xmlbase) + "/" + stripStartingSlash(url));
                }
            }
            // No xml:base so walk up tree
            return resolveURI(baseURI, parent.getParent(), url);

        // Relative URI with no parent (i.e. top of tree), so form URI right now
        } else if (parent == null || parent instanceof Document) {
            return formURI(baseURI, url);
        }
    }
    return url;
}
/**
 * Find base URI of feed considering relative URIs.
 * Takes the href of the feed's rel="self" link, treats "." and "./" as
 * empty, cuts everything from the last slash on, and passes the rest
 * through resolveURI() with no base URI.
 * @param root Root element of feed.
 * @return the base URI, or null when the feed has no self link
 * @throws MalformedURLException declared in the signature
 */
private String findBaseURI(Element root) throws MalformedURLException {
    String selfHref = findAtomLink(root, "self");
    if (selfHref == null) {
        return null;
    }
    if (".".equals(selfHref) || "./".equals(selfHref)) {
        selfHref = "";
    }
    final int lastSlash = selfHref.lastIndexOf("/");
    if (lastSlash != -1) {
        selfHref = selfHref.substring(0, lastSlash);
    }
    return resolveURI(null, root, selfHref);
}
/**
 * Return URL string of Atom link element under parent element.
 * Link with no rel attribute is considered to be rel="alternate".
 * A matching link that has no href attribute is skipped rather than
 * dereferenced, so the search continues with the next link.
 * Links are looked up in the namespace returned by getAtomNamespace(),
 * like the other element lookups of this parser.
 * @param parent Consider only children of this parent element
 * @param rel Consider only links with this relationship
 * @return href of the first matching link that has one, or null
 */
private String findAtomLink(Element parent, String rel) {
    List linksList = parent.getChildren("link", getAtomNamespace());
    if (linksList == null) {
        return null;
    }
    for (Iterator links = linksList.iterator(); links.hasNext(); ) {
        Element link = (Element) links.next();
        Attribute relAtt = getAttribute(link, "rel");
        Attribute hrefAtt = getAttribute(link, "href");
        boolean relMatches = (relAtt == null)
                ? "alternate".equals(rel)
                : relAtt.getValue().equals(rel);
        if (relMatches && hrefAtt != null) {
            return hrefAtt.getValue();
        }
    }
    return null;
}
/**
* Form URI by combining base with append portion and giving
* special consideration to append portions that begin with ".."
* @param base Base of URI, may end with trailing slash
* @param append String to append, may begin with slash or ".."
*/
private static String formURI(String base, String append) {
base = stripTrailingSlash(base);
append = stripStartingSlash(append);
if (append.startsWith("..")) {
String ret = null;
String[] parts = append.split("/");
for (int i=0; i
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy