All Downloads are FREE. Search and download functionalities are using the official Maven repository.

be.ceau.podcastparser.namespace.custom.impl.Media Maven / Gradle / Ivy

/*
	Copyright 2018 Marceau Dewilde 
	
	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at
	
		https://www.apache.org/licenses/LICENSE-2.0
	
	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
*/
package be.ceau.podcastparser.namespace.custom.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;

import be.ceau.podcastparser.PodcastParserContext;
import be.ceau.podcastparser.models.core.Item;
import be.ceau.podcastparser.models.support.Category;
import be.ceau.podcastparser.models.support.Copyright;
import be.ceau.podcastparser.models.support.Credit;
import be.ceau.podcastparser.models.support.Hash;
import be.ceau.podcastparser.models.support.Image;
import be.ceau.podcastparser.models.support.License;
import be.ceau.podcastparser.models.support.MediaContent;
import be.ceau.podcastparser.models.support.MediaPlayer;
import be.ceau.podcastparser.models.support.Rating;
import be.ceau.podcastparser.models.support.Scene;
import be.ceau.podcastparser.models.support.Transcript;
import be.ceau.podcastparser.models.support.TypedString;
import be.ceau.podcastparser.namespace.Namespace;
import be.ceau.podcastparser.util.Durations;
import be.ceau.podcastparser.util.Strings;
import be.ceau.podcastparser.util.UnmodifiableSet;

/**
 * 

Media RSS Specification

* *

* An RSS module that supplements the <enclosure> element capabilities of RSS 2.0 to allow for * more robust media syndication. *

* *

* Primary elements *

*
    *
  • {@code } sub-element of {@code }. It allows grouping of * {@code } elements that are effectively the same content, yet different * representations *
  • {@code } is a sub-element of either {@code } or {@code }. * It contains 14 attributes, most of which are optional. *
      *
    • url should specify the direct URL to the media object. If not included, a * {@code } element must be specified. * *
    • fileSize is the number of bytes of the media object. It is an optional attribute. * *
    • type is the standard MIME type of the object. It is an optional attribute. * *
    • medium is the type of object (image | audio | video | document | executable). While this * attribute can at times seem redundant if type is supplied, it is included because it simplifies * decision making on the reader side, as well as flushes out any ambiguities between MIME type and * object type. It is an optional attribute. * *
    • isDefault determines if this is the default object that should be used for the * {@code }. There should only be one default object per {@code }. It is * an optional attribute. * *
    • expression determines if the object is a sample or the full version of the object, or even if * it is a continuous stream (sample | full | nonstop). Default value is "full". It is an optional * attribute. * *
    • bitrate is the kilobits per second rate of media. It is an optional attribute. * *
    • framerate is the number of frames per second for the media object. It is an optional * attribute. * *
    • samplingrate is the number of samples per second taken to create the media object. It is * expressed in thousands of samples per second (kHz). It is an optional attribute. * *
    • channels is number of audio channels in the media object. It is an optional attribute. * *
    • duration is the number of seconds the media object plays. It is an optional attribute. * *
    • height is the height of the media object. It is an optional attribute. * *
    • width is the width of the media object. It is an optional attribute. * *
    • lang is the primary language encapsulated in the media object. Language codes possible are * detailed in RFC 3066. This attribute is used similar to the xml:lang attribute detailed in the * XML 1.0 Specification (Third Edition). It is an optional attribute. *
    *
* * @see Media RSS Specification */ public class Media implements Namespace { private static final String NAME = "http://search.yahoo.com/mrss/"; private static final Set ALTERNATIVE_NAMES = UnmodifiableSet.of( "http://search.yahoo.com/mrss", "http://www.rssboard.org/media-rss"); @Override public String getName() { return NAME; } @Override public Set getAlternativeNames() { return ALTERNATIVE_NAMES; } @Override public void process(PodcastParserContext ctx) throws XMLStreamException { String localName = ctx.getReader().getLocalName(); switch (localName) { case "category": ctx.getFeed().addCategory(parseCategory(ctx)); break; case "copyright": ctx.getFeed().setCopyright(parseCopyright(ctx)); break; case "credit": ctx.getFeed().setCredit(parseCredit(ctx)); break; case "description": ctx.getFeed().setDescription(parseDescription(ctx)); break; case "keywords": ctx.getFeed().addKeywords(parseKeywords(ctx)); break; case "rating": ctx.getFeed().setRating(parseRating(ctx)); break; case "thumbnail": ctx.getFeed().addImage(parseImage(ctx)); break; default: Namespace.super.process(ctx); break; } } @Override public void process(PodcastParserContext ctx, Item item) throws XMLStreamException { switch (ctx.getReader().getLocalName()) { case "adult": // This is deprecated and has been replaced with 'rating' item.getRating().setText(ctx.getElementText()); item.getRating().setScheme("urn:simple"); break; case "category": item.addCategory(parseCategory(ctx)); break; case "content": item.addMediaContent(parseMediaContent(ctx)); break; case "description": item.setDescription(parseDescription(ctx)); break; case "group": // do not map this as a separate model // extract content elements and add to item directly item.addMediaContents(parseMediaGroup(ctx)); break; case "hash": item.addHash(parseHash(ctx)); break; case "keywords": ctx.getFeed().addKeywords(parseKeywords(ctx)); break; case "rating": item.setRating(parseRating(ctx)); break; case "thumbnail": item.addImage(parseImage(ctx)); break; case "title": item.setTitle(parseTitle(ctx)); break; case "player": item.setMediaPlayer(parseMediaPlayer(ctx)); break; case "credit": item.addCredit(parseCredit(ctx)); break; case "copyright": item.setCopyright(parseCopyright(ctx)); break; case "text": item.addTranscript(parseText(ctx)); break; case "restriction": Namespace.super.process(ctx, item); break; case "community": /* * This element stands for the community related content. This allows inclusion of the user * perception about a media object in the form of view count, ratings and tags. * * * news: 5, abc:3, reuters * * * starRating This element specifies the rating-related information about a media object. Valid * attributes are average, count, min and max. * * statistics This element specifies various statistics about a media object like the view count and * the favorite count. Valid attributes are views and favorites. * * tags This element contains user-generated tags separated by commas in the decreasing order of * each tag's weight. Each tag can be assigned an integer weight in tag_name:weight format. It's up * to the provider to choose the way weight is determined for a tag; for example, number of * occurences can be one way to decide weight of a particular tag. Default weight is 1. */ Namespace.super.process(ctx, item); break; case "comments": /* * Allows inclusion of all the comments a media object has received. */ Namespace.super.process(ctx, item); break; case "embed": /* * Sometimes player-specific embed code is needed for a player to play any video. * allows inclusion of such information in the form of key-value pairs. */ Namespace.super.process(ctx, item); break; case "responses": /* * Allows inclusion of a list of all media responses a media object has received. */ Namespace.super.process(ctx, item); break; case "backLinks": /* * Allows inclusion of all the URLs pointing to a media object. */ Namespace.super.process(ctx, item); break; case "status": /* * Optional tag to specify the status of a media object -- whether it's still active or it has been * blocked/deleted. * * * * state can have values "active", "blocked" or "deleted". "active" means a media object is active * in the system, "blocked" means a media object is blocked by the publisher, "deleted" means a * media object has been deleted by the publisher. * * reason is a reason explaining why a media object has been blocked/deleted. It can be plain text * or a URL. */ Namespace.super.process(ctx, item); break; case "price": /* * Optional tag to include pricing information about a media object. If this tag is not present, the * media object is supposed to be free. One media object can have multiple instances of this tag for * including different pricing structures. The presence of this tag would mean that media object is * not free. * * * * * * * * type Valid values are "rent", "purchase", "package" or "subscription". If nothing is specified, * then the media is free. * * info if the type is "package" or "subscription", then info is a URL pointing to package or * subscription information. This is an optional attribute. * * price is the price of the media object. This is an optional attribute. * * currency -- use [ISO 4217] for currency codes. This is an optional attribute. */ Namespace.super.process(ctx, item); break; case "license": item.setLicense(parseLicense(ctx)); break; case "subTitle": /* * Optional element for subtitle/CC link. It contains type and language attributes. Language is * based on RFC 3066. There can be more than one such tag per media element, for example one per * language. Please refer to Timed Text spec - W3C for more information on Timed Text and Real Time * Subtitling. */ Namespace.super.process(ctx, item); break; case "peerLink": /* * Optional element for P2P link. * * * * For a valid Media RSS item, at least one of the following links is required: * * media:content media:player media:peerLink media:location * * Optional element to specify geographical information about various locations captured in the * content of a media object. The format conforms to geoRSS. * * * 35.669998 139.770004 * * description description of the place whose location is being specified. * * start time at which the reference to a particular location starts in the media object. * * end time at which the reference to a particular location ends in the media object. */ Namespace.super.process(ctx, item); break; case "rights": item.setRights(parseRights(ctx)); break; case "scenes": parseScenes(ctx).forEach(item::addScene); break; default: Namespace.super.process(ctx, item); break; } } /* * Allows a taxonomy to be set that gives an indication of the type of media content, and its * particular contents. */ private Category parseCategory(PodcastParserContext ctx) throws XMLStreamException { Category category = new Category(); String scheme = ctx.getAttribute("scheme"); if (Strings.isBlank(scheme)) { // scheme is an optional attribute. If not included, the default is // "http://search.yahoo.com/mrss/category_schema". scheme = "http://search.yahoo.com/mrss/category_schema"; } category.setScheme(scheme); category.setLabel(ctx.getAttribute("label")); category.setName(ctx.getElementText()); return category; } private Copyright parseCopyright(PodcastParserContext ctx) throws XMLStreamException { Copyright copyright = new Copyright(); copyright.setUrl(ctx.getAttribute("url")); copyright.setText(ctx.getElementText()); return copyright; } /* * Notable entity and the contribution to the creation of the media object. Current entities can * include people, companies, locations, etc. Specific entities can have multiple roles, and several * entities can have the same role. These should appear as distinct elements. It has * two optional attributes. * * entity name * * copyright holder of the entity * * role specifies the role the entity played. Must be lowercase. It is an optional attribute. * * scheme is the URI that identifies the role scheme. It is an optional attribute and possible * values for this attribute are ( urn:ebu | urn:yvs ) . The default scheme is "urn:ebu". The list * of roles supported under urn:ebu scheme can be found at European Broadcasting Union Role Codes. * The roles supported under urn:yvs scheme are ( uploader | owner ). */ private Credit parseCredit(PodcastParserContext ctx) throws XMLStreamException { Credit credit = new Credit(); credit.setScheme(ctx.getAttribute("scheme")); credit.setRole(ctx.getAttribute("role")); credit.setEntity(ctx.getElementText()); return credit; } /** * Short description describing the media object typically a sentence in length. It has one optional * attribute. {@code * This was some really bizarre band I listened to as a young * lad. * } type specifies the type of text embedded. Possible values are either "plain" or "html". Default * value is "plain". All HTML must be entity-encoded. It is an optional attribute. * * @return */ private TypedString parseDescription(PodcastParserContext ctx) throws XMLStreamException { TypedString typedString = new TypedString(); if ("html".equals(ctx.getAttribute("type"))) { typedString.setType("html"); } else { typedString.setType("plain"); } typedString.setText(ctx.getElementText()); return typedString; } /* * This is the hash of the binary media file. It can appear multiple times as long as each instance * is a different algo. */ private Hash parseHash(PodcastParserContext ctx) throws XMLStreamException { Hash hash = new Hash(); hash.setAlgo(ctx.getAttribute("algo")); hash.setHash(ctx.getElementText()); return hash; } /* * Allows particular images to be used as representative images for the media object. If multiple * thumbnails are included, and time coding is not at play, it is assumed that the images are in * order of importance. It has one required attribute and three optional attributes. */ private Image parseImage(PodcastParserContext ctx) throws XMLStreamException { Image image = new Image(); image.setUrl(ctx.getAttribute("url")); image.setWidth(ctx.getAttributeAsInteger("width")); image.setHeight(ctx.getAttributeAsInteger("height")); image.setTime(ctx.getAttributeAsLong("time")); return image; } /* * Comma-delimited keywords describing the media object with typically a maximum of 10 words. */ private List parseKeywords(PodcastParserContext ctx) throws XMLStreamException { return Strings.splitOnComma(ctx.getElementText()); } /* * Optional link to specify the machine-readable license associated with the content. * * Creative * Commons Attribution 3.0 United States License */ private License parseLicense(PodcastParserContext ctx) throws XMLStreamException { String type = ctx.getAttribute("type"); String label = ctx.getAttribute("label"); License license = new License(); license.setHref(ctx.getElementText()); license.setType(type); license.setLabel(label); return license; } private MediaContent parseMediaContent(PodcastParserContext ctx) throws XMLStreamException { MediaContent mediaContent = new MediaContent(); mediaContent.setUrl(ctx.getAttribute("url")); mediaContent.setFileSize(ctx.getAttributeAsLong("fileSize")); mediaContent.setType(ctx.getAttribute("type")); mediaContent.setMedium(ctx.getAttribute("medium")); mediaContent.setIsDefault(ctx.getAttribute("isDefault")); mediaContent.setBitrate(ctx.getAttributeAsLong("bitrate")); mediaContent.setFramerate(ctx.getAttributeAsLong("framerate")); mediaContent.setSamplingrate(ctx.getAttribute("samplingrate")); mediaContent.setChannels(ctx.getAttribute("channels")); mediaContent.setHeight(ctx.getAttributeAsInteger("height")); mediaContent.setWidth(ctx.getAttributeAsInteger("width")); mediaContent.setLang(ctx.getAttribute("lang")); return mediaContent; } private List parseMediaGroup(PodcastParserContext ctx) throws XMLStreamException { List list = new ArrayList<>(); while (ctx.getReader().hasNext()) { switch (ctx.getReader().next()) { case XMLStreamConstants.END_ELEMENT: if ("group".equals(ctx.getReader().getLocalName())) { return list; } break; case XMLStreamConstants.START_ELEMENT: if ("content".equals(ctx.getReader().getLocalName())) { list.add(parseMediaContent(ctx)); } break; } } return list; } private MediaPlayer parseMediaPlayer(PodcastParserContext ctx) throws XMLStreamException { MediaPlayer player = new MediaPlayer(); player.setUrl(ctx.getAttribute("url")); player.setHeight(ctx.getAttributeAsInteger("height")); player.setWidth(ctx.getAttributeAsInteger("width")); return player; } /** * This allows the permissible audience to be declared. If this element is not included, it assumes * that no restrictions are necessary. It has one optional attribute. * * {@code * adult * r (cz 1 lz 1 nz 1 oz 1 vz 1) * pg * tv-y7-fv * } * * scheme is the URI that identifies the rating scheme. It is an optional attribute. If this * attribute is not included, the default scheme is urn:simple (adult | nonadult). * * @param ctx * @return new {@link Rating} instance, not {@code null} * @throws XMLStreamException */ private Rating parseRating(PodcastParserContext ctx) throws XMLStreamException { String scheme = ctx.getAttribute("scheme"); if (Strings.isBlank(scheme)) { scheme = "urn:simple"; } Rating rating = new Rating(); rating.setText(ctx.getElementText()); rating.setScheme(scheme); return rating; } /* * Allows restrictions to be placed on the aggregator rendering the media in the feed. Currently, * restrictions are based on distributor (URI), country codes and sharing of a media object. This * element is purely informational and no obligation can be assumed or implied. Only one * element of the same type can be applied to a media object -- all others will * be ignored. Entities in this element should be space-separated. To allow the producer to * explicitly declare his/her intentions, two literals are reserved: "all", "none". These literals * can only be used once. This element has one required attribute and one optional attribute (with * strict requirements for its exclusion). * * au us * * relationship indicates the type of relationship that the restriction represents (allow | deny). * In the example above, the media object should only be syndicated in Australia and the United * States. It is a required attribute. * * Note: If the "allow" element is empty and the type of relationship is "allow", it is assumed that * the empty list means "allow nobody" and the media should not be syndicated. * * A more explicit method would be: * * au us * * type specifies the type of restriction (country | uri | sharing ) that the media can be * syndicated. It is an optional attribute; however can only be excluded when using one of the * literal values "all" or "none". * * "country" allows restrictions to be placed based on country code. [ISO 3166] * * "uri" allows restrictions based on URI. Examples: urn:apple, http://images.google.com, urn:yahoo, * etc. * * "sharing" allows restriction on sharing. * * "deny" means content cannot be shared -- for example via embed tags. If the sharing type is not * present, the default functionality is to allow sharing. For example: * * */ // private Restriction parseRestriction(PodParseContext ctx) throws XMLStreamException { // return null; // } /* * Optional element to specify the rights information of a media object. * * * * * * status is the status of the media object saying whether a media object has been created by the * publisher or they have rights to circulate it. * * Supported values are "userCreated" and "official". */ private String parseRights(PodcastParserContext ctx) throws XMLStreamException { return ctx.getAttribute("status"); } private List parseScenes(PodcastParserContext ctx) throws XMLStreamException { /* * Optional element to specify various scenes within a media object. It can have multiple child * elements, where each element contains information about a particular * scene. has the optional sub-elements , , * and , which contains title, description, start and end time of a * particular scene in the media, respectively. */ List scenes = new ArrayList<>(); Scene scene = null; while (ctx.getReader().hasNext()) { switch (ctx.getReader().next()) { case XMLStreamConstants.END_ELEMENT: if ("scenes".equals(ctx.getReader().getLocalName())) { return scenes; } break; case XMLStreamConstants.START_ELEMENT: if ("scene".equals(ctx.getReader().getLocalName())) { scene = new Scene(); scenes.add(scene); } if ("sceneTitle".equals(ctx.getReader().getLocalName())) { scene.setTitle(ctx.getElementText()); } if ("sceneDescription".equals(ctx.getReader().getLocalName())) { scene.setDescription(ctx.getElementText()); } if ("sceneStartTime".equals(ctx.getReader().getLocalName())) { scene.setStartTime(ctx.getElementText()); } if ("sceneEndTime".equals(ctx.getReader().getLocalName())) { scene.setEndTime(ctx.getElementText()); } break; } } return scenes; } /* * Allows the inclusion of a text transcript, closed captioning or lyrics of the media content. Many * of these elements are permitted to provide a time series of text. In such cases, it is * encouraged, but not required, that the elements be grouped by language and appear in time * sequence order based on the start time. Elements can have overlapping start and end times. It has * four optional attributes. * * Oh, say, can you * see * * By the dawn's early * light * * type specifies the type of text embedded. Possible values are either "plain" or "html". Default * value is "plain". All HTML must be entity-encoded. It is an optional attribute. * * lang is the primary language encapsulated in the media object. Language codes possible are * detailed in RFC 3066. This attribute is used similar to the xml:lang attribute detailed in the * XML 1.0 Specification (Third Edition). It is an optional attribute. * * start specifies the start time offset that the text starts being relevant to the media object. An * example of this would be for closed captioning. It uses the NTP time code format (see: the time * attribute used in ). It is an optional attribute. * * end specifies the end time that the text is relevant. If this attribute is not provided, and a * start time is used, it is expected that the end time is either the end of the clip or the start * of the next element. */ private Transcript parseText(PodcastParserContext ctx) throws XMLStreamException { Transcript transcript = new Transcript(); transcript.setLang(ctx.getAttribute("lang")); String type = ctx.getAttribute("type"); if (Strings.isBlank(type)) { type = "plain"; } transcript.setType(type); String start = ctx.getAttribute("start"); if (Strings.isNotBlank(start)) { transcript.setStart(Durations.parse(start)); } String end = ctx.getAttribute("end"); if (Strings.isNotBlank(end)) { transcript.setEnd(Durations.parse(end)); } transcript.setText(ctx.getElementText()); return transcript; } // type specifies the type of text embedded. Possible values are // either "plain" or "html". Default value is "plain". All HTML must // be entity-encoded. It is an optional attribute. private TypedString parseTitle(PodcastParserContext ctx) throws XMLStreamException { TypedString title = new TypedString(); title.setType(ctx.getAttribute("type")); title.setText(ctx.getElementText()); return title; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy