// org.modeshape.sequencer.epub.EpubMetadataSequencer
// Artifact: modeshape-sequencer-epub - ModeShape Sequencer that processes EPUB 3.0 format
/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.sequencer.epub;
import static org.modeshape.sequencer.epub.EpubMetadataLexicon.METADATA_NODE;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Calendar;
import java.util.List;
import java.util.stream.Collectors;
import javax.jcr.NamespaceRegistry;
import javax.jcr.Node;
import javax.jcr.Property;
import javax.jcr.RepositoryException;
import javax.jcr.Value;
import javax.jcr.ValueFactory;
import org.modeshape.common.util.CheckArg;
import org.modeshape.common.util.StringUtil;
import org.modeshape.jcr.api.Binary;
import org.modeshape.jcr.api.JcrConstants;
import org.modeshape.jcr.api.nodetype.NodeTypeManager;
import org.modeshape.jcr.api.sequencer.Sequencer;
/**
 * A sequencer that processes the binary content of an EPUB 3.0 file, extracts
 * the metadata, and then writes that metadata to the repository.
 * <p>
 * This sequencer produces data that corresponds to the following structure:
 * <ul>
 * <li><strong>epub:metadata</strong> node of type <code>epub:metadata</code>
 * <ul>
 * <li><strong>jcr:mimeType</strong> - optional string property for the mime type of the EPUB</li>
 * <li><strong>epub:identifier</strong> - optional <code>epub:identifier</code> node for the identifier</li>
 * <li><strong>epub:title</strong> - optional <code>epub:title</code> node for the title</li>
 * <li><strong>epub:language</strong> - optional <code>epub:language</code> node for the language</li>
 * <li><strong>epub:contributor</strong> - optional <code>epub:contributor</code> node for the contributor</li>
 * <li><strong>epub:creator</strong> - optional <code>epub:creator</code> node for the creator</li>
 * <li><strong>epub:description</strong> - optional <code>epub:description</code> node for the description</li>
 * <li><strong>epub:publisher</strong> - optional <code>epub:publisher</code> node for the publisher</li>
 * <li><strong>epub:rights</strong> - optional <code>epub:rights</code> node for the rights</li>
 * <li><strong>epub:date</strong> - optional <code>epub:date</code> node for the date</li>
 * </ul>
 * </li>
 * <li><strong>epub:*</strong> nodes are descendants of <code>epub:property</code>, which has the following structure:
 * <ul>
 * <li><strong>epub:value</strong> - optional string property for the value of the property</li>
 * <li><strong>epub:titleType</strong> - optional string property for the title-type of the property</li>
 * <li><strong>epub:identifierType</strong> - optional string property for the identifier-type of the property</li>
 * <li><strong>epub:metadataAuthority</strong> - optional string property for the metadata-authority field of the property</li>
 * <li><strong>epub:role</strong> - optional string property for the role of the property</li>
 * <li><strong>epub:displaySeq</strong> - optional long property for the display-sequence field of the property</li>
 * <li><strong>epub:fileAs</strong> - optional string property for the file-as field of the property</li>
 * <li><strong>epub:groupPosition</strong> - optional long property for the group-position field of the property</li>
 * <li><strong>epub:scheme</strong> - optional string property for the scheme of the property</li>
 * <li><strong>epub:alternateScript</strong> - optional <code>epub:alternateScript</code> node for the alternate
 * transcriptions of the property
 * <ul>
 * <li><strong>epub:value</strong> - optional string property for the value of the alternate script</li>
 * <li><strong>epub:languageCode</strong> - optional string property for the language of the alternate script</li>
 * </ul>
 * </li>
 * </ul>
 * </li>
 * </ul>
 * </p>
 *
 * @since 5.1
 */
public class EpubMetadataSequencer extends Sequencer {
@Override
public void initialize( NamespaceRegistry registry,
NodeTypeManager nodeTypeManager ) throws RepositoryException, IOException {
super.registerNodeTypes("epub.cnd", nodeTypeManager, true);
registerDefaultMimeTypes(EpubMetadata.MIME_TYPE_STRINGS);
}
@Override
public boolean execute( Property inputProperty,
Node outputNode,
Context context ) throws Exception {
Binary binaryValue = (Binary) inputProperty.getBinary();
CheckArg.isNotNull(binaryValue, "binary");
String mimeType = binaryValue.getMimeType();
Node sequencedNode = getMetadataNode(outputNode);
setPropertyIfMetadataPresent(sequencedNode, JcrConstants.JCR_MIME_TYPE, mimeType);
return processBasicMetadata(sequencedNode, binaryValue);
}
private boolean processBasicMetadata( Node sequencedNode,
Binary binaryValue ) {
EpubMetadata metadata = null;
try (InputStream stream = binaryValue.getStream()) {
metadata = new EpubMetadata(stream);
if (metadata.check()) {
addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.TITLE, metadata.getTitle());
addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.CREATOR, metadata.getCreator());
addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.CONTRIBUTOR, metadata.getContributor());
addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.LANGUAGE, metadata.getLanguage());
addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.IDENTIFIER, metadata.getIdentifier());
addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.DESCRIPTION, metadata.getDescription());
addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.PUBLISHER, metadata.getPublisher());
addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.DATE, metadata.getDate());
addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.RIGHTS, metadata.getRights());
return true;
}
} catch (Exception e) {
getLogger().error(e, "Couldn't process stream.");
}
return false;
}
private Node getMetadataNode( Node outputNode ) throws RepositoryException {
if (outputNode.isNew()) {
outputNode.setPrimaryType(METADATA_NODE);
return outputNode;
}
return outputNode.addNode(METADATA_NODE, METADATA_NODE);
}
private void addEpubMetadataProperties( Node node,
String propertyName,
List values ) throws RepositoryException {
for (EpubMetadataProperty value : values) {
Node propertyNode = node.addNode(propertyName, propertyName);
setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.VALUE, value.getValue());
setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.TITLE_TYPE, value.getTitleType());
setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.METADATA_AUTHORITY, value.getMetadataAuthority());
setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.ROLE, value.getRole());
setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.DISPLAY_SEQ, value.getDisplaySeq());
setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.FILE_AS, value.getFileAs());
setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.GROUP_POSITION, value.getGroupPosition());
setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.IDENTIFIER_TYPE, value.getIdentifierType());
setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.SCHEME, value.getScheme());
if (value.getAlternateScript() != null) {
Node alternateScriptNode = propertyNode.addNode(EpubMetadataLexicon.ALTERNATE_SCRIPT_NODE, EpubMetadataLexicon.ALTERNATE_SCRIPT_NODE);
setPropertyIfMetadataPresent(alternateScriptNode, EpubMetadataLexicon.VALUE, value.getAlternateScript().getValue());
setPropertyIfMetadataPresent(alternateScriptNode, EpubMetadataLexicon.LANGUAGE_CODE, value.getAlternateScript().getLanguage());
}
}
}
private void setPropertyIfMetadataPresent( Node node,
String propertyName,
Object value ) throws RepositoryException {
if (value != null) {
if (value instanceof String && !StringUtil.isBlank((String) value)) {
node.setProperty(propertyName, (String) value);
} else if (value instanceof Boolean) {
node.setProperty(propertyName, (Boolean) value);
} else if (value instanceof Long) {
node.setProperty(propertyName, (Long) value);
} else if (value instanceof Integer) {
node.setProperty(propertyName, new Long((Integer) value));
} else if (value instanceof Double) {
node.setProperty(propertyName, (Double) value);
} else if (value instanceof Calendar) {
node.setProperty(propertyName, (Calendar) value);
} else if (value instanceof byte[]) {
InputStream is = new ByteArrayInputStream((byte []) value);
javax.jcr.Binary binaryProperty = node.getSession().getValueFactory().createBinary(is);
node.setProperty(propertyName, binaryProperty);
} else if (value instanceof List>) {
ValueFactory vf = node.getSession().getValueFactory();
List values = ((List>) value).stream()
.filter(val -> val instanceof String)
.map(val -> vf.createValue((String) val))
.collect(Collectors.toList());
if (!values.isEmpty()) {
node.setProperty(propertyName, values.toArray(new Value[values.size()]));
}
} else {
getLogger().warn("The value of the property {0} has unknown type and couldn't be saved", propertyName);
}
}
}
}