All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.modeshape.sequencer.epub.EpubMetadataSequencer Maven / Gradle / Ivy

There is a newer version: 5.4.1.Final
Show newest version
/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.modeshape.sequencer.epub;

import static org.modeshape.sequencer.epub.EpubMetadataLexicon.METADATA_NODE;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Calendar;
import java.util.List;
import java.util.stream.Collectors;
import javax.jcr.NamespaceRegistry;
import javax.jcr.Node;
import javax.jcr.Property;
import javax.jcr.RepositoryException;
import javax.jcr.Value;
import javax.jcr.ValueFactory;
import org.modeshape.common.util.CheckArg;
import org.modeshape.common.util.StringUtil;
import org.modeshape.jcr.api.Binary;
import org.modeshape.jcr.api.JcrConstants;
import org.modeshape.jcr.api.nodetype.NodeTypeManager;
import org.modeshape.jcr.api.sequencer.Sequencer;

/**
 * A sequencer that processes the binary content of an EPUB 3.0 file, extracts
 * the metadata, and then writes that metadata to the repository.
 * 

* This sequencer produces data that corresponds to the following structure: *

    *
  • epub:metadata node of type epub:metadata *
      *
    • jcr:mimeType - optional string property for the mime type of the EPUB
    • *
    • epub:identifier - optional epub:identifier node for the identifier
    • *
    • epub:title - optional epub:title node for the title
    • *
    • epub:language - optional epub:language node for the language
    • *
    • epub:contributor - optional epub:contributor node for the contributor
    • *
    • epub:creator - optional epub:creator node for the creator
    • *
    • epub:description - optional epub:description node for the description
    • *
    • epub:publisher - optional epub:publisher node for the publisher
    • *
    • epub:rights - optional epub:rights node for the rights
    • *
    • epub:date - optional epub:date node for the date date
    • *
    *
  • *
  • epub:* nodes are descendants of epub:property which has following structure: *
      *
    • epub:value - optional string property for the value of the property
    • *
    • epub:titleType - optional string property for the title-type of the property
    • *
    • epub:identifierType - optional string property for the identifier-type of the property
    • *
    • epub:metadataAuthority - optional string property for the metadata-authority field of the property
    • *
    • epub:role - optional string property for the role of the property
    • *
    • epub:displaySeq - optional long property for the display-sequence field of the property
    • *
    • epub:fileAs - optional string property for the file-as field of the property
    • *
    • epub:groupPosition - optional long property for the group-position field of the property
    • *
    • epub:scheme - optional string property for the scheme of the property
    • *
    • epub:alternateScript - optional epub:alternateScript node for the alternate transcriptions of the property *
        *
      • epub:value - optional string property for the value of the alternate script
      • *
      • epub:languageCode - optional string property for the value of the alternate script
      • *
      *
    • *
    *
  • *
*

* * @since 5.1 */ public class EpubMetadataSequencer extends Sequencer { @Override public void initialize( NamespaceRegistry registry, NodeTypeManager nodeTypeManager ) throws RepositoryException, IOException { super.registerNodeTypes("epub.cnd", nodeTypeManager, true); registerDefaultMimeTypes(EpubMetadata.MIME_TYPE_STRINGS); } @Override public boolean execute( Property inputProperty, Node outputNode, Context context ) throws Exception { Binary binaryValue = (Binary) inputProperty.getBinary(); CheckArg.isNotNull(binaryValue, "binary"); String mimeType = binaryValue.getMimeType(); Node sequencedNode = getMetadataNode(outputNode); setPropertyIfMetadataPresent(sequencedNode, JcrConstants.JCR_MIME_TYPE, mimeType); return processBasicMetadata(sequencedNode, binaryValue); } private boolean processBasicMetadata( Node sequencedNode, Binary binaryValue ) { EpubMetadata metadata = null; try (InputStream stream = binaryValue.getStream()) { metadata = new EpubMetadata(stream); if (metadata.check()) { addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.TITLE, metadata.getTitle()); addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.CREATOR, metadata.getCreator()); addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.CONTRIBUTOR, metadata.getContributor()); addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.LANGUAGE, metadata.getLanguage()); addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.IDENTIFIER, metadata.getIdentifier()); addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.DESCRIPTION, metadata.getDescription()); addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.PUBLISHER, metadata.getPublisher()); addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.DATE, metadata.getDate()); addEpubMetadataProperties(sequencedNode, EpubMetadataLexicon.RIGHTS, metadata.getRights()); return true; } } catch (Exception e) { getLogger().error(e, "Couldn't process stream."); } return false; } private Node getMetadataNode( Node outputNode ) throws RepositoryException { if (outputNode.isNew()) { outputNode.setPrimaryType(METADATA_NODE); return outputNode; } return outputNode.addNode(METADATA_NODE, METADATA_NODE); } private void addEpubMetadataProperties( Node node, String propertyName, List values ) throws RepositoryException { for (EpubMetadataProperty value : values) { Node propertyNode = node.addNode(propertyName, propertyName); setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.VALUE, value.getValue()); setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.TITLE_TYPE, value.getTitleType()); setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.METADATA_AUTHORITY, value.getMetadataAuthority()); setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.ROLE, value.getRole()); setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.DISPLAY_SEQ, value.getDisplaySeq()); setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.FILE_AS, value.getFileAs()); setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.GROUP_POSITION, value.getGroupPosition()); setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.IDENTIFIER_TYPE, value.getIdentifierType()); setPropertyIfMetadataPresent(propertyNode, EpubMetadataLexicon.SCHEME, value.getScheme()); if (value.getAlternateScript() != null) { Node alternateScriptNode = propertyNode.addNode(EpubMetadataLexicon.ALTERNATE_SCRIPT_NODE, EpubMetadataLexicon.ALTERNATE_SCRIPT_NODE); setPropertyIfMetadataPresent(alternateScriptNode, EpubMetadataLexicon.VALUE, value.getAlternateScript().getValue()); setPropertyIfMetadataPresent(alternateScriptNode, EpubMetadataLexicon.LANGUAGE_CODE, value.getAlternateScript().getLanguage()); } } } private void setPropertyIfMetadataPresent( Node node, String propertyName, Object value ) throws RepositoryException { if (value != null) { if (value instanceof String && !StringUtil.isBlank((String) value)) { node.setProperty(propertyName, (String) value); } else if (value instanceof Boolean) { node.setProperty(propertyName, (Boolean) value); } else if (value instanceof Long) { node.setProperty(propertyName, (Long) value); } else if (value instanceof Integer) { node.setProperty(propertyName, new Long((Integer) value)); } else if (value instanceof Double) { node.setProperty(propertyName, (Double) value); } else if (value instanceof Calendar) { node.setProperty(propertyName, (Calendar) value); } else if (value instanceof byte[]) { InputStream is = new ByteArrayInputStream((byte []) value); javax.jcr.Binary binaryProperty = node.getSession().getValueFactory().createBinary(is); node.setProperty(propertyName, binaryProperty); } else if (value instanceof List) { ValueFactory vf = node.getSession().getValueFactory(); List values = ((List) value).stream() .filter(val -> val instanceof String) .map(val -> vf.createValue((String) val)) .collect(Collectors.toList()); if (!values.isEmpty()) { node.setProperty(propertyName, values.toArray(new Value[values.size()])); } } else { getLogger().warn("The value of the property {0} has unknown type and couldn't be saved", propertyName); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy