org.apache.tika.xmp.convert.TikaToXMP Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tika-xmp Show documentation
Show all versions of tika-xmp Show documentation
Converts Tika metadata to XMP
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.xmp.convert;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.parser.odf.OpenDocumentParser;
import org.apache.tika.parser.rtf.RTFParser;
import com.adobe.xmp.XMPException;
import com.adobe.xmp.XMPMeta;
import com.adobe.xmp.XMPMetaFactory;
public class TikaToXMP {
/**
* Map from mimetype to converter class Must only be accessed through
* getConverterMap
*/
private static Map> converterMap;
// --- public API implementation---
public TikaToXMP() {
// Nothing to do
}
/**
* @see TikaToXMP#convert(Metadata, String) But the mimetype is retrieved from the metadata
* map.
*/
public static XMPMeta convert(Metadata tikaMetadata) throws TikaException {
if (tikaMetadata == null) {
throw new IllegalArgumentException( "Metadata parameter must not be null" );
}
String mimetype = tikaMetadata.get( Metadata.CONTENT_TYPE );
if (mimetype == null) {
mimetype = tikaMetadata.get( TikaCoreProperties.FORMAT );
}
return convert( tikaMetadata, mimetype );
}
/**
* Convert the given Tika metadata map to XMP object. If a mimetype is provided in the Metadata
* map, a specific converter can be used, that converts all available metadata. If there is no
* mimetype provided or no specific converter available a generic conversion is done which will
* convert only those properties that are in known namespaces and are using the correct
* prefixes.
*
* @param tikaMetadata
* the Metadata map from Tika
* @param mimetype
* depicts the format's converter to use
* @return XMP object
* @throws TikaException
*/
public static XMPMeta convert(Metadata tikaMetadata, String mimetype) throws TikaException {
if (tikaMetadata == null) {
throw new IllegalArgumentException( "Metadata parameter must not be null" );
}
ITikaToXMPConverter converter = null;
if (isConverterAvailable( mimetype )) {
converter = getConverter( mimetype );
}
else {
converter = new GenericConverter();
}
XMPMeta xmp = null;
if (converter != null) {
try {
xmp = converter.process( tikaMetadata );
}
catch (XMPException e) {
throw new TikaException( "Tika metadata could not be converted to XMP", e );
}
}
else {
xmp = XMPMetaFactory.create(); // empty packet
}
return xmp;
}
/**
* Check if there is a converter available which allows to convert the Tika metadata to XMP
*
* @param mimetype
* the Mimetype
* @return true if the Metadata object can be converted or false if not
*/
public static boolean isConverterAvailable(String mimetype) {
MediaType type = MediaType.parse( mimetype );
if (type != null) {
return (getConverterMap().get( type ) != null);
}
return false;
}
/**
* Retrieve a specific converter according to the mimetype
*
* @param mimetype
* the Mimetype
* @return the converter or null, if none exists
* @throws TikaException
*/
public static ITikaToXMPConverter getConverter(String mimetype) throws TikaException {
if (mimetype == null) {
throw new IllegalArgumentException( "mimetype must not be null" );
}
ITikaToXMPConverter converter = null;
MediaType type = MediaType.parse( mimetype );
if (type != null) {
Class extends ITikaToXMPConverter> clazz = getConverterMap().get( type );
if (clazz != null) {
try {
converter = clazz.newInstance();
}
catch (Exception e) {
throw new TikaException(
"TikaToXMP converter class cannot be instantiated for mimetype: "
+ type.toString(), e );
}
}
}
return converter;
}
// --- Private methods ---
private static Map> getConverterMap() {
if (converterMap == null) {
converterMap = new HashMap>();
initialize();
}
return converterMap;
}
/**
* Initializes the map with supported converters.
*/
private static void initialize() {
// No particular parsing context is needed
ParseContext parseContext = new ParseContext();
// MS Office Binary File Format
addConverter( new OfficeParser().getSupportedTypes( parseContext ),
MSOfficeBinaryConverter.class );
// Rich Text Format
addConverter( new RTFParser().getSupportedTypes( parseContext ), RTFConverter.class );
// MS Open XML Format
addConverter( new OOXMLParser().getSupportedTypes( parseContext ),
MSOfficeXMLConverter.class );
// Open document format
addConverter( new OpenDocumentParser().getSupportedTypes( parseContext ),
OpenDocumentConverter.class );
}
private static void addConverter(Set supportedTypes,
Class extends ITikaToXMPConverter> converter) {
for (MediaType type : supportedTypes) {
getConverterMap().put( type, converter );
}
}
}