// org.apache.tika.xmp.convert.TikaToXMP (artifact: tika-xmp; available via Maven / Gradle / Ivy)
// Converts Tika metadata to XMP
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.xmp.convert;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import com.adobe.internal.xmp.XMPException;
import com.adobe.internal.xmp.XMPMeta;
import com.adobe.internal.xmp.XMPMetaFactory;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.parser.microsoft.rtf.RTFParser;
import org.apache.tika.parser.odf.OpenDocumentParser;
public class TikaToXMP {
/**
* Map from mimetype to converter class Must only be accessed through
* getConverterMap
*/
private static Map> converterMap;
// --- public API implementation---
public TikaToXMP() {
// Nothing to do
}
/**
* @see TikaToXMP#convert(Metadata, String) But the mimetype is retrieved from the metadata
* map.
*/
public static XMPMeta convert(Metadata tikaMetadata) throws TikaException {
if (tikaMetadata == null) {
throw new IllegalArgumentException("Metadata parameter must not be null");
}
String mimetype = tikaMetadata.get(Metadata.CONTENT_TYPE);
if (mimetype == null) {
mimetype = tikaMetadata.get(TikaCoreProperties.FORMAT);
}
return convert(tikaMetadata, mimetype);
}
/**
* Convert the given Tika metadata map to XMP object. If a mimetype is provided in the Metadata
* map, a specific converter can be used, that converts all available metadata. If there is no
* mimetype provided or no specific converter available a generic conversion is done which will
* convert only those properties that are in known namespaces and are using the correct
* prefixes.
*
* @param tikaMetadata the Metadata map from Tika
* @param mimetype depicts the format's converter to use
* @return XMP object
* @throws TikaException
*/
public static XMPMeta convert(Metadata tikaMetadata, String mimetype) throws TikaException {
if (tikaMetadata == null) {
throw new IllegalArgumentException("Metadata parameter must not be null");
}
ITikaToXMPConverter converter = null;
if (isConverterAvailable(mimetype)) {
converter = getConverter(mimetype);
} else {
converter = new GenericConverter();
}
XMPMeta xmp = null;
if (converter != null) {
try {
xmp = converter.process(tikaMetadata);
} catch (XMPException e) {
throw new TikaException("Tika metadata could not be converted to XMP", e);
}
} else {
xmp = XMPMetaFactory.create(); // empty packet
}
return xmp;
}
/**
* Check if there is a converter available which allows to convert the Tika metadata to XMP
*
* @param mimetype the Mimetype
* @return true if the Metadata object can be converted or false if not
*/
public static boolean isConverterAvailable(String mimetype) {
MediaType type = MediaType.parse(mimetype);
if (type != null) {
return (getConverterMap().get(type) != null);
}
return false;
}
/**
* Retrieve a specific converter according to the mimetype
*
* @param mimetype the Mimetype
* @return the converter or null, if none exists
* @throws TikaException
*/
public static ITikaToXMPConverter getConverter(String mimetype) throws TikaException {
if (mimetype == null) {
throw new IllegalArgumentException("mimetype must not be null");
}
ITikaToXMPConverter converter = null;
MediaType type = MediaType.parse(mimetype);
if (type != null) {
Class extends ITikaToXMPConverter> clazz = getConverterMap().get(type);
if (clazz != null) {
try {
converter = clazz.getDeclaredConstructor().newInstance();
} catch (Exception e) {
throw new TikaException(
"TikaToXMP converter class cannot be instantiated for mimetype: " +
type.toString(), e);
}
}
}
return converter;
}
// --- Private methods ---
private static Map> getConverterMap() {
if (converterMap == null) {
converterMap = new HashMap<>();
initialize();
}
return converterMap;
}
/**
* Initializes the map with supported converters.
*/
private static void initialize() {
// No particular parsing context is needed
ParseContext parseContext = new ParseContext();
// MS Office Binary File Format
addConverter(new OfficeParser().getSupportedTypes(parseContext),
MSOfficeBinaryConverter.class);
// Rich Text Format
addConverter(new RTFParser().getSupportedTypes(parseContext), RTFConverter.class);
// MS Open XML Format
addConverter(new OOXMLParser().getSupportedTypes(parseContext), MSOfficeXMLConverter.class);
// Open document format
addConverter(new OpenDocumentParser().getSupportedTypes(parseContext),
OpenDocumentConverter.class);
}
private static void addConverter(Set supportedTypes,
Class extends ITikaToXMPConverter> converter) {
for (MediaType type : supportedTypes) {
getConverterMap().put(type, converter);
}
}
}