All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.parser.mp4.NoakesMP4Parser Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.mp4;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

import com.drew.imaging.mp4.Mp4Reader;
import com.drew.metadata.Directory;
import com.drew.metadata.MetadataException;
import com.drew.metadata.mp4.Mp4BoxHandler;
import com.drew.metadata.mp4.Mp4Directory;
import com.drew.metadata.mp4.media.Mp4SoundDirectory;
import com.drew.metadata.mp4.media.Mp4VideoDirectory;
import org.apache.commons.lang3.StringUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import org.apache.tika.exception.RuntimeSAXException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;

/**
 * Parser for the MP4 media container format, as well as the older
 * QuickTime format that MP4 is based on.
 * 

* This uses Drew Noakes' metadata-extractor: https://github.com/drewnoakes/metadata-extractor */ public class NoakesMP4Parser extends AbstractParser { /** * Serial version UID */ private static final long serialVersionUID = 84011216792285L; private static final Map> typesMap = new HashMap<>(); private static final Set SUPPORTED_TYPES = Collections.unmodifiableSet(typesMap.keySet()); private static final MediaType APPLICATION_MP4 = MediaType.application("mp4"); private static final int MAX_ERROR_MESSAGES = 100; static { // All types should be 4 bytes long, space padded as needed typesMap.put(MediaType.audio("mp4"), Arrays.asList("M4A ", "M4B ", "F4A ", "F4B ")); typesMap.put(MediaType.video("3gpp"), Arrays.asList("3ge6", "3ge7", "3gg6", "3gp1", "3gp2", "3gp3", "3gp4", "3gp5", "3gp6", "3gs7")); typesMap.put(MediaType.video("3gpp2"), Arrays.asList("3g2a", "3g2b", "3g2c")); typesMap.put(MediaType.video("mp4"), Arrays.asList("mp41", "mp42")); typesMap.put(MediaType.video("x-m4v"), Arrays.asList("M4V ", "M4VH", "M4VP")); typesMap.put(MediaType.video("quicktime"), Collections.emptyList()); typesMap.put(MediaType.application("mp4"), Collections.emptyList()); } private ISO6709Extractor iso6709Extractor = new ISO6709Extractor(); public Set getSupportedTypes(ParseContext context) { return SUPPORTED_TYPES; } public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { TemporaryResources tmp = new TemporaryResources(); TikaInputStream tstream = TikaInputStream.get(stream, tmp); try (InputStream is = Files.newInputStream(tstream.getPath())) { XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); com.drew.metadata.Metadata mp4Metadata = new com.drew.metadata.Metadata(); Mp4BoxHandler boxHandler = new TikaMp4BoxHandler(mp4Metadata, metadata, xhtml); try { Mp4Reader.extract(is, boxHandler); } catch (RuntimeSAXException e) { throw (SAXException) e.getCause(); } //TODO -- figure out how to get IOExceptions out of boxhandler. Mp4Reader //currently swallows IOExceptions. Set errorMessages = processMp4Directories( mp4Metadata.getDirectoriesOfType(Mp4Directory.class), metadata); for (String m : errorMessages) { metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, m); } xhtml.endDocument(); } finally { tmp.dispose(); } } private Set processMp4Directories(Collection mp4Directories, Metadata metadata) { Set errorMsgs = new HashSet<>(); for (Mp4Directory mp4Directory : mp4Directories) { for (String m : mp4Directory.getErrors()) { if (errorMsgs.size() < MAX_ERROR_MESSAGES) { errorMsgs.add(m); } else { break; } } /* for (Tag t : mp4Directory.getTags()) { System.out.println(mp4Directory.getClass() + " : " + t.getTagName() + " : " + mp4Directory.getString(t.getTagType())); }*/ if (mp4Directory instanceof Mp4SoundDirectory) { processMp4SoundDirectory((Mp4SoundDirectory) mp4Directory, metadata); } else if (mp4Directory instanceof Mp4VideoDirectory) { processMp4VideoDirectory((Mp4VideoDirectory) mp4Directory, metadata); } else { processActualMp4Directory(mp4Directory, metadata); } } return errorMsgs; } private void processMp4VideoDirectory(Mp4VideoDirectory mp4Directory, Metadata metadata) { addInt(mp4Directory, metadata, Mp4VideoDirectory.TAG_HEIGHT, Metadata.IMAGE_LENGTH); addInt(mp4Directory, metadata, Mp4VideoDirectory.TAG_WIDTH, Metadata.IMAGE_WIDTH); if (mp4Directory.containsTag(Mp4VideoDirectory.TAG_COMPRESSOR_NAME)) { String compressor = mp4Directory.getString(Mp4VideoDirectory.TAG_COMPRESSOR_NAME); metadata.set(XMPDM.VIDEO_COMPRESSOR, compressor); } } private void processMp4SoundDirectory(Mp4SoundDirectory mp4SoundDirectory, Metadata metadata) { addInt(mp4SoundDirectory, metadata, Mp4SoundDirectory.TAG_AUDIO_SAMPLE_RATE, XMPDM.AUDIO_SAMPLE_RATE); try { int numChannels = mp4SoundDirectory.getInt(Mp4SoundDirectory.TAG_NUMBER_OF_CHANNELS); if (numChannels == 1) { metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "Mono"); } else if (numChannels == 2) { metadata.set(XMPDM.AUDIO_CHANNEL_TYPE, "Stereo"); } else { //??? log } } catch (MetadataException e) { //log } } private void addInt(Mp4Directory mp4Directory, Metadata metadata, int tag, Property property) { try { int val = mp4Directory.getInt(tag); metadata.set(property, val); } catch (MetadataException e) { //log } } private void processActualMp4Directory(Mp4Directory mp4Directory, Metadata metadata) { addDate(mp4Directory, metadata, Mp4Directory.TAG_CREATION_TIME, TikaCoreProperties.CREATED); addDate(mp4Directory, metadata, Mp4Directory.TAG_MODIFICATION_TIME, TikaCoreProperties.MODIFIED); handleBrands(mp4Directory, metadata); handleDurationInSeconds(mp4Directory, metadata); addDouble(mp4Directory, metadata, Mp4Directory.TAG_LATITUDE, TikaCoreProperties.LATITUDE); addDouble(mp4Directory, metadata, Mp4Directory.TAG_LONGITUDE, TikaCoreProperties.LONGITUDE); addInt(mp4Directory, metadata, Mp4Directory.TAG_TIME_SCALE, XMPDM.AUDIO_SAMPLE_RATE); } private void handleDurationInSeconds(Mp4Directory mp4Directory, Metadata metadata) { String durationInSeconds = mp4Directory.getString(Mp4Directory.TAG_DURATION_SECONDS); if (durationInSeconds == null) { return; } if (! durationInSeconds.contains("/")) { try { double d = Double.parseDouble(durationInSeconds); DecimalFormat df = (DecimalFormat) NumberFormat.getNumberInstance(Locale.ROOT); df.applyPattern("0.0#"); metadata.set(XMPDM.DURATION, df.format(d)); } catch (NumberFormatException e) { //swallow } return; } String[] bits = durationInSeconds.split("/"); if (bits.length != 2) { return; } double durationSeconds; try { long numerator = Long.parseLong(bits[0]); long denominator = Long.parseLong(bits[1]); if (denominator != 0) { durationSeconds = (double) numerator / (double) denominator; // Get the duration //TODO Replace this with a 2dp Duration Property Converter //avoid thread safety issues by creating a new decimal format for every call //threadlocal doesn't play well in long running processes. DecimalFormat df = (DecimalFormat) NumberFormat.getNumberInstance(Locale.ROOT); df.applyPattern("0.0#"); metadata.set(XMPDM.DURATION, df.format(durationSeconds)); } } catch (NumberFormatException e) { //log return; } } private void handleBrands(Mp4Directory mp4Directory, Metadata metadata) { String majorBrand = mp4Directory.getString(Mp4Directory.TAG_MAJOR_BRAND); // Identify the type based on the major brand Optional typeHolder = typesMap.entrySet().stream() .filter(e -> e.getValue().contains(majorBrand)).findFirst() .map(Map.Entry::getKey); if (!typeHolder.isPresent()) { String compatibleBrands = mp4Directory.getString(Mp4Directory.TAG_COMPATIBLE_BRANDS); if (compatibleBrands != null) { // If no match for major brand, see if any of the compatible brands match typeHolder = typesMap.entrySet().stream().filter(e -> e.getValue().stream().anyMatch(compatibleBrands::contains)) .findFirst().map(Map.Entry::getKey); } } MediaType type = typeHolder.orElse(MediaType.application("mp4")); if (metadata.getValues(Metadata.CONTENT_TYPE) == null) { metadata.set(Metadata.CONTENT_TYPE, type.toString()); } else if (! type.equals(APPLICATION_MP4)) { //todo check for specialization? metadata.set(Metadata.CONTENT_TYPE, type.toString()); } if (type.getType().equals("audio") && ! StringUtils.isBlank(majorBrand)) { metadata.set(XMPDM.AUDIO_COMPRESSOR, majorBrand.trim()); } } private void addDate(Mp4Directory mp4Directory, Metadata metadata, int tag, Property property) { Date d = mp4Directory.getDate(tag); if (d == null) { return; } metadata.set(property, d); } private void addDouble(Directory mp4Directory, Metadata metadata, int tag, Property property) { try { double val = mp4Directory.getDouble(tag); metadata.set(property, val); } catch (MetadataException e) { //log return; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy