All downloads are free. The search and download features use the official Maven repository.

org.apache.tika.parser.mp4.boxes.TikaUserDataBox Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.mp4.boxes;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.drew.lang.SequentialByteArrayReader;
import com.drew.lang.SequentialReader;
import com.drew.lang.annotations.NotNull;
import com.drew.lang.annotations.Nullable;
import com.drew.metadata.mp4.Mp4Directory;
import com.drew.metadata.mp4.boxes.Box;
import org.xml.sax.SAXException;

import org.apache.tika.exception.RuntimeSAXException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMP;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.sax.XHTMLContentHandler;

/**
 * Reads the payload of an MP4 user-data box.
 * <p>
 * Two kinds of child entries are interpreted while the payload is walked in the constructor:
 * <ul>
 *     <li>the {@code ©xyz} entry, whose text is kept and later turned into latitude/longitude
 *     by {@link #addMetadata(Mp4Directory)};</li>
 *     <li>the {@code meta} entry, whose {@code ilst} records are written to the supplied
 *     {@link Metadata} and, for most textual fields, to the supplied
 *     {@link XHTMLContentHandler} as {@code <p>} elements.</li>
 * </ul>
 * Every other entry is skipped.
 */
public class TikaUserDataBox extends Box {

    private static final String LOCATION_CODE = "\u00A9xyz";
    private static final String META = "meta";
    private static final String ILST = "ilst";
    private static final String MDTA = "mdta";
    private static final String HDLR = "hdlr";
    private static final String MDIR = "mdir";//apple metadata itunes reader
    private static final Pattern COORDINATE_PATTERN =
            Pattern.compile("([+-]\\d+\\.\\d+)([+-]\\d+\\.\\d+)");

    /** Size of a box header as read here: 4-byte length followed by a 4-byte type. */
    private static final int BOX_HEADER_LENGTH = 8;
    /** Bytes read at the start of every ilst record: record length, name, field length, type. */
    private static final int ILST_RECORD_HEADER_LENGTH = 16;
    /** Bytes skipped after a "data" type marker before the value starts. */
    private static final int DATA_PREAMBLE_LENGTH = 8;

    // Directory tag ids used for the coordinates.
    // NOTE(review): presumably these equal Mp4Directory.TAG_LATITUDE / TAG_LONGITUDE -- confirm.
    private static final int LATITUDE_TAG = 8193;
    private static final int LONGITUDE_TAG = 8194;

    /** Raw text of the {@code ©xyz} entry, or {@code null} if none was found. */
    @Nullable
    private String coordinateString;

    // Set when the meta box header carries 'hdlr' and 'mdta'; not read anywhere in this class.
    private boolean isQuickTime = false;
    private final Metadata metadata;
    private final XHTMLContentHandler xhtml;

    /**
     * Walks {@code payload} entry by entry and extracts location and ilst metadata.
     *
     * @param box      the box this instance is copied from
     * @param payload  raw bytes of the user-data box
     * @param metadata receives the values found in ilst records
     * @param xhtml    receives a {@code <p>} element for most textual ilst values
     * @throws IOException  if the reader runs past the end of {@code payload}
     * @throws SAXException declared for callers; SAX failures raised while writing to
     *                      {@code xhtml} surface as {@link RuntimeSAXException}
     */
    public TikaUserDataBox(@NotNull Box box, byte[] payload, Metadata metadata,
                           XHTMLContentHandler xhtml) throws IOException, SAXException {
        super(box);
        this.metadata = metadata;
        this.xhtml = xhtml;
        int length = payload.length;
        SequentialReader reader = new SequentialByteArrayReader(payload);
        while (reader.getPosition() < (long) length) {
            long size = reader.getUInt32();
            if (size <= 4L) {
                break;
            }
            String kindName = reader.getString(4, StandardCharsets.ISO_8859_1);
            if (LOCATION_CODE.equals(kindName)) {
                int xyzLength = reader.getUInt16();
                reader.skip(2L);
                this.coordinateString = reader.getString(xyzLength, StandardCharsets.UTF_8);
            } else if (META.equals(kindName)) {
                reader.getUInt32();//not sure what this is
                long lengthToStartOfList = reader.getUInt32() - 4;//this is the length to
                // 'ilst', but the length of the ilist is defined in the 4 bytes before ilist
                if (lengthToStartOfList < 0 || lengthToStartOfList > Integer.MAX_VALUE) {
                    return;
                }
                String hdlr = reader.getString(4, StandardCharsets.ISO_8859_1);
                reader.getUInt32();
                reader.getUInt32();
                String subtype = reader.getString(4, StandardCharsets.ISO_8859_1);
                // If the second and the fifth 32-bit integers encode 'hdlr' and 'mdta' respectively
                // then the MetaBox is formatted according to QuickTime File Format.
                // See https://developer.apple.com/library/content/documentation
                // /QuickTime/QTFF/Metadata/Metadata.html
                if (HDLR.equals(hdlr) && MDTA.equals(subtype)) {
                    isQuickTime = true;
                }
                int read = 16;//bytes read so far
                parseUserDataBox(reader, subtype, read, (int) lengthToStartOfList);
            } else {
                if (size < BOX_HEADER_LENGTH) {
                    return;
                }
                // the 8 header bytes are already consumed; skip the rest of this entry
                reader.skip(size - BOX_HEADER_LENGTH);
            }
        }
    }

    /**
     * Positions the reader on the {@code ilst} box of a meta box and processes its records.
     * Does nothing unless {@code handlerType} is {@code mdir}.
     *
     * @param reader              reader positioned just after the handler subtype
     * @param handlerType         handler subtype read from the meta box
     * @param read                number of bytes already consumed towards the list
     * @param lengthToStartOfList declared distance to the first box following the handler
     * @throws IOException if the payload ends before an {@code ilst} box is found or while
     *                     its records are being read
     */
    private void parseUserDataBox(SequentialReader reader, String handlerType,
                                  int read, int lengthToStartOfList)
            throws IOException {
        if (!MDIR.equals(handlerType)) {
            return;
        }
        if (lengthToStartOfList < read) {
            return;
        }
        reader.skip(lengthToStartOfList - read);
        long len = reader.getUInt32();
        if (len >= Integer.MAX_VALUE || len <= 0) {
            //log
            return;
        }
        String subType = reader.getString(4, StandardCharsets.ISO_8859_1);
        //this handles "free" types...not sure if there are others?
        //will throw IOException if no ilist is found
        while (!ILST.equals(subType)) {
            if (len < BOX_HEADER_LENGTH) {
                // A box cannot be shorter than its own header; skipping a negative amount
                // would raise an unchecked exception instead of an IOException.
                return;
            }
            reader.skip(len - BOX_HEADER_LENGTH);
            len = reader.getUInt32();
            subType = reader.getString(4, StandardCharsets.ISO_8859_1);
        }
        // len counts the 8 header bytes that were just read; only the remainder holds records.
        processIList(reader, len - BOX_HEADER_LENGTH);
    }

    /**
     * Reads consecutive ilst records until {@code totalLen} bytes have been consumed or a
     * record with an unusable length is met.
     *
     * @param reader   reader positioned on the first record
     * @param totalLen number of record bytes in the list, excluding the ilst box header
     * @throws IOException if the payload ends in the middle of a record
     */
    private void processIList(SequentialReader reader, long totalLen)
            throws IOException {
        long totalRead = 0;
        // stop as soon as there is no room left for another record header
        while (totalLen - totalRead >= ILST_RECORD_HEADER_LENGTH) {
            long recordLen = reader.getUInt32();
            String fieldName = reader.getString(4, StandardCharsets.ISO_8859_1);
            long fieldLen = reader.getUInt32();
            String typeName = reader.getString(4, StandardCharsets.ISO_8859_1);//data
            totalRead += ILST_RECORD_HEADER_LENGTH;

            if (!"data".equals(typeName)) {
                // computed as long so a huge declared length cannot wrap around
                long toSkip = recordLen - ILST_RECORD_HEADER_LENGTH;
                if (toSkip <= 0 || toSkip > Integer.MAX_VALUE) {
                    //log?
                    return;
                }
                reader.skip(toSkip);
                totalRead += toSkip;
                continue;
            }

            // NOTE(review): presumably the data atom's type indicator and locale -- confirm
            reader.skip(DATA_PREAMBLE_LENGTH);
            totalRead += DATA_PREAMBLE_LENGTH;
            long toRead = fieldLen - ILST_RECORD_HEADER_LENGTH;
            if (toRead <= 0 || toRead > Integer.MAX_VALUE) {
                //log?
                return;
            }
            readDataValue(reader, fieldName, (int) toRead);
            totalRead += toRead;
        }
    }

    /**
     * Consumes exactly {@code toRead} value bytes of one "data" record and stores the value
     * for the fields this class understands.
     *
     * @param reader    reader positioned on the first value byte
     * @param fieldName four-character record name
     * @param toRead    number of value bytes, always greater than zero
     * @throws IOException if the payload ends before {@code toRead} bytes are available
     */
    private void readDataValue(SequentialReader reader, String fieldName, int toRead)
            throws IOException {
        switch (fieldName) {
            case "covr":
                //covr can be an image file, e.g. png or jpeg
                //skip this for now
                reader.skip(toRead);
                break;
            case "cpil": {
                int compilationId = reader.getByte();
                // skip whatever follows the first byte so the next record stays aligned
                reader.skip(toRead - 1);
                metadata.set(XMPDM.COMPILATION, compilationId);
                break;
            }
            case "trkn":
                if (toRead == 8) {
                    long trackNumber = reader.getUInt32();
                    reader.skip(4);//second 32-bit value is not used
                    metadata.set(XMPDM.TRACK_NUMBER, (int) trackNumber);
                } else {
                    //log
                    reader.skip(toRead);
                }
                break;
            case "disk":
                if (toRead >= 4) {
                    int discNumber = reader.getInt32();
                    // the trailing bytes are not used
                    reader.skip(toRead - 4);
                    metadata.set(XMPDM.DISC_NUMBER, discNumber);
                } else {
                    //log
                    reader.skip(toRead);
                }
                break;
            default:
                String val = reader.getString(toRead, StandardCharsets.UTF_8);
                try {
                    addMetadata(fieldName, val);
                } catch (SAXException e) {
                    //need to punch through IOException catching in MP4Reader
                    throw new RuntimeSAXException(e);
                }
                break;
        }
    }

    /**
     * Stores one textual ilst value under the metadata property matching {@code key}; most
     * keys also emit the value as a {@code <p>} element. Unknown keys are ignored.
     *
     * @param key   four-character record name
     * @param value decoded text of the record
     * @throws SAXException if writing to the content handler fails
     */
    private void addMetadata(String key, String value) throws SAXException {
        switch (key) {
            case "\u00A9nam":
                metadata.set(TikaCoreProperties.TITLE, value);
                xhtml.element("p", value);
                break;
            case "\u00A9too":
                metadata.set(XMP.CREATOR_TOOL, value);
                break;
            case "\u00A9ART":
                metadata.set(XMPDM.ARTIST, value);
                metadata.set(TikaCoreProperties.CREATOR, value);
                xhtml.element("p", value);
                break;
            case "aART":
                metadata.set(XMPDM.ALBUM_ARTIST, value);
                xhtml.element("p", value);
                break;
            case "\u00A9wrt":
                metadata.set(XMPDM.COMPOSER, value);
                xhtml.element("p", value);
                break;
            case "\u00A9alb":
                metadata.set(XMPDM.ALBUM, value);
                xhtml.element("p", value);
                break;
            case "\u00A9gen":
                metadata.set(XMPDM.GENRE, value);
                xhtml.element("p", value);
                break;
            case "\u00A9day":
                //this can be a year "2008" or a date "2017-04-26T07:00:00Z"
                metadata.set(XMPDM.RELEASE_DATE, value);
                xhtml.element("p", value);
                break;
            case "\u00A9cmt":
                metadata.set(XMPDM.LOG_COMMENT, value);
                xhtml.element("p", value);
                break;
            case "cprt":
                metadata.set(XMPDM.COPYRIGHT, value);
                xhtml.element("p", value);
                break;
            case "keyw":
                metadata.set(Metadata.SUBJECT, value);
                xhtml.element("p", value);
                break;
            case "\u00A9lyr":
                xhtml.element("p", value);
                break;
            case "ldes": //intentional fall through
            case "desc":
                metadata.set(TikaCoreProperties.DESCRIPTION, value);
                xhtml.element("p", value);
                // must not fall through: a description is not an identifier
                break;
            case "xid ":
                //not sure this is the right use of this key
                metadata.set(XMP.IDENTIFIER, value);
                break;
                //purd date?
                //xid ? e.g. SonyBMG:isrc:KRA031208874
                //cprt copyright
                //ownr ? and apID
                //flvr ?
                //son = nam, soal = (c)alb soar = aART?
                //(C)ART
            default:
                break;
        }
    }

    /**
     * Writes latitude and longitude to {@code directory} when a {@code ©xyz} entry was found
     * and contains two consecutive signed decimal numbers.
     *
     * @param directory directory that receives the two coordinate values
     */
    public void addMetadata(Mp4Directory directory) {
        if (this.coordinateString == null) {
            return;
        }
        Matcher matcher = COORDINATE_PATTERN.matcher(this.coordinateString);
        if (matcher.find()) {
            double latitude = Double.parseDouble(matcher.group(1));
            double longitude = Double.parseDouble(matcher.group(2));
            directory.setDouble(LATITUDE_TAG, latitude);
            directory.setDouble(LONGITUDE_TAG, longitude);
        }
    }
}






© 2015 - 2025 Weber Informatics LLC | Privacy Policy