All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.parser.video.FLVParser Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.video;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import static java.nio.charset.StandardCharsets.UTF_8;

/**
 * 

* Parser for metadata contained in Flash Videos (.flv). Resources: * http://osflash.org/flv and for AMF: * http://download.macromedia.com/pub/labs/amf/amf0_spec_121207.pdf *

* This parser is capable of extracting the general metadata from header as well * as embedded metadata. *

* Known keys for metadata (from file header): *

    *
  1. hasVideo: true|false *
  2. hasSound: true|false *
*

* In addition to the above values also metadata that is inserted in to the * actual stream will be picked. Usually there are keys like: * hasKeyframes, lastkeyframetimestamp, audiocodecid, keyframes, filepositions, * hasMetadata, audiosamplerate, videodatarate metadatadate, videocodecid, * metadatacreator, audiosize, hasVideo, height, audiosamplesize, framerate, * hasCuePoints width, cuePoints, lasttimestamp, canSeekToEnd, datasize, * duration, videosize, filesize, audiodatarate, hasAudio, stereo audiodelay */ public class FLVParser extends AbstractParser { /** Serial version UID */ private static final long serialVersionUID = -8718013155719197679L; private static int TYPE_METADATA = 0x12; private static byte MASK_AUDIO = 1; private static byte MASK_VIDEO = 4; private static final Set SUPPORTED_TYPES = Collections.singleton(MediaType.video("x-flv")); public Set getSupportedTypes(ParseContext context) { return SUPPORTED_TYPES; } private long readUInt32(DataInputStream input) throws IOException { return input.readInt() & 0xFFFFFFFFL; } private int readUInt24(DataInputStream input) throws IOException { //readunsignedbyte checks for eof int uint = input.readUnsignedByte()<<16; uint += input.readUnsignedByte()<<8; uint += input.readUnsignedByte(); return uint; } private Object readAMFData(DataInputStream input, int type) throws IOException { if (type == -1) { type = input.readUnsignedByte(); } switch (type) { case 0: return input.readDouble(); case 1: return input.readUnsignedByte() == 1; case 2: return readAMFString(input); case 3: return readAMFObject(input); case 8: return readAMFEcmaArray(input); case 10: return readAMFStrictArray(input); case 11: final Date date = new Date((long) input.readDouble()); input.readShort(); // time zone return date; case 13: return "UNDEFINED"; default: return null; } } private Object readAMFStrictArray(DataInputStream input) throws IOException { long count = readUInt32(input); ArrayList list = new ArrayList(); for (int i = 0; i < count; i++) { list.add(readAMFData(input, -1)); } return list; } private String readAMFString(DataInputStream input) throws IOException { int size = input.readUnsignedShort(); byte[] chars = new byte[size]; input.readFully(chars); return new String(chars, UTF_8); } private Object readAMFObject(DataInputStream input) throws IOException { HashMap array = new HashMap(); while (true) { String key = readAMFString(input); int dataType = input.read(); if (dataType == 9) { // object end marker break; } array.put(key, readAMFData(input, dataType)); } return array; } private Object readAMFEcmaArray(DataInputStream input) throws IOException { long size = readUInt32(input); HashMap array = new HashMap(); for (int i = 0; i < size; i++) { String key = readAMFString(input); int dataType = input.read(); array.put(key, readAMFData(input, dataType)); } return array; } private boolean checkSignature(DataInputStream fis) throws IOException { return fis.read() == 'F' && fis.read() == 'L' && fis.read() == 'V'; } public void parse( InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { DataInputStream datainput = new DataInputStream(stream); if (!checkSignature(datainput)) { throw new TikaException("FLV signature not detected"); } // header int version = datainput.readUnsignedByte(); if (version != 1) { // should be 1, perhaps this is not flv? throw new TikaException("Unpexpected FLV version: " + version); } int typeFlags = datainput.readUnsignedByte(); long len = readUInt32(datainput); if (len != 9) { // we only know about format with header of 9 bytes throw new TikaException("Unpexpected FLV header length: " + len); } long sizePrev = readUInt32(datainput); if (sizePrev != 0) { // should be 0, perhaps this is not flv? throw new TikaException( "Unpexpected FLV first previous block size: " + sizePrev); } metadata.set(Metadata.CONTENT_TYPE, "video/x-flv"); metadata.set("hasVideo", Boolean.toString((typeFlags & MASK_VIDEO) != 0)); metadata.set("hasAudio", Boolean.toString((typeFlags & MASK_AUDIO) != 0)); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); // flv tag stream follows... while (true) { int type = datainput.read(); if (type == -1) { // EOF break; } final int datalen = readUInt24(datainput); //body length readUInt32(datainput); // timestamp readUInt24(datainput); // streamid if (type == TYPE_METADATA) { // found metadata Tag, read content to buffer byte[] metaBytes = new byte[datalen]; for (int readCount = 0; readCount < datalen;) { int r = stream.read(metaBytes, readCount, datalen - readCount); if(r!=-1) { readCount += r; } else { break; } } ByteArrayInputStream is = new ByteArrayInputStream(metaBytes); DataInputStream dis = new DataInputStream(is); Object data = null; for (int i = 0; i < 2; i++) { data = readAMFData(dis, -1); } if (data instanceof Map) { // TODO if there are multiple metadata values with same key (in // separate AMF blocks, we currently loose previous values) Map extractedMetadata = (Map) data; for (Entry entry : extractedMetadata.entrySet()) { if (entry.getValue() == null) { continue; } metadata.set(entry.getKey(), entry.getValue().toString()); } } } else { // Tag was not metadata, skip over data we cannot handle for (int i = 0; i < datalen; i++) { datainput.readByte(); } } sizePrev = readUInt32(datainput); // previous block size if (sizePrev != datalen + 11) { // file was corrupt or we could not parse it... break; } } xhtml.endDocument(); } }