org.apache.tika.parser.image.BPGParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.image;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.poi.util.IOUtils;
import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.TikaMemoryLimitException;
import org.apache.tika.io.EndianUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Photoshop;
import org.apache.tika.metadata.TIFF;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
* Parser for the Better Portable Graphics (BPG) File Format.
*
* Documentation on the file format is available from
* http://bellard.org/bpg/bpg_spec.txt
*/
public class BPGParser extends AbstractParser {
protected static final int EXTENSION_TAG_EXIF = 1;
protected static final int EXTENSION_TAG_ICC_PROFILE = 2;
protected static final int EXTENSION_TAG_XMP = 3;
protected static final int EXTENSION_TAG_THUMBNAIL = 4;
private static final long serialVersionUID = -161736541253892772L;
//50 MB -- throw TikaMemoryLimitException if xmp or exif is allegedly longer than this
private static final int DEFAULT_MAX_RECORD_LENGTH = 50 * 1024 * 1024;
private static final Set SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet(Arrays.asList(
MediaType.image("x-bpg"), MediaType.image("bpg"))));
private int maxRecordLength = DEFAULT_MAX_RECORD_LENGTH;
public Set getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
}
public void parse(
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
// Check for the magic header signature
byte[] signature = new byte[4];
IOUtils.readFully(stream, signature);
if (signature[0] == (byte) 'B' && signature[1] == (byte) 'P' &&
signature[2] == (byte) 'G' && signature[3] == (byte) 0xfb) {
// Good, signature found
} else {
throw new TikaException("BPG magic signature invalid");
}
// Grab and decode the first byte
int pdf = stream.read();
// Pixel format: Greyscale / 4:2:0 / 4:2:2 / 4:4:4
int pixelFormat = pdf & 0x7;
// TODO Identify a suitable metadata key for this
// Is there an alpha plane as well as a colour plane?
boolean hasAlphaPlane1 = (pdf & 0x8) == 0x8;
// TODO Identify a suitable metadata key for this+hasAlphaPlane2
// Bit depth minus 8
int bitDepth = (pdf >> 4) + 8;
metadata.set(TIFF.BITS_PER_SAMPLE, Integer.toString(bitDepth));
// Grab and decode the second byte
int cer = stream.read();
// Colour Space: YCbCr / RGB / YCgCo / YCbCrK / CMYK
int colourSpace = cer & 0x15;
switch (colourSpace) {
case 0:
metadata.set(Photoshop.COLOR_MODE, "YCbCr Colour");
break;
case 1:
metadata.set(Photoshop.COLOR_MODE, "RGB Colour");
break;
case 2:
metadata.set(Photoshop.COLOR_MODE, "YCgCo Colour");
break;
case 3:
metadata.set(Photoshop.COLOR_MODE, "YCbCrK Colour");
break;
case 4:
metadata.set(Photoshop.COLOR_MODE, "CMYK Colour");
break;
}
// Are there extensions or not?
boolean hasExtensions = (cer & 16) == 16;
// Is the Alpha Plane 2 flag set?
boolean hasAlphaPlane2 = (cer & 32) == 32;
// cer then holds 2 more booleans - limited range, reserved
// Width and height next
int width = (int) EndianUtils.readUE7(stream);
int height = (int) EndianUtils.readUE7(stream);
metadata.set(TIFF.IMAGE_LENGTH, height);
metadata.set(TIFF.IMAGE_WIDTH, width);
// Picture Data length
EndianUtils.readUE7(stream);
// Extension Data Length, if extensions present
long extensionDataLength = 0;
if (hasExtensions)
extensionDataLength = EndianUtils.readUE7(stream);
// Alpha Data Length, if alpha used
long alphaDataLength = 0;
if (hasAlphaPlane1 || hasAlphaPlane2)
alphaDataLength = EndianUtils.readUE7(stream);
// Extension Data
if (hasExtensions) {
long extensionsDataSeen = 0;
ImageMetadataExtractor metadataExtractor =
new ImageMetadataExtractor(metadata);
while (extensionsDataSeen < extensionDataLength) {
int extensionType = (int) EndianUtils.readUE7(stream);
int extensionLength = (int) EndianUtils.readUE7(stream);
if (extensionLength > maxRecordLength) {
throw new TikaMemoryLimitException("extension length (" +
extensionLength + " bytes) is greater than 'maxRecordLength' (" +
maxRecordLength + " bytes). If this file is not corrupt, " +
"consider bumping the maxRecordLength via tika-config.xml");
}
switch (extensionType) {
case EXTENSION_TAG_EXIF:
metadataExtractor.parseRawExif(stream, extensionLength, true);
break;
case EXTENSION_TAG_XMP:
handleXMP(stream, extensionLength, metadataExtractor);
break;
default:
long skipped = IOUtils.skipFully(stream, extensionLength);
if (skipped != extensionLength) {
throw new TikaException("failed to skip required amount");
}
}
extensionsDataSeen += extensionLength;
}
}
// HEVC Header + Data
// Alpha HEVC Header + Data
// We can't do anything with these parts
// We don't have any helpful text, sorry...
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
xhtml.endDocument();
}
@Field
public void setMaxRecordLength(int maxRecordLength) {
this.maxRecordLength = maxRecordLength;
}
protected void handleXMP(InputStream stream, int xmpLength,
ImageMetadataExtractor extractor) throws IOException, TikaException, SAXException {
byte[] xmp = new byte[xmpLength];
IOUtils.readFully(stream, xmp);
extractor.parseRawXMP(xmp);
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy