org.apache.fontbox.cmap.CMapParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.fop Show documentation
Show all versions of org.apache.fop Show documentation
The core maven build properties
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.fontbox.cmap;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.fontbox.util.Charsets;
/**
* Parses a CMap stream.
*
* @author Ben Litchfield
*/
public class CMapParser
{
// Token that parseNextToken returns after reading the two-character sequence ">>".
private static final String MARK_END_OF_DICTIONARY = ">>";
// Token that parseNextToken returns after reading a "]" character.
private static final String MARK_END_OF_ARRAY = "]";
// NOTE(review): not referenced in the visible part of this file; presumably a scratch
// buffer used while reading tokens - confirm against the rest of the class.
private final byte[] tokenParserByteBuffer = new byte[512];
// Set by the boolean constructor, switched off by parsePredefined, and handed to
// increment() when the target value of a bfrange mapping is advanced.
private boolean strictMode = false;
/**
 * Creates a parser with strict mode switched off.
 */
public CMapParser()
{
    this(false);
}
/**
* Creates a new instance of CMapParser.
*
* @param strictMode activates the strict mode used for inline CMaps; the flag is handed to
* increment() when the target value of a bfrange mapping is advanced, and it is switched off
* again by {@link #parsePredefined(String)}
*/
public CMapParser(boolean strictMode)
{
this.strictMode = strictMode;
}
/**
 * Reads a CMap from a file on the file system.
 *
 * @param file The CMap file to read.
 * @return The CMap built from the content of the file.
 * @throws IOException If the file cannot be opened or its content cannot be parsed.
 */
public CMap parse(File file) throws IOException
{
    // the constructor either throws or yields a stream, so closing needs no null guard
    FileInputStream fileStream = new FileInputStream(file);
    try
    {
        return parse(fileStream);
    }
    finally
    {
        fileStream.close();
    }
}
/**
 * Loads and parses one of the predefined CMaps, looked up by name through
 * {@link #getExternalCMap(String)}. Strict mode is switched off on this parser before parsing.
 *
 * @param name CMap name.
 * @return The parsed predefined CMap as a java object, never null.
 * @throws IOException If the CMap could not be found or parsed.
 */
public CMap parsePredefined(String name) throws IOException
{
    InputStream predefinedStream = getExternalCMap(name);
    try
    {
        // predefined CMaps are always parsed leniently
        strictMode = false;
        return parse(predefinedStream);
    }
    finally
    {
        // getExternalCMap can be overridden, so a null result is still tolerated here
        if (predefinedStream != null)
        {
            predefinedStream.close();
        }
    }
}
/**
 * Reads tokens from the given stream until it is exhausted or the "endcmap" operator is seen
 * and builds a CMap from them.
 *
 * @param input The CMap stream to parse.
 * @return The parsed stream as a java object, never null.
 * @throws IOException If there is an error parsing the stream.
 */
public CMap parse(InputStream input) throws IOException
{
    final PushbackInputStream cmapStream = new PushbackInputStream(input);
    final CMap cmap = new CMap();
    // operators take their operand (a name or an entry count) from the token before them
    Object previous = null;
    for (Object current = parseNextToken(cmapStream); current != null;
            current = parseNextToken(cmapStream))
    {
        if (current instanceof Operator)
        {
            final String opName = ((Operator) current).op;
            if (opName.equals("endcmap"))
            {
                // nothing of interest follows the end of the CMap
                break;
            }
            if (previous instanceof LiteralName && "usecmap".equals(opName))
            {
                parseUsecmap((LiteralName) previous, cmap);
            }
            else if (previous instanceof Number)
            {
                final Number count = (Number) previous;
                if ("begincodespacerange".equals(opName))
                {
                    parseBegincodespacerange(count, cmapStream, cmap);
                }
                else if ("beginbfchar".equals(opName))
                {
                    parseBeginbfchar(count, cmapStream, cmap);
                }
                else if ("beginbfrange".equals(opName))
                {
                    parseBeginbfrange(count, cmapStream, cmap);
                }
                else if ("begincidchar".equals(opName))
                {
                    parseBegincidchar(count, cmapStream, cmap);
                }
                else if ("begincidrange".equals(opName) && count instanceof Integer)
                {
                    parseBegincidrange((Integer) count, cmapStream, cmap);
                }
            }
        }
        else if (current instanceof LiteralName)
        {
            parseLiteralName((LiteralName) current, cmapStream, cmap);
        }
        previous = current;
    }
    return cmap;
}
/**
 * Handles the "usecmap" operator: loads the referenced CMap through
 * {@link #getExternalCMap(String)}, parses it and hands it to {@code result.useCmap}.
 * The stream of the referenced CMap is closed once it has been parsed.
 *
 * @param useCmapName The name token that preceded the "usecmap" operator.
 * @param result The CMap that receives the referenced CMap.
 * @throws IOException If the referenced CMap cannot be found or parsed.
 */
private void parseUsecmap(LiteralName useCmapName, CMap result) throws IOException
{
    InputStream useStream = getExternalCMap(useCmapName.name);
    try
    {
        CMap useCMap = parse(useStream);
        result.useCmap(useCMap);
    }
    finally
    {
        // getExternalCMap can be overridden, so guard against a null stream like parsePredefined does
        if (useStream != null)
        {
            useStream.close();
        }
    }
}
/**
 * Handles a name token. For the names WMode, CMapName, CMapVersion, CMapType, Registry,
 * Ordering and Supplement the following token is read and, if it has the expected type,
 * stored in the CMap. Any other name is ignored and no further token is consumed.
 *
 * @param literal The name token that was just read.
 * @param cmapStream The stream to read the value token from.
 * @param result The CMap that receives the value.
 * @throws IOException If reading the value token fails.
 */
private void parseLiteralName(LiteralName literal, PushbackInputStream cmapStream, CMap result) throws IOException
{
    final String key = literal.name;
    final boolean recognised = "WMode".equals(key)
            || "CMapName".equals(key)
            || "CMapVersion".equals(key)
            || "CMapType".equals(key)
            || "Registry".equals(key)
            || "Ordering".equals(key)
            || "Supplement".equals(key);
    if (!recognised)
    {
        // unrecognised names must not consume the token that follows them
        return;
    }
    final Object value = parseNextToken(cmapStream);
    if ("WMode".equals(key))
    {
        if (value instanceof Integer)
        {
            result.setWMode((Integer) value);
        }
    }
    else if ("CMapName".equals(key))
    {
        if (value instanceof LiteralName)
        {
            result.setName(((LiteralName) value).name);
        }
    }
    else if ("CMapVersion".equals(key))
    {
        // the version may be given either as a number or as a string
        if (value instanceof Number)
        {
            result.setVersion(value.toString());
        }
        else if (value instanceof String)
        {
            result.setVersion((String) value);
        }
    }
    else if ("CMapType".equals(key))
    {
        if (value instanceof Integer)
        {
            result.setType((Integer) value);
        }
    }
    else if ("Registry".equals(key))
    {
        if (value instanceof String)
        {
            result.setRegistry((String) value);
        }
    }
    else if ("Ordering".equals(key))
    {
        if (value instanceof String)
        {
            result.setOrdering((String) value);
        }
    }
    else if (value instanceof Integer)
    {
        // only "Supplement" is left at this point
        result.setSupplement((Integer) value);
    }
}
/**
 * Verifies that an operator found inside a range section is the expected closing operator.
 *
 * @param operator The operator that was read.
 * @param expectedOperatorName The operator name that is allowed at this position.
 * @param rangeName The name of the section in which the operator was found; it is only used
 * to build the exception message.
 *
 * @throws IOException If the name of the operator differs from expectedOperatorName.
 */
private void checkExpectedOperator(Operator operator, String expectedOperatorName, String rangeName) throws IOException
{
    if (operator.op.equals(expectedOperatorName))
    {
        return;
    }
    String message = "Error : ~" + rangeName + " contains an unexpected operator : "
            + operator.op;
    throw new IOException(message);
}
/**
 * Reads the entries of a "begincodespacerange" section. Each entry consists of two byte
 * strings (start and end of the range) and is added to the CMap as a CodespaceRange.
 * Reading stops early when an operator is met, which has to be "endcodespacerange".
 *
 * @param cosCount The number of entries announced in front of the section.
 * @param cmapStream The stream to read the entries from.
 * @param result The CMap that receives the codespace ranges.
 * @throws IOException If an unexpected operator is met, if a start or end value is missing or
 * not a byte string, or if the range is rejected by CodespaceRange.
 */
private void parseBegincodespacerange(Number cosCount, PushbackInputStream cmapStream, CMap result) throws IOException
{
    int count = cosCount.intValue();
    for (int j = 0; j < count; j++)
    {
        Object nextToken = parseNextToken(cmapStream);
        if (nextToken instanceof Operator)
        {
            checkExpectedOperator((Operator) nextToken, "endcodespacerange", "codespacerange");
            break;
        }
        // a truncated or malformed stream must surface as IOException, not as a cast failure
        if (!(nextToken instanceof byte[]))
        {
            throw new IOException("Error parsing CMap begincodespacerange, expected a start range "
                    + "and not " + nextToken);
        }
        byte[] startRange = (byte[]) nextToken;
        nextToken = parseNextToken(cmapStream);
        if (!(nextToken instanceof byte[]))
        {
            throw new IOException("Error parsing CMap begincodespacerange, expected an end range "
                    + "and not " + nextToken);
        }
        byte[] endRange = (byte[]) nextToken;
        try
        {
            result.addCodespaceRange(new CodespaceRange(startRange, endRange));
        }
        catch (IllegalArgumentException ex)
        {
            throw new IOException(ex);
        }
    }
}
/**
 * Reads the entries of a "beginbfchar" section. Each entry consists of an input code (byte
 * string) followed by its target, which is either a byte string or a name. Reading stops
 * early when an operator is met, which has to be "endbfchar".
 *
 * @param cosCount The number of entries announced in front of the section.
 * @param cmapStream The stream to read the entries from.
 * @param result The CMap that receives the character mappings.
 * @throws IOException If an unexpected operator is met, or if an input code or its target is
 * missing or has an unexpected type.
 */
private void parseBeginbfchar(Number cosCount, PushbackInputStream cmapStream, CMap result) throws IOException
{
    int count = cosCount.intValue();
    for (int j = 0; j < count; j++)
    {
        Object nextToken = parseNextToken(cmapStream);
        if (nextToken instanceof Operator)
        {
            checkExpectedOperator((Operator) nextToken, "endbfchar", "bfchar");
            break;
        }
        // a truncated or malformed stream must surface as IOException, not as a cast failure
        if (!(nextToken instanceof byte[]))
        {
            throw new IOException("Error parsing CMap beginbfchar, expected an input code "
                    + "and not " + nextToken);
        }
        byte[] inputCode = (byte[]) nextToken;
        nextToken = parseNextToken(cmapStream);
        if (nextToken instanceof byte[])
        {
            byte[] bytes = (byte[]) nextToken;
            String value = createStringFromBytes(bytes);
            result.addCharMapping(inputCode, value);
        }
        else if (nextToken instanceof LiteralName)
        {
            result.addCharMapping(inputCode, ((LiteralName) nextToken).name);
        }
        else
        {
            throw new IOException("Error parsing CMap beginbfchar, expected{COSString "
                    + "or COSName} and not " + nextToken);
        }
    }
}
/**
 * Reads the entries of a "begincidrange" section. Each entry consists of a start code and an
 * end code (byte strings) followed by an integer. Reading stops early when an operator is
 * met, which has to be "endcidrange".
 *
 * @param numberOfLines The number of entries announced in front of the section.
 * @param cmapStream The stream to read the entries from.
 * @param result The CMap that receives the CID mappings and ranges.
 * @throws IOException If an unexpected operator is met, or if one of the three values of an
 * entry is missing or has an unexpected type.
 */
private void parseBegincidrange(int numberOfLines, PushbackInputStream cmapStream, CMap result) throws IOException
{
    for (int n = 0; n < numberOfLines; n++)
    {
        Object nextToken = parseNextToken(cmapStream);
        if (nextToken instanceof Operator)
        {
            checkExpectedOperator((Operator) nextToken, "endcidrange", "cidrange");
            break;
        }
        // a truncated or malformed stream must surface as IOException, not as a cast failure
        if (!(nextToken instanceof byte[]))
        {
            throw new IOException("Error parsing CMap begincidrange, expected a start code "
                    + "and not " + nextToken);
        }
        byte[] startCode = (byte[]) nextToken;
        int start = createIntFromBytes(startCode);
        nextToken = parseNextToken(cmapStream);
        if (!(nextToken instanceof byte[]))
        {
            throw new IOException("Error parsing CMap begincidrange, expected an end code "
                    + "and not " + nextToken);
        }
        byte[] endCode = (byte[]) nextToken;
        int end = createIntFromBytes(endCode);
        nextToken = parseNextToken(cmapStream);
        if (!(nextToken instanceof Integer))
        {
            throw new IOException("Error parsing CMap begincidrange, expected an integer "
                    + "and not " + nextToken);
        }
        int mappedCode = (Integer) nextToken;
        if (startCode.length <= 2 && endCode.length <= 2)
        {
            // some CMaps are using CID ranges to map single values
            if (end == start)
            {
                result.addCIDMapping(mappedCode, start);
            }
            else
            {
                result.addCIDRange((char) start, (char) end, mappedCode);
            }
        }
        else
        {
            // TODO Is this even possible?
            // codes longer than two bytes are added one by one instead of as a range
            int endOfMappings = mappedCode + end - start;
            while (mappedCode <= endOfMappings)
            {
                int mappedCID = createIntFromBytes(startCode);
                result.addCIDMapping(mappedCode++, mappedCID);
                increment(startCode, startCode.length - 1, false);
            }
        }
    }
}
/**
 * Reads the entries of a "begincidchar" section. Each entry consists of an input code (byte
 * string) followed by an integer. Reading stops early when an operator is met, which has to
 * be "endcidchar".
 *
 * @param cosCount The number of entries announced in front of the section.
 * @param cmapStream The stream to read the entries from.
 * @param result The CMap that receives the CID mappings.
 * @throws IOException If an unexpected operator is met, or if one of the two values of an
 * entry is missing or has an unexpected type.
 */
private void parseBegincidchar(Number cosCount, PushbackInputStream cmapStream, CMap result) throws IOException
{
    int count = cosCount.intValue();
    for (int j = 0; j < count; j++)
    {
        Object nextToken = parseNextToken(cmapStream);
        if (nextToken instanceof Operator)
        {
            checkExpectedOperator((Operator) nextToken, "endcidchar", "cidchar");
            break;
        }
        // a truncated or malformed stream must surface as IOException, not as a cast failure
        if (!(nextToken instanceof byte[]))
        {
            throw new IOException("Error parsing CMap begincidchar, expected an input code "
                    + "and not " + nextToken);
        }
        byte[] inputCode = (byte[]) nextToken;
        nextToken = parseNextToken(cmapStream);
        if (!(nextToken instanceof Integer))
        {
            throw new IOException("Error parsing CMap begincidchar, expected an integer "
                    + "and not " + nextToken);
        }
        int mappedCode = (Integer) nextToken;
        int mappedCID = createIntFromBytes(inputCode);
        result.addCIDMapping(mappedCode, mappedCID);
    }
}
private void parseBeginbfrange(Number cosCount, PushbackInputStream cmapStream, CMap result) throws IOException
{
for (int j = 0; j < cosCount.intValue(); j++)
{
Object nextToken = parseNextToken(cmapStream);
if (nextToken instanceof Operator)
{
checkExpectedOperator((Operator) nextToken, "endbfrange", "bfrange");
break;
}
byte[] startCode = (byte[]) nextToken;
byte[] endCode = (byte[]) parseNextToken(cmapStream);
int start = CMap.toInt(startCode, startCode.length);
int end = CMap.toInt(endCode, endCode.length);
// end has to be bigger than start or equal
if (end < start)
{
// PDFBOX-4550: likely corrupt stream
break;
}
nextToken = parseNextToken(cmapStream);
if (nextToken instanceof List>)
{
List array = (List) nextToken;
// ignore empty and malformed arrays
if (!array.isEmpty() && array.size() >= end - start)
{
addMappingFrombfrange(result, startCode, array);
}
}
// PDFBOX-3807: ignore null
else if (nextToken instanceof byte[])
{
byte[] tokenBytes = (byte[]) nextToken;
// PDFBOX-3450: ignore <>
if (tokenBytes.length > 0)
{
// PDFBOX-4720:
// some pdfs use the malformed bfrange <0000> <0000>. Add support by adding a identity
// mapping for the whole range instead of cutting it after 255 entries
// TODO find a more efficient method to represent all values for a identity mapping
if (tokenBytes.length == 2 && start == 0 && end == 0xffff
&& tokenBytes[0] == 0 && tokenBytes[1] == 0)
{
for (int i = 0; i < 256; i++)
{
startCode[0] = (byte) i;
startCode[1] = 0;
tokenBytes[0] = (byte) i;
tokenBytes[1] = 0;
addMappingFrombfrange(result, startCode, 256, tokenBytes);
}
}
else
{
addMappingFrombfrange(result, startCode, end - start + 1, tokenBytes);
}
}
}
}
}
/**
 * Adds one character mapping per element of the given list. The first element is mapped to
 * startCode; startCode is incremented in place after every element.
 *
 * @param cmap The CMap that receives the mappings.
 * @param startCode The first code of the range; modified by this method.
 * @param tokenBytesList The target values, one byte string per code.
 */
private void addMappingFrombfrange(CMap cmap, byte[] startCode, List<byte[]> tokenBytesList)
{
    for (byte[] tokenBytes : tokenBytesList)
    {
        String value = createStringFromBytes(tokenBytes);
        cmap.addCharMapping(startCode, value);
        increment(startCode, startCode.length - 1, false);
    }
}
/**
 * Adds up to {@code values} consecutive character mappings. After each mapping the target
 * value and then the code are incremented in place. The method returns early as soon as
 * incrementing the target value reports failure.
 *
 * @param cmap The CMap that receives the mappings.
 * @param startCode The first code of the range; modified by this method.
 * @param values The number of mappings to add.
 * @param tokenBytes The target value of the first code; modified by this method.
 */
private void addMappingFrombfrange(CMap cmap, byte[] startCode, int values,
        byte[] tokenBytes)
{
    int remaining = values;
    while (remaining-- > 0)
    {
        cmap.addCharMapping(startCode, createStringFromBytes(tokenBytes));
        boolean advanced = increment(tokenBytes, tokenBytes.length - 1, strictMode);
        if (!advanced)
        {
            // overflow detected -> stop adding further mappings
            return;
        }
        increment(startCode, startCode.length - 1, false);
    }
}
/**
 * Opens the CMap resource with the given name. The resource is looked up with
 * {@code getClass().getResourceAsStream(name)}.
 *
 * @param name Name of the CMap resource.
 * @return A buffered stream over the content of the resource.
 * @throws IOException if the CMap resource doesn't exist or if there is an error opening its
 * stream.
 */
protected InputStream getExternalCMap(String name) throws IOException
{
    InputStream resource = getClass().getResourceAsStream(name);
    if (resource != null)
    {
        return new BufferedInputStream(resource);
    }
    throw new IOException("Error: Could not find referenced cmap stream " + name);
}
private Object parseNextToken(PushbackInputStream is) throws IOException
{
Object retval = null;
int nextByte = is.read();
// skip whitespace
while (nextByte == 0x09 || nextByte == 0x20 || nextByte == 0x0D || nextByte == 0x0A)
{
nextByte = is.read();
}
switch (nextByte)
{
case '%':
{
// header operations, for now return the entire line
// may need to smarter in the future
StringBuilder buffer = new StringBuilder();
buffer.append((char) nextByte);
readUntilEndOfLine(is, buffer);
retval = buffer.toString();
break;
}
case '(':
{
StringBuilder buffer = new StringBuilder();
int stringByte = is.read();
while (stringByte != -1 && stringByte != ')')
{
buffer.append((char) stringByte);
stringByte = is.read();
}
retval = buffer.toString();
break;
}
case '>':
{
int secondCloseBrace = is.read();
if (secondCloseBrace == '>')
{
retval = MARK_END_OF_DICTIONARY;
}
else
{
throw new IOException("Error: expected the end of a dictionary.");
}
break;
}
case ']':
{
retval = MARK_END_OF_ARRAY;
break;
}
case '[':
{
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy