All downloads are free. The search and download features use the official Maven repository.

com.lowagie.text.pdf.fonts.cmaps.CMapParser Maven / Gradle / Ivy

There is a newer version: 2.0.3
Show newest version
/**
 * Copyright (c) 2005-2006, www.fontbox.org All rights reserved.
 * 

 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 * following conditions are met:
 *

 * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following
 * disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the
 * name of fontbox; nor the names of its contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *

 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *

* http://www.fontbox.org
 */
package com.lowagie.text.pdf.fonts.cmaps;

import com.lowagie.text.error_messages.MessageLocalization;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * This will parse a CMap stream.
 *
 * <p>The parser tokenizes the stream and reacts to three operators: {@code begincodespacerange},
 * {@code beginbfchar} and {@code beginbfrange}. Every other token is read and ignored. Malformed input is
 * reported as an {@link IOException}.
 *
 * @author Ben Litchfield
 * @version $Revision: 4065 $
 * @since 2.1.4
 */
public class CMapParser {

    private static final String BEGIN_CODESPACE_RANGE = "begincodespacerange";
    private static final String BEGIN_BASE_FONT_CHAR = "beginbfchar";
    private static final String BEGIN_BASE_FONT_RANGE = "beginbfrange";

    // Sentinel tokens. They are compared by identity so that a literal string token whose
    // content happens to be "]" or ">>" is never mistaken for a structural marker.
    private static final String MARK_END_OF_DICTIONARY = ">>";
    private static final String MARK_END_OF_ARRAY = "]";

    /**
     * Creates a new instance of CMapParser.
     */
    public CMapParser() {
    }

    /**
     * A simple class to test parsing of cmap files.
     *
     * @param args Some command line arguments; exactly one is expected, the path of the CMap file.
     * @throws Exception If there is an error parsing the file.
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("usage: java org.pdfbox.cmapparser.CMapParser ");
            System.exit(-1);
        }
        CMapParser parser = new CMapParser();
        // try-with-resources so the file handle is released even when parsing fails
        try (InputStream in = new FileInputStream(args[0])) {
            CMap result = parser.parse(in);
            System.out.println("Result:" + result);
        }
    }

    /**
     * This will parse the stream and create a cmap object.
     *
     * <p>The stream is read until EOF but is not closed by this method.
     *
     * @param input The CMAP stream to parse.
     * @return The parsed stream as a java object.
     * @throws IOException If there is an error reading the stream or the stream is malformed.
     */
    public CMap parse(InputStream input) throws IOException {
        PushbackInputStream cmapStream = new PushbackInputStream(input);
        CMap result = new CMap();
        Object previousToken = null;
        Object token;
        while ((token = parseNextToken(cmapStream)) != null) {
            if (token instanceof Operator) {
                String operator = ((Operator) token).op;
                // The token right before a begin* operator is the number of entries that follow.
                switch (operator) {
                    case BEGIN_CODESPACE_RANGE:
                        parseCodespaceRanges(cmapStream, result, entryCount(previousToken, operator));
                        break;
                    case BEGIN_BASE_FONT_CHAR:
                        parseBaseFontChars(cmapStream, result, entryCount(previousToken, operator));
                        break;
                    case BEGIN_BASE_FONT_RANGE:
                        parseBaseFontRanges(cmapStream, result, entryCount(previousToken, operator));
                        break;
                    default:
                        // any other operator is ignored
                        break;
                }
            }
            previousToken = token;
        }
        return result;
    }

    /**
     * Reads {@code count} start/end pairs following {@code begincodespacerange} and adds them to the cmap.
     */
    private void parseCodespaceRanges(PushbackInputStream cmapStream, CMap result, int count) throws IOException {
        for (int j = 0; j < count; j++) {
            byte[] startRange = expectHexString(parseNextToken(cmapStream), BEGIN_CODESPACE_RANGE);
            byte[] endRange = expectHexString(parseNextToken(cmapStream), BEGIN_CODESPACE_RANGE);
            CodespaceRange range = new CodespaceRange();
            range.setStart(startRange);
            range.setEnd(endRange);
            result.addCodespaceRange(range);
        }
    }

    /**
     * Reads {@code count} code/value pairs following {@code beginbfchar} and adds them to the cmap.
     * The value is either a hex string or a literal name.
     */
    private void parseBaseFontChars(PushbackInputStream cmapStream, CMap result, int count) throws IOException {
        for (int j = 0; j < count; j++) {
            byte[] inputCode = expectHexString(parseNextToken(cmapStream), BEGIN_BASE_FONT_CHAR);
            Object nextToken = parseNextToken(cmapStream);
            if (nextToken instanceof byte[]) {
                result.addMapping(inputCode, createStringFromBytes((byte[]) nextToken));
            } else if (nextToken instanceof LiteralName) {
                result.addMapping(inputCode, ((LiteralName) nextToken).name);
            } else {
                throw new IOException(MessageLocalization.getComposedMessage(
                        "error.parsing.cmap.beginbfchar.expected.cosstring.or.cosname.and.not.1", nextToken));
            }
        }
    }

    /**
     * Reads {@code count} entries following {@code beginbfrange} and adds one mapping per code in each range.
     * An entry is {@code start end value}, where value is either a single hex string (incremented for each
     * successive code) or an array of hex strings (one per code).
     */
    private void parseBaseFontRanges(PushbackInputStream cmapStream, CMap result, int count) throws IOException {
        for (int j = 0; j < count; j++) {
            byte[] startCode = expectHexString(parseNextToken(cmapStream), BEGIN_BASE_FONT_RANGE);
            byte[] endCode = expectHexString(parseNextToken(cmapStream), BEGIN_BASE_FONT_RANGE);
            Object nextToken = parseNextToken(cmapStream);

            List<?> array = null;
            byte[] tokenBytes;
            if (nextToken instanceof List) {
                array = (List<?>) nextToken;
                if (array.isEmpty()) {
                    throw new IOException("Error parsing CMap " + BEGIN_BASE_FONT_RANGE
                            + ": the destination array is empty");
                }
                tokenBytes = expectHexString(array.get(0), BEGIN_BASE_FONT_RANGE);
            } else {
                tokenBytes = expectHexString(nextToken, BEGIN_BASE_FONT_RANGE);
            }

            int arrayIndex = 0;
            boolean done = false;
            while (!done) {
                // The end code itself is still mapped: the flag is set before the mapping is added.
                if (compare(startCode, endCode) >= 0) {
                    done = true;
                }
                result.addMapping(startCode, createStringFromBytes(tokenBytes));
                increment(startCode);
                if (array == null) {
                    increment(tokenBytes);
                } else {
                    arrayIndex++;
                    // If the array is shorter than the range, the last element keeps being used.
                    if (arrayIndex < array.size()) {
                        tokenBytes = expectHexString(array.get(arrayIndex), BEGIN_BASE_FONT_RANGE);
                    }
                }
            }
        }
    }

    /**
     * Interprets the token preceding a {@code begin*} operator as the number of entries.
     *
     * @throws IOException If the token is not a number.
     */
    private int entryCount(Object previousToken, String operator) throws IOException {
        if (!(previousToken instanceof Number)) {
            throw new IOException("Error parsing CMap: expected a number before " + operator
                    + " and not " + previousToken);
        }
        return ((Number) previousToken).intValue();
    }

    /**
     * Ensures the token is a hex string (parsed as a byte array).
     *
     * @throws IOException If the token is anything else, including {@code null} at EOF.
     */
    private byte[] expectHexString(Object token, String operator) throws IOException {
        if (token instanceof byte[]) {
            return (byte[]) token;
        }
        throw new IOException("Error parsing CMap " + operator + ": expected a hex string and not " + token);
    }

    /**
     * Reads the next token from the stream.
     *
     * @return {@code null} at EOF; a {@code String} for comments, literal strings and the end-of-array /
     *         end-of-dictionary markers; a {@code List} for arrays; a {@code Map} for dictionaries; a
     *         {@code byte[]} for hex strings; a {@link LiteralName} for names; an {@code Integer} or
     *         {@code Double} for numbers; an {@link Operator} for everything else.
     * @throws IOException If the stream cannot be read or a token is malformed.
     */
    private Object parseNextToken(PushbackInputStream is) throws IOException {
        int nextByte = is.read();
        // skip whitespace
        while (nextByte == 0x09 || nextByte == 0x20 || nextByte == 0x0D || nextByte == 0x0A) {
            nextByte = is.read();
        }
        switch (nextByte) {
            case -1:
                // EOF
                return null;
            case '%': {
                // header operations, for now return the entire line
                StringBuilder buffer = new StringBuilder();
                buffer.append((char) nextByte);
                readUntilEndOfLine(is, buffer);
                return buffer.toString();
            }
            case '(': {
                StringBuilder buffer = new StringBuilder();
                int stringByte = is.read();
                while (stringByte != -1 && stringByte != ')') {
                    buffer.append((char) stringByte);
                    stringByte = is.read();
                }
                return buffer.toString();
            }
            case '>': {
                if (is.read() != '>') {
                    throw new IOException(
                            MessageLocalization.getComposedMessage("error.expected.the.end.of.a.dictionary"));
                }
                return MARK_END_OF_DICTIONARY;
            }
            case ']':
                return MARK_END_OF_ARRAY;
            case '[':
                return parseArray(is);
            case '<': {
                int theNextByte = is.read();
                if (theNextByte == '<') {
                    return parseDictionary(is);
                }
                return parseHexString(is, theNextByte);
            }
            case '/': {
                StringBuilder buffer = new StringBuilder();
                int stringByte = is.read();
                while (!isWhitespaceOrEOF(stringByte)) {
                    buffer.append((char) stringByte);
                    stringByte = is.read();
                }
                return new LiteralName(buffer.toString());
            }
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                return parseNumber(is, nextByte);
            default: {
                StringBuilder buffer = new StringBuilder();
                buffer.append((char) nextByte);
                nextByte = is.read();
                while (!isWhitespaceOrEOF(nextByte)) {
                    buffer.append((char) nextByte);
                    nextByte = is.read();
                }
                return new Operator(buffer.toString());
            }
        }
    }

    /**
     * Reads array elements up to the closing {@code ]}; the opening {@code [} has already been consumed.
     *
     * @throws IOException If the stream ends before the array is closed.
     */
    private List<Object> parseArray(PushbackInputStream is) throws IOException {
        List<Object> list = new ArrayList<>();
        Object element = parseNextToken(is);
        while (element != MARK_END_OF_ARRAY) {
            if (element == null) {
                // Without this check an unterminated array would append nulls forever.
                throw new IOException("Error parsing CMap: unexpected end of stream inside an array");
            }
            list.add(element);
            element = parseNextToken(is);
        }
        return list;
    }

    /**
     * Reads name/value pairs of a dictionary; the opening {@code <<} has already been consumed.
     * Reading stops at the first key token that is not a literal name (normally the {@code >>} marker).
     */
    private Map<String, Object> parseDictionary(PushbackInputStream is) throws IOException {
        Map<String, Object> result = new HashMap<>();
        Object key = parseNextToken(is);
        while (key instanceof LiteralName) {
            Object value = parseNextToken(is);
            result.put(((LiteralName) key).name, value);
            key = parseNextToken(is);
        }
        return result;
    }

    /**
     * Decodes a hex string up to the closing {@code >}; the opening {@code <} has already been consumed.
     * Whitespace between digits is ignored and a trailing unpaired digit is treated as the high nibble
     * of a final byte.
     *
     * @param firstByte the first byte after the opening {@code <}
     * @throws IOException If a character is neither whitespace nor a hex digit.
     */
    private byte[] parseHexString(PushbackInputStream is, int firstByte) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        int highNibble = -1;
        int current = firstByte;
        while (current != -1 && current != '>') {
            boolean whitespace = current == ' ' || current == '\t' || current == '\n'
                    || current == '\r' || current == '\f';
            if (!whitespace) {
                int digit = hexDigitValue(current);
                if (highNibble < 0) {
                    highNibble = digit;
                } else {
                    bytes.write((highNibble << 4) | digit);
                    highNibble = -1;
                }
            }
            current = is.read();
        }
        if (highNibble >= 0) {
            bytes.write(highNibble << 4);
        }
        return bytes.toByteArray();
    }

    /**
     * Returns the value (0-15) of a hex digit.
     *
     * @throws IOException If the character is not a hex digit.
     */
    private int hexDigitValue(int theNextByte) throws IOException {
        if (theNextByte >= '0' && theNextByte <= '9') {
            return theNextByte - '0';
        }
        if (theNextByte >= 'A' && theNextByte <= 'F') {
            return 10 + theNextByte - 'A';
        }
        if (theNextByte >= 'a' && theNextByte <= 'f') {
            return 10 + theNextByte - 'a';
        }
        throw new IOException(MessageLocalization.getComposedMessage(
                "error.expected.hex.character.and.not.char.thenextbyte.1", theNextByte));
    }

    /**
     * Reads an unsigned integer or decimal number whose first digit has already been consumed.
     *
     * @param firstDigit the digit that started the number
     * @return an {@code Integer}, or a {@code Double} when the text contains a '.'
     * @throws IOException If the text is not a valid number.
     */
    private Number parseNumber(PushbackInputStream is, int firstDigit) throws IOException {
        StringBuilder buffer = new StringBuilder();
        buffer.append((char) firstDigit);
        int nextByte = is.read();
        while (!isWhitespaceOrEOF(nextByte) && (Character.isDigit((char) nextByte) || nextByte == '.')) {
            buffer.append((char) nextByte);
            nextByte = is.read();
        }
        // Give the terminating byte back to the stream, but never push back EOF:
        // unread(-1) would insert a spurious 0xFF byte.
        if (nextByte != -1) {
            is.unread(nextByte);
        }
        String value = buffer.toString();
        try {
            if (value.indexOf('.') >= 0) {
                return Double.valueOf(value);
            }
            return Integer.valueOf(value);
        } catch (NumberFormatException e) {
            throw new IOException("Error parsing CMap: invalid number '" + value + "'", e);
        }
    }

    /**
     * Appends bytes to {@code buf} until EOF, CR or LF; the terminating byte is consumed but not appended.
     */
    private void readUntilEndOfLine(InputStream is, StringBuilder buf) throws IOException {
        int nextByte = is.read();
        while (nextByte != -1 && nextByte != 0x0D && nextByte != 0x0A) {
            buf.append((char) nextByte);
            nextByte = is.read();
        }
    }

    /**
     * Tells whether the byte ends a token: EOF, tab, space, CR or LF (the same set the tokenizer skips).
     */
    private boolean isWhitespaceOrEOF(int aByte) {
        return aByte == -1 || aByte == 0x09 || aByte == 0x20 || aByte == 0x0D || aByte == 0x0A;
    }

    /**
     * Increments the big-endian unsigned number stored in {@code data} by one, in place.
     * An overflow of the most significant byte wraps around; an empty array is left untouched.
     */
    private void increment(byte[] data) {
        for (int position = data.length - 1; position >= 0; position--) {
            if (position > 0 && (data[position] & 0xFF) == 0xFF) {
                // carry into the next more significant byte
                data[position] = 0;
            } else {
                data[position]++;
                return;
            }
        }
    }

    /**
     * Converts a mapping target to a string: a single byte is decoded as ISO-8859-1, anything else as
     * UTF-16BE.
     */
    private String createStringFromBytes(byte[] bytes) {
        if (bytes.length == 1) {
            // Explicit charset: the platform default would make the result machine dependent.
            return new String(bytes, StandardCharsets.ISO_8859_1);
        }
        return new String(bytes, StandardCharsets.UTF_16BE);
    }

    /**
     * Compares two codes byte by byte as unsigned values, over the length of the shorter one.
     *
     * @return -1 if {@code first} is smaller at the first differing byte, otherwise 1 (also when no
     *         difference is found).
     */
    private int compare(byte[] first, byte[] second) {
        int length = Math.min(first.length, second.length);
        for (int i = 0; i < length; i++) {
            int a = first[i] & 0xFF;
            int b = second[i] & 0xFF;
            if (a != b) {
                return a < b ? -1 : 1;
            }
        }
        return 1;
    }

    /**
     * Token for a name object ({@code /Name}).
     */
    private static final class LiteralName {

        private final String name;

        private LiteralName(String theName) {
            name = theName;
        }
    }

    /**
     * Token for an operator (any bare keyword).
     */
    private static final class Operator {

        private final String op;

        private Operator(String theOp) {
            op = theOp;
        }
    }
}