// org.apache.fontbox.cmap.CMap - Maven / Gradle / Ivy
// Show all versions of org.apache.fop - Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.fontbox.cmap;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* This class represents a CMap file.
*
* @author Ben Litchfield
*/
public class CMap
{
private static final Log LOG = LogFactory.getLog(CMap.class);

private int wmode = 0;
private String cmapName = null;
private String cmapVersion = null;
private int cmapType = -1;
private String registry = null;
private String ordering = null;
private int supplement = 0;

// Number of bytes readCode() consumes up front, and the size of its code buffer.
private int minCodeLength = 4;
private int maxCodeLength;

// code lengths: ranges that readCode() matches candidate byte sequences against
private final List<CodespaceRange> codespaceRanges = new ArrayList<CodespaceRange>();

// Unicode mappings: character code -> Unicode string, queried by toUnicode()
private final Map<Integer, String> charToUnicode = new HashMap<Integer, String>();

// inverted map
// NOTE(review): key/value types are inferred from the field name (Unicode string ->
// raw code bytes); confirm against the code that populates and reads this map.
private final Map<String, byte[]> unicodeToByteCodes = new HashMap<String, byte[]>();

// CID mappings: direct code -> CID entries, then ranges consulted by toCID() as a fallback
private final Map<Integer, Integer> codeToCid = new HashMap<Integer, Integer>();
private final List<CIDRange> codeToCidRanges = new ArrayList<CIDRange>();

private static final String SPACE = " ";
private int spaceMapping = -1;
/**
 * Creates a new instance of CMap with no mappings.
 * <p>
 * The constructor declares no access modifier, so it can only be invoked from
 * within this package.
 */
CMap()
{
}
/**
 * Tells whether this cmap holds at least one CID mapping, either as a single
 * code-to-CID entry or as a CID range.
 *
 * @return true if any CID mapping is present, false otherwise.
 */
public boolean hasCIDMappings()
{
    // Single-code entries are checked first; ranges only matter when there are none.
    if (!codeToCid.isEmpty())
    {
        return true;
    }
    return !codeToCidRanges.isEmpty();
}
/**
 * Tells whether this cmap holds at least one code-to-Unicode mapping.
 *
 * @return true if any Unicode mapping is present, false otherwise.
 */
public boolean hasUnicodeMappings()
{
    final boolean noMappings = charToUnicode.isEmpty();
    return !noMappings;
}
/**
 * Looks up the Unicode character sequence mapped to a character code.
 *
 * @param code character code
 * @return Unicode characters (may be more than one, e.g "fi" ligature), or
 *         null when the code has no entry in the Unicode mapping
 */
public String toUnicode(int code)
{
    // The int code is boxed to serve as the map key.
    final String unicode = charToUnicode.get(code);
    return unicode;
}
/**
 * Reads a character code from a string in the content stream.
 * See "CMap Mapping" and "Handling Undefined Characters" in PDF32000 for more details.
 * <p>
 * The first {@code minCodeLength} bytes are read, then the candidate is extended one
 * byte at a time up to {@code maxCodeLength} until a codespace range fully matches it.
 * If no range matches, a warning is logged, the stream is repositioned (when it
 * supports mark/reset) to just after the initial {@code minCodeLength} bytes, and
 * those initial bytes are returned as the code.
 *
 * @param in string stream
 * @return character code
 * @throws IOException if there was an error reading the stream or CMap
 */
public int readCode(InputStream in) throws IOException
{
    // NOTE(review): assumes maxCodeLength >= minCodeLength; otherwise the initial read
    // below overruns the buffer. Confirm this is guaranteed wherever the lengths are set.
    byte[] bytes = new byte[maxCodeLength];

    // InputStream.read(byte[], int, int) may deliver fewer bytes than requested, so keep
    // reading until the minimum code length is filled or the stream has nothing more.
    // Bytes that could not be read stay 0, as they did before.
    int filled = 0;
    while (filled < minCodeLength)
    {
        int count = in.read(bytes, filled, minCodeLength - filled);
        if (count <= 0)
        {
            break;
        }
        filled += count;
    }

    // Remember the position right after the shortest possible code.
    in.mark(maxCodeLength);
    for (int i = minCodeLength - 1; i < maxCodeLength; i++)
    {
        final int byteCount = i + 1;
        for (CodespaceRange range : codespaceRanges)
        {
            if (range.isFullMatch(bytes, byteCount))
            {
                return toInt(bytes, byteCount);
            }
        }
        if (byteCount < maxCodeLength)
        {
            // At end of stream read() yields -1, which is stored as 0xFF.
            bytes[byteCount] = (byte) in.read();
        }
    }
    if (LOG.isWarnEnabled())
    {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < maxCodeLength; ++i)
        {
            sb.append(String.format("0x%02X (%04o) ", bytes[i], bytes[i]));
        }
        LOG.warn("Invalid character code sequence " + sb + "in CMap " + cmapName);
    }
    // PDFBOX-4811 reposition to where we were after initial read
    if (in.markSupported())
    {
        in.reset();
    }
    else
    {
        // Only the bytes consumed after mark() are lost: one per extension step.
        LOG.warn("mark() and reset() not supported, " + (maxCodeLength - minCodeLength) +
                " bytes have been skipped");
    }
    return toInt(bytes, minCodeLength); // Adobe Reader behavior
}
/**
 * Combines the leading bytes of an array into a single int, treating the first
 * byte as the most significant one and every byte as unsigned.
 *
 * @param data the bytes to combine
 * @param dataLen how many bytes from the start of {@code data} to use
 * @return the combined value; with more than four bytes the earliest ones are
 *         shifted out of the int
 */
static int toInt(byte[] data, int dataLen)
{
    int result = 0;
    int index = 0;
    while (index < dataLen)
    {
        // Make room for the next byte, then add it without sign extension.
        result = (result << 8) | (data[index] & 0xFF);
        index++;
    }
    return result;
}
/**
 * Resolves the CID for a character code.
 * <p>
 * A single-code entry takes precedence. Otherwise the CID ranges are tried in
 * list order and the first one that yields a value other than -1 wins.
 *
 * @param code character code
 * @return the mapped CID, or 0 when neither a single entry nor a range maps the code
 */
public int toCID(int code)
{
    final Integer direct = codeToCid.get(code);
    if (direct == null)
    {
        for (CIDRange candidate : codeToCidRanges)
        {
            // The code is narrowed to char here, so only its low 16 bits reach the range.
            final int mapped = candidate.map((char) code);
            if (mapped != -1)
            {
                return mapped;
            }
        }
        return 0;
    }
    return direct;
}
/**
* Convert the given part of a byte array to an integer.
* @param data the byte array
* @param offset The offset into the byte array.
* @param length The length of the data we are getting.
* @return the resulting integer
*/
private int getCodeFromArray( byte[] data, int offset, int length )
{
int code = 0;
for( int i=0; i