All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.fontbox.cmap.CMap Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.fontbox.cmap;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * This class represents a CMap file.
 *
 * @author Ben Litchfield
 */
public class CMap
{
    private static final Log LOG = LogFactory.getLog(CMap.class);

    // Scalar attributes of the CMap, shown with their initial values.
    private int wmode = 0;
    private String cmapName = null;
    private String cmapVersion = null;
    private int cmapType = -1;

    private String registry = null;
    private String ordering = null;
    private int supplement = 0;

    // Code length bounds in bytes: readCode() reads minCodeLength bytes up front and never
    // more than maxCodeLength bytes in total.
    private int minCodeLength = 4;
    private int maxCodeLength;

    // code lengths
    // Element type is required by the for-each loop in readCode().
    private final List<CodespaceRange> codespaceRanges = new ArrayList<CodespaceRange>();

    // Unicode mappings
    // Keyed by character code; toUnicode(int) returns the value as a String.
    private final Map<Integer, String> charToUnicode = new HashMap<Integer, String>();

    // inverted map
    // NOTE(review): key/value types cannot be determined from the code visible here, so the
    // declaration is left as it was - add type arguments once the usages are confirmed.
    private final Map  unicodeToByteCodes = new HashMap();

    // CID mappings
    // codeToCid is read as Integer -> Integer and codeToCidRanges is iterated as CIDRange
    // in toCID(int).
    private final Map<Integer, Integer> codeToCid = new HashMap<Integer, Integer>();
    private final List<CIDRange> codeToCidRanges = new ArrayList<CIDRange>();

    private static final String SPACE = " ";
    private int spaceMapping = -1;

    /**
     * Creates a new, empty CMap.
     *
     * <p>The constructor is package-private and performs no work of its own; every field keeps
     * the initial value given at its declaration.
     */
    CMap()
    {
    }

    /**
     * Reports whether this cmap holds at least one CID mapping, either in the
     * direct code-to-CID map or in the list of CID ranges.
     *
     * @return true if any CID mapping is present, false if both are empty.
     */
    public boolean hasCIDMappings()
    {
        // "not (both empty)" is the same as "either one is non-empty"
        return !(codeToCid.isEmpty() && codeToCidRanges.isEmpty());
    }

    /**
     * Reports whether this cmap holds at least one Unicode mapping.
     *
     * @return true if the Unicode map has entries, false if it is empty.
     */
    public boolean hasUnicodeMappings()
    {
        if (charToUnicode.isEmpty())
        {
            return false;
        }
        return true;
    }

    /**
     * Looks up the Unicode character sequence mapped to a character code.
     *
     * @param code character code
     * @return Unicode characters (may be more than one, e.g "fi" ligature), or
     *         null if the map has no entry for the code
     */
    public String toUnicode(int code)
    {
        final Integer key = Integer.valueOf(code);
        final String unicode = charToUnicode.get(key);
        return unicode;
    }

    /**
     * Reads a character code from a string in the content stream.
     * 

See "CMap Mapping" and "Handling Undefined Characters" in PDF32000 for more details. * * @param in string stream * @return character code * @throws IOException if there was an error reading the stream or CMap */ public int readCode(InputStream in) throws IOException { byte[] bytes = new byte[maxCodeLength]; in.read(bytes,0,minCodeLength); in.mark(maxCodeLength); for (int i = minCodeLength-1; i < maxCodeLength; i++) { final int byteCount = i+1; for (CodespaceRange range : codespaceRanges) { if (range.isFullMatch(bytes, byteCount)) { return toInt(bytes, byteCount); } } if (byteCount < maxCodeLength) { bytes[byteCount] = (byte)in.read(); } } if (LOG.isWarnEnabled()) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < maxCodeLength; ++i) { sb.append(String.format("0x%02X (%04o) ", bytes[i], bytes[i])); } LOG.warn("Invalid character code sequence " + sb + "in CMap " + cmapName); } // PDFBOX-4811 reposition to where we were after initial read if (in.markSupported()) { in.reset(); } else { LOG.warn("mark() and reset() not supported, " + (maxCodeLength - 1) + " bytes have been skipped"); } return toInt(bytes, minCodeLength); // Adobe Reader behavior } /** * Returns an int for the given byte array */ static int toInt(byte[] data, int dataLen) { int code = 0; for (int i = 0; i < dataLen; ++i) { code <<= 8; code |= (data[i] & 0xFF); } return code; } /** * Returns the CID for the given character code. * * @param code character code * @return CID */ public int toCID(int code) { Integer cid = codeToCid.get(code); if (cid != null) { return cid; } for (CIDRange range : codeToCidRanges) { int ch = range.map((char)code); if (ch != -1) { return ch; } } return 0; } /** * Convert the given part of a byte array to an integer. * @param data the byte array * @param offset The offset into the byte array. * @param length The length of the data we are getting. 
* @return the resulting integer */ private int getCodeFromArray( byte[] data, int offset, int length ) { int code = 0; for( int i=0; i





© 2015 - 2025 Weber Informatics LLC | Privacy Policy