All downloads are free. Search and download functionality uses the official Maven repository.

org.jpedal.fonts.tt.CMAP Maven / Gradle / Ivy

The newest version!
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 @LICENSE@
 *
 * ---------------
 * CMAP.java
 * ---------------
 */
package org.jpedal.fonts.tt;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.jpedal.fonts.StandardFonts;
import org.jpedal.utils.LogWriter;

public class CMAP extends Table {

    /**
     * format 0 data for each subtable: glyphIndexToChar[subtable][position 0-255]
     * holds the byte read at that position of the subtable (see readFormatZeroTable)
     */
    protected int[][] glyphIndexToChar;

    //set to true in setEncodingToUse for some platform/encoding combinations of a format 4 table
    private boolean remapType4;

    //set once a format 0 subtable has been read
    private boolean hasFormatZero;

    //inverse of the format 0 data: indexed by the byte value read, holds the position it was read from
    //(shared by all format 0 subtables, so the last one read wins)
    private int[] glyphToIndex;

    //flags a format 6 subtable is present - used as a fallback if not able to map elsewhere
    private boolean hasSix;

    //flags set when a format 4 / format 6 subtable has actually been read
    private boolean hasFormat4;
    private boolean hasFormat6;

    //subtable index of the format 4 data currently held (-1 until one has been read)
    private int lastFormat4Found = -1;

    //used by format 6
    private int firstCode = -1;
    private int entryCount = -1;

    //used by format 4 (half of the segCountX2 value read from the subtable)
    private int segCount;

    /**
     * which type of mapping to use (chosen in setEncodingToUse)
     */
    private int fontMapping;

    //used by format 4
    protected int[] endCode;
    protected int[] startCode;
    protected int[] idDelta;
    protected int[] idRangeOffset;
    protected int[] glyphIdArray;
    //used by format 6 - indexed by character code (first firstCode entries are left as 0)
    private int[] f6glyphIdArray;
    //used by format 4 - start position in glyphIdArray worked out for each segment
    private int[] offset;

    //used by Format 12
    int nGroups;
    private int[] startCharCode;
    private int[] endCharCode;
    private int[] startGlyphCode;

    /**
     * Header values read for each subtable (one entry per subtable).
     * An entry in CMAPformats is reset to -1 if the subtable format is not supported
     */
    protected int[] CMAPformats, CMAPlength, CMAPlang, CMAPsegCount, CMAPsearchRange, CMAPentrySelector, CMAPrangeShift, CMAPreserved;

    /*Platform-specific ID list (kept for reference only - not used)*/
//	private static String[] PlatformSpecificID={"Roman","Japanese","Traditional Chinese","Korean",
//			"Arabic","Hebrew","Greek","Russian",
//			"RSymbol","Devanagari","Gurmukhi","Gujarati",
//			"Oriya","Bengali","Tamil","Telugu",
//			"Kannada","Malayalam","Sinhalese","Burmese",
//			"Khmer","Thai","Laotian","Georgian",
//			"Armenian","Simplified Chinese","Tibetan","Mongolian",
//			"Geez","Slavic","Vietnamese","Sindhi","(Uninterpreted)"};
//
    /*Platform ID names (kept for reference only - not used)*/
    //private static String[] PlatformIDName={"Unicode","Macintosh","Reserved","Microsoft"};

    /**
     * shows which encoding used (platform ID read for each subtable)
     */
    protected int[] platformID;

    /**
     * glyph names which differ from Mac Roman, mapped to the code to use for them.
     * Typed as Integer values so a lookup can be assigned straight to an int index
     */
    private static final Map<String, Integer> exceptions;

    /*set up differences from Mac Roman*/
    static {

        final String[] keys = {"notequal", "infinity", "lessequal", "greaterequal",
                "partialdiff", "summation", "product", "pi",
                "integral", "Omega", "radical", "approxequal",
                "Delta", "lozenge", "Euro", "apple"};

        final int[] values = {173, 176, 178, 179,
                182, 183, 184, 185,
                186, 189, 195, 197,
                198, 215, 219, 240};

        //keys and values are parallel arrays of the same length
        final Map<String, Integer> differences = new HashMap<String, Integer>();
        for (int i = 0; i < values.length; i++) {
            differences.put(keys[i], values[i]);
        }
        exceptions = differences;

        StandardFonts.checkLoaded(StandardFonts.WIN);

    }

    /**
     * which CMAP to use to decode the font - an index into the per-subtable arrays,
     * set by setEncodingToUse (left as -1 there if no usable subtable is found)
     */
    private int formatToUse;

    //first two values read from the table header
    protected int id, numberSubtables;

    //per subtable: value passed to skip() after selecting the CMAP table to reach the subtable,
    //and the platform specific ID read from its record
    protected int[] CMAPsubtables, platformSpecificID;

    //font encoding passed in to setEncodingToUse
    private int fontEncoding;

    /**
     * Reads the 'cmap' table: the header, one record per subtable and then each
     * subtable whose format is handled here (0 with length 262, 4, 6 and 12).
     * Any other subtable has its entry in CMAPformats reset to -1.
     *
     * @param currentFontFile font data to read from (the header is read from the current
     *                        position, so it is expected to be at the start of the table already)
     * @param startPointer    0 if there is no table, in which case this is logged and nothing is read
     */
    public CMAP(final FontFile2 currentFontFile, final int startPointer) {

        final boolean debug = false;

        if (debug) {
            System.out.println("CMAP " + this);
        }

        //no table - log it and leave everything unset
        if (startPointer == 0) {
            LogWriter.writeLog("No CMAP table found");
            return;
        }

        //table header
        id = currentFontFile.getNextUint16();
        numberSubtables = currentFontFile.getNextUint16();

        //one slot per subtable in each array
        final int total = numberSubtables;
        CMAPsubtables = new int[total];
        platformID = new int[total];
        platformSpecificID = new int[total];
        CMAPformats = new int[total];
        CMAPsearchRange = new int[total];
        CMAPentrySelector = new int[total];
        CMAPrangeShift = new int[total];
        CMAPreserved = new int[total];
        CMAPsegCount = new int[total];
        CMAPlength = new int[total];
        CMAPlang = new int[total];
        glyphIndexToChar = new int[total][256];

        glyphToIndex = new int[256];

        //record for each subtable (platform, platform specific id and where it is)
        for (int rec = 0; rec < total; rec++) {

            platformID[rec] = currentFontFile.getNextUint16();
            platformSpecificID[rec] = currentFontFile.getNextUint16();
            CMAPsubtables[rec] = currentFontFile.getNextUint32();

            if (debug) {
                System.out.println("IDs platformID=" + platformID[rec] + " platformSpecificID=" + platformSpecificID[rec] + " CMAPsubtables=" + CMAPsubtables[rec]);
            }
        }

        //now the subtables themselves
        for (int sub = 0; sub < total; sub++) {

            //move to start of this subtable
            currentFontFile.selectTable(FontFile2.CMAP);
            currentFontFile.skip(CMAPsubtables[sub]);

            //first 3 values are read as 16 bit whatever the format
            CMAPformats[sub] = currentFontFile.getNextUint16();
            CMAPlength[sub] = currentFontFile.getNextUint16();
            CMAPlang[sub] = currentFontFile.getNextUint16();

            if (debug) {
                System.out.println(sub + " type=" + CMAPformats[sub] + " length=" + CMAPlength[sub] + " lang=" + CMAPlang[sub]);
            }

            switch (CMAPformats[sub]) {

                case 0:
                    //only the standard 262 byte version is read
                    if (CMAPlength[sub] == 262) {
                        readFormatZeroTable(currentFontFile, sub);
                    } else {
                        CMAPformats[sub] = -1;
                    }
                    break;

                case 4:
                    readFormat4Table(sub, currentFontFile);
                    break;

                case 6:
                    //flag if present
                    hasSix = true;
                    readFormat6Table(currentFontFile);
                    break;

                case 12:
                    readFormat12Table(currentFontFile);
                    break;

                default:
                    //unsupported so reset to avoid it being selected
                    CMAPformats[sub] = -1;
                    break;
            }
        }
    }

    /**
     * Reads a format 4 subtable (the format, length and lang values have already been read).
     * <p>
     * Some font files have more than one format 4 subtable with different lengths. Only the
     * data of the longest one seen so far is kept: a shorter subtable just copies the header
     * values of the one already held and is not read any further.
     *
     * @param j               index of the subtable in the per-subtable arrays
     * @param currentFontFile font data positioned just after the first three subtable values
     */
    private void readFormat4Table(final int j, final FontFile2 currentFontFile) {

        //read values
        CMAPsegCount[j] = currentFontFile.getNextUint16();
        //keep in a local until we know this subtable replaces the one held, otherwise
        //segCount would stop matching the endCode/startCode arrays we keep
        final int newSegCount = CMAPsegCount[j] / 2;
        CMAPsearchRange[j] = currentFontFile.getNextUint16(); //searchrange
        CMAPentrySelector[j] = currentFontFile.getNextUint16(); //entrySelector
        CMAPrangeShift[j] = currentFontFile.getNextUint16(); //rangeShift

        //check current format 4 is greater than previous or vice versa and act accordingly
        //because some font files have more than one format 4 subtables with different length
        if (hasFormat4) {
            if (CMAPlength[lastFormat4Found] > CMAPlength[j]) {
                //existing data is longer - keep it (and segCount) untouched
                CMAPlength[j] = CMAPlength[lastFormat4Found];
                CMAPsegCount[j] = CMAPsegCount[lastFormat4Found];
                CMAPsearchRange[j] = CMAPsearchRange[lastFormat4Found]; //searchrange
                CMAPentrySelector[j] = CMAPentrySelector[lastFormat4Found]; //entrySelector
                CMAPrangeShift[j] = CMAPrangeShift[lastFormat4Found]; //rangeShift
                return;
            } else if (CMAPlength[lastFormat4Found] < CMAPlength[j]) {
                CMAPlength[lastFormat4Found] = CMAPlength[j];
                CMAPsegCount[lastFormat4Found] = CMAPsegCount[j];
                CMAPsearchRange[lastFormat4Found] = CMAPsearchRange[j]; //searchrange
                CMAPentrySelector[lastFormat4Found] = CMAPentrySelector[j]; //entrySelector
                CMAPrangeShift[lastFormat4Found] = CMAPrangeShift[j]; //rangeShift
            }
        }

        segCount = newSegCount;
        lastFormat4Found = j;
        hasFormat4 = true;

        //read tables (order of reads matches layout in the subtable)
        endCode = readUint16Values(currentFontFile, segCount);
        CMAPreserved[j] = currentFontFile.getNextUint16(); //reserved (should be zero)
        startCode = readUint16Values(currentFontFile, segCount);
        //read unsigned - getFormat4Value wraps the sum modulo 65536
        idDelta = readUint16Values(currentFontFile, segCount);
        idRangeOffset = readUint16Values(currentFontFile, segCount);

        /*create offsets*/
        offset = new int[segCount];
        int cumulative = 0;
        for (int i = 0; i < segCount; i++) {

            if (idDelta[i] == 0) { // && startCode[i]!=endCode[i]){
                offset[i] = cumulative;

                //fixes bug in mapping theSansOffice tff font
                final boolean usesNoEntries = startCode[i] == endCode[i] && idRangeOffset[i] == 0;
                if (!usesNoEntries) {
                    cumulative += 1 + endCode[i] - startCode[i];
                }
            }
        }

        // glyphIdArray at end (never negative, even if the length value is too small)
        final int count = Math.max(0, (CMAPlength[j] - 16 - (segCount * 8)) / 2);
        glyphIdArray = readUint16Values(currentFontFile, count);
    }

    /**
     * reads the next count unsigned 16 bit values from the font into a new array
     */
    private static int[] readUint16Values(final FontFile2 currentFontFile, final int count) {
        final int[] values = new int[count];
        for (int i = 0; i < count; i++) {
            values[i] = currentFontFile.getNextUint16();
        }
        return values;
    }

    /**
     * Reads a format 6 subtable (the format, length and lang values have already been read).
     * Glyph ids are stored against their character code, so the array is sized
     * firstCode + entryCount and the entries below firstCode stay as 0.
     */
    private void readFormat6Table(final FontFile2 currentFontFile) {
        hasFormat6 = true;

        firstCode = currentFontFile.getNextUint16();
        entryCount = currentFontFile.getNextUint16();

        final int endExclusive = firstCode + entryCount;
        f6glyphIdArray = new int[endExclusive];

        for (int code = firstCode; code < endExclusive; code++) {
            f6glyphIdArray[code] = currentFontFile.getNextUint16();
        }
    }

    /**
     * Reads the groups of a format 12 subtable.
     * <p>
     * The constructor has already read three 16 bit values from the subtable before
     * this is called, so one more 16 bit value and a 32 bit value are skipped here
     * before the group count. (With the OpenType layout of uint16 format, uint16 reserved,
     * uint32 length, uint32 language this leaves us at the group count.)
     */
    private void readFormat12Table(final FontFile2 currentFontFile) {

        //skip what is left of the header
        currentFontFile.getNextUint16(); //rest of length
        currentFontFile.getNextUint32(); //lang

        nGroups = currentFontFile.getNextUint32();

        startCharCode = new int[nGroups];
        endCharCode = new int[nGroups];
        startGlyphCode = new int[nGroups];

        //each group is 3 values - first code, last code and glyph for first code
        int group = 0;
        while (group < nGroups) {
            startCharCode[group] = currentFontFile.getNextUint32();
            endCharCode[group] = currentFontFile.getNextUint32();
            startGlyphCode[group] = currentFontFile.getNextUint32();
            group++;
        }
    }

    /**
     * Reads the 256 single byte entries of a format 0 subtable, storing each value
     * against its position for subtable j and also recording the reverse lookup
     * (value read to position) in glyphToIndex.
     */
    private void readFormatZeroTable(final FontFile2 currentFontFile, final int j) {
        hasFormatZero = true;

        int position = 0;
        while (position < 256) {

            final int byteValue = currentFontFile.getNextUint8();

            glyphIndexToChar[j][position] = byteValue;
            glyphToIndex[byteValue] = position;

            position++;
        }
    }

    /**
     * Creates a CMAP without reading any font data - all tables are left unset.
     */
    public CMAP() {
        super();
    }


    /**
     * convert raw glyph number to Character code using the subtable chosen in
     * setEncodingToUse (falling back to a format 6 table if one exists and nothing was found).
     *
     * @param glyph glyph name (can be null)
     * @param index raw index for the glyph
     * @return value found in the table. -1 can be returned if no subtable was selected
     * or a format 4/12 lookup finds nothing
     */
    public int convertIndexToCharacterCode(final String glyph, int index) {

        int index2 = -1;
        final int rawIndex = index;

        //setEncodingToUse leaves formatToUse as -1 if font has no supported subtable
        int format = formatToUse < 0 ? -1 : CMAPformats[formatToUse];

        final boolean debugMapping = false; //(index==223);

        if (debugMapping) {
            System.out.println(glyph + " fontMapping=" + fontMapping + " index=" + index + ' ' + remapType4);
        }

        /*
         * convert index if needed
         */
        if ((fontMapping == 1 || fontMapping == 2 || fontMapping == 3 || (fontMapping == 4 && remapType4))) { //) && (!"notdef".equals(glyph))) {

            if (glyph != null && !"notdef".equals(glyph)) {
                index2 = index; //StandardFonts.lookupCharacterIndex(glyph,StandardFonts.WIN);

                index = StandardFonts.getAdobeMap(glyph);

            } else if (exceptions.containsKey(glyph)) {
                //NOTE(review): only reached for a null or "notdef" name and neither is a key
                //in exceptions, so this looks unreachable - confirm the intended order of the tests
                //(explicit cast so value can be assigned to the int index)
                index = (Integer) exceptions.get(glyph);
            }
        }

        int value = -1;

        //exception found in Itext
        if (rawIndex == 128 && endCode != null && "Euro".equals(glyph)) {
            value = getFormat4Value(8364, value);
        } else if (format == 0) { //if no cmap use also identity

            //hack - table only has 256 entries so anything outside uses entry 0
            if (index < 0 || index > 255) {
                index = 0;
            }

            value = glyphIndexToChar[formatToUse][index];

            //retry with original index (if it fits in the table)
            if (value == 0 && index2 >= 0 && index2 < 256) {
                value = glyphIndexToChar[formatToUse][index2];
            }

            //if no Format 0 check format 4 (see case 26831)
            if (value == 0) {
                final int format4Value = getFormat4Value(index, 0);
                if (format4Value > 0) {
                    value = format4Value;
                }
            }

        } else if (format == 4) {

            value = getFormat4Value(index, value);

            //hack for odd value in customer file
            if (value == -1) {

                if (index > 0xf000) {
                    value = getFormat4Value(index - 0xf000, value);
                } else {
                    value = getFormat4Value(index + 0xf000, value);
                }

            }

            //see 18113 fixes ligatures on page
            if (value == -1) {
                value = getFormat4Value(rawIndex + 0xf000, value);
            }
        } else if (format == 12) {
            value = getFormat12Value(index, debugMapping, value);
        }

        //second attempt if no value found
        if (value == -1 && hasSix) {
            index = rawIndex;
            format = 6;
        }

        if (format == 6) {

            if (fontEncoding != 1) {
                index = StandardFonts.lookupCharacterIndex(glyph, StandardFonts.MAC);
            }

            //anything outside the table (including a negative lookup result) maps to 0
            if (index < 0 || index >= f6glyphIdArray.length) {
                value = 0;
            } else {
                value = f6glyphIdArray[index];
            }
        }

        if (debugMapping) {
            System.out.println("returns " + value + ' ' + this);
        }

        return value;
    }

    /**
     * lookup tables similar to format 4
     * see https://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html
     * <p>
     * Finds the first group whose range contains index and returns the start glyph of
     * that group plus the distance of index from the start of the range.
     * If no group contains index, the value passed in is returned unchanged.
     */
    private int getFormat12Value(final int index, final boolean debugMapping, int value) {

        for (int group = 0; group < nGroups; group++) {

            if (debugMapping) {
                System.out.println("table=" + group + " start=" + startCharCode[group] + ' ' + index +
                        " end=" + endCharCode[group] + " glypgStartCode[i]=" + startGlyphCode[group]);
            }

            //not in this group so try next
            if (endCharCode[group] < index || startCharCode[group] > index) {
                continue;
            }

            value = startGlyphCode[group] + index - startCharCode[group];
            break;
        }

        return value;
    }

    /**
     * Looks up index in the format 4 data. Only the first segment whose range contains
     * index is used. If it has no range offset the result is (idDelta + index) modulo 65536,
     * otherwise it is read from glyphIdArray (when the position worked out is inside the array).
     * If nothing is found the value passed in is returned unchanged.
     */
    private int getFormat4Value(final int index, int value) {

        final boolean debugMapping = false;

        for (int seg = 0; seg < segCount; seg++) {

            if (debugMapping) {
                System.out.println("Segtable=" + seg + " start=" + startCode[seg] + ' ' + index +
                        " end=" + endCode[seg] + " idRangeOffset[i]=" + idRangeOffset[seg] +
                        " offset[i]=" + offset[seg] + " idRangeOffset[i]=" + idRangeOffset[seg] + " idDelta[i]=" + idDelta[seg]);
            }

            //index is outside this segment
            if (endCode[seg] < index || startCode[seg] > index) {
                continue;
            }

            if (idRangeOffset[seg] == 0) {

                if (debugMapping) {
                    System.out.println("xxx=" + (idDelta[seg] + index));
                }

                value = (idDelta[seg] + index) % 65536;

            } else {

                final int idx = offset[seg] + (index - startCode[seg]);
                if (idx < glyphIdArray.length) {
                    value = glyphIdArray[idx];
                }

                if (debugMapping) {
                    System.out.println("value=" + value + " idx=" +
                            idx + " glyphIdArrays=" + glyphIdArray[0] + ' ' +
                            glyphIdArray[1] + ' ' + glyphIdArray[2] + " offset[i]=" + offset[seg] +
                            " index=" + index + " startCode[" + seg + "]=" + startCode[seg] + " i=" + seg);
                }
            }

            //first matching segment decides the result
            break;
        }

        return value;
    }

    /**
     * work out correct CMAP table to use.
     * <p>
     * Sets formatToUse (index of the subtable to use, left as -1 if none of the cases
     * below match) and fontMapping (how convertIndexToCharacterCode treats the index).
     * The cases are tried in the order they appear in the code, which is case 1, 2, 4, 3 and then 5.
     * Note remapType4 is only ever set to true here - it is not cleared by a later call.
     *
     * @param hasEncoding  used with fontEncoding to pick the mapping in cases 2 to 5
     * @param fontEncoding encoding of font (compared against the StandardFonts constants) - also stored
     * @param isCID        if true case 2 (Mac format 0 table) is not tried
     */
    public void setEncodingToUse(final boolean hasEncoding, final int fontEncoding, final boolean isCID) {

        final boolean encodingDebug = false;

        this.fontEncoding = fontEncoding;

        if (encodingDebug) {
            System.out.println(this + "hasEncoding=" + hasEncoding + " fontEncoding=" + fontEncoding + " isCID=" + isCID);
        }

        formatToUse = -1;

        final int count = platformID.length;

        /*case 1 - first subtable with platform ID 3 and format 0 or 1*/
        for (int i = 0; i < count; i++) {

            if ((platformID[i] == 3) && (CMAPformats[i] == 1 || CMAPformats[i] == 0)) {
                formatToUse = i;
                this.fontMapping = 1;
                i = count; //exit loop

                if (encodingDebug) {
                    System.out.println("case1");
                }
            }
        }

        /*case 2 - first subtable with platform ID 1 and format 0 (not used for CID fonts)*/
        boolean wasCase2 = false;
        if (formatToUse == -1 && hasFormatZero && !isCID) {

            for (int i = 0; i < count; i++) {
                if (platformID[i] == 1 && CMAPformats[i] == 0) {
                    formatToUse = i;

                    //false only if format 0 table has an entry for 223 and format 4 lookup of 223 gives 0
                    wasCase2 = !(glyphIndexToChar[formatToUse][223] != 0 && getFormat4Value(223, 0) == 0);

                    if (hasEncoding) {
                        fontMapping = 2;
                        StandardFonts.checkLoaded(StandardFonts.MAC);
                    } else if (!wasCase2 && platformSpecificID[formatToUse] == 0 && platformID[formatToUse] == 1) {
                        fontMapping = 3;
                        StandardFonts.checkLoaded(StandardFonts.WIN);

                    } else {
                        fontMapping = -1;
                    }

                    i = count; //exit loop

                    if (encodingDebug) {
                        System.out.println("case2 fontMapping=" + fontMapping + " formatToUse=" + formatToUse + ' ' + platformSpecificID[formatToUse] + ' ' + platformID[formatToUse] + ' ' + hasEncoding + ' ' + fontEncoding + ' ' + wasCase2);
                    }
                }
            }
        }
        
        /*case 4 - no simple maps or prefer to last 1*/
        /*last check uses fl glyph and sticks to case 1 if found*/
        /*uses first format 4 subtable - this replaces any choice made in case 2 if fontMapping is 3 or wasCase2 is set*/
        if (formatToUse == -1 || fontMapping == 3 || wasCase2) {
            //if((formatToUse==-1)){
            for (int i = 0; i < count; i++) {
                if ((CMAPformats[i] == 4)) {
                    formatToUse = i;
                    fontMapping = 4;

                    i = count; //exit loop

                    if (encodingDebug) {
                        System.out.println("case4 fontMapping=" + fontMapping + " formatToUse=" + formatToUse + ' ' + platformSpecificID[formatToUse] + ' ' + platformID[formatToUse] + ' ' + hasEncoding + ' ' + fontEncoding);
                    }

                    //work out if glyph name needs remapping before lookup - first matching test wins
                    if (platformSpecificID[formatToUse] == 3 && platformID[formatToUse] == 0 && count > 1 && (hasEncoding || fontEncoding == StandardFonts.STD)) {
                        remapType4 = true;
                        // System.out.println("a "+wasCase2);
                    } else if ((platformSpecificID[formatToUse] == 1 || platformSpecificID[formatToUse] == 3) &&
                            platformID[formatToUse] == 0 && hasEncoding && fontEncoding == StandardFonts.WIN) {
                        remapType4 = true;
                        //  System.out.println("b");
                    } else if (platformSpecificID[formatToUse] == 0 && platformID[formatToUse] == 0 && hasEncoding && fontEncoding == StandardFonts.WIN) {
                        remapType4 = true;
                        //  System.out.println("c");
                    } else if (platformSpecificID[formatToUse] == 1 && platformID[formatToUse] == 3 && hasEncoding && fontEncoding == StandardFonts.WIN &&
                            (wasCase2 || getFormat4Value(223, 0) == 0)) {
                        remapType4 = true;
                        // System.out.println("d "+wasCase2+ " "+isCase2+" "+(glyphIndexToChar[formatToUse][223]+" "+getFormat4Value(223, 0)));
                    } else if (platformSpecificID[formatToUse] == 1 && platformID[formatToUse] == 3 && hasEncoding && (fontEncoding == StandardFonts.MAC || (count == 1 && fontEncoding == StandardFonts.WIN))) {
                        remapType4 = true;
                        //  System.out.println("e");
                    } else if (!hasEncoding && fontEncoding != 1) {
                        //NOTE(review): literal 1 is presumably one of the StandardFonts encoding constants - confirm which
                        remapType4 = true;
                        //  System.out.println("g");
                    }
                }
            }
        }

        
        /*case 3 - no MAC cmap in other ranges and substituting font */
        /*uses first format 6 subtable*/
        if (formatToUse == -1) {
            for (int i = 0; i < count; i++) {
                if ((CMAPformats[i] == 6)) {
                    formatToUse = i;
                    if (!hasEncoding) {
                        fontMapping = 2;
                        StandardFonts.checkLoaded(StandardFonts.MAC);
                    } else {
                        fontMapping = 6;
                    }

                    i = count; //exit loop

                    if (encodingDebug) {
                        System.out.println("case3 fontMapping=" + fontMapping + " formatToUse=" + formatToUse + ' ' + platformID[formatToUse]);
                    }
                }
            }
        }

        
        /*case 5 - type12*/
        /*uses first format 12 subtable*/
        if (formatToUse == -1) {
            for (int i = 0; i < count; i++) {
                if ((CMAPformats[i] == 12)) {
                    formatToUse = i;
                    if (!hasEncoding) {
                        fontMapping = 2;
                        StandardFonts.checkLoaded(StandardFonts.MAC);
                    } else {
                        fontMapping = 12;
                    }

                    i = count; //exit loop

                    if (encodingDebug) {
                        System.out.println("case5");
                    }
                }
            }
        }

        //Zapf always uses mapping 2 whichever subtable was chosen above
        if (fontEncoding == StandardFonts.ZAPF) {
            fontMapping = 2;

            StandardFonts.checkLoaded(StandardFonts.MAC);

            if (encodingDebug) {
                System.out.println("Zapf");
            }
        }
    }

    /**
     * turn the table read into a Map with Integer keys and Integer values.
     * <ul>
     * <li>format 4 read - every code in every segment mapped to its format 4 lookup (0 if none)</li>
     * <li>otherwise format 6 read - each code from firstCode mapped to its glyph id</li>
     * <li>otherwise the format 0 reverse lookup - each non-zero entry of glyphToIndex
     * mapped to its position in glyphToIndex</li>
     * </ul>
     *
     * @return new Map (empty if there is no table data to use)
     */
    public Map buildCharStringTable() {

        final Map<Integer, Integer> glyfValues = new HashMap<Integer, Integer>();

        if (hasFormat4) {
            //walk segments in order so a code in several segments ends up with a single entry
            for (int z = 0; z < segCount; z++) {
                final int total = endCode[z] - startCode[z] + 1;
                for (int q = 0; q < total; q++) {
                    final int code = startCode[z] + q;
                    glyfValues.put(code, getFormat4Value(code, 0));
                }
            }
        } else if (hasFormat6) {
            for (int z = 0; z < entryCount; z++) {
                glyfValues.put(firstCode + z, f6glyphIdArray[firstCode + z]);
            }
        } else if (glyphToIndex != null) { //not set if no table was read
            for (int z = 0; z < glyphToIndex.length; z++) {
                if (glyphToIndex[z] > 0) {
                    glyfValues.put(glyphToIndex[z], z);
                }
            }
        }

        return glyfValues;
    }

    /**
     * Gets the entry of the format 0 reverse lookup for rawInt.
     *
     * @param rawInt position in the reverse lookup
     * @return value stored for rawInt, or 0 (same as an entry which was never set) if
     * no table has been read or rawInt is outside the table
     */
    public int getGlyphToIndex(final int rawInt) {

        if (glyphToIndex == null || rawInt < 0 || rawInt >= glyphToIndex.length) {
            return 0;
        }

        return glyphToIndex[rawInt];
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy