// com.itextpdf.text.pdf.FontDetails -- Maven / Gradle / Ivy
//
// Go to download
/*
 * $Id: b7672bd487ba8c2ae2c42c52346ed829dd03d8df $
 *
 * This file is part of the iText (R) project.
 * Copyright (c) 1998-2016 iText Group NV
 * Authors: Bruno Lowagie, Paulo Soares, et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License version 3
 * as published by the Free Software Foundation with the addition of the
 * following permission added to Section 15 as permitted in Section 7(a):
 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
 * ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
 * OF THIRD PARTY RIGHTS
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 * You should have received a copy of the GNU Affero General Public License
 * along with this program; if not, see http://www.gnu.org/licenses or write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
 * http://itextpdf.com/terms-of-use/
 *
 * The interactive user interfaces in modified source and object code versions
 * of this program must display Appropriate Legal Notices, as required under
 * Section 5 of the GNU Affero General Public License.
 *
 * In accordance with Section 7(b) of the GNU Affero General Public License,
 * a covered work must retain the producer line in every PDF that is created
 * or manipulated using iText.
 *
 * You can be released from the requirements of the license by purchasing
 * a commercial license. Buying such a license is mandatory as soon as you
 * develop commercial activities involving the iText software without
 * disclosing the source code of your own applications.
 * These activities include: offering paid services to customers as an ASP,
 * serving PDFs on the fly in a web application, shipping iText with a closed
 * source product.
 *
 * For more information, please contact iText Software Corp. at this
 * address: sales@itextpdf.com
 */
package com.itextpdf.text.pdf;

import com.itextpdf.text.ExceptionConverter;
import com.itextpdf.text.Utilities;
import com.itextpdf.text.pdf.fonts.otf.Language;
import com.itextpdf.text.pdf.languages.BanglaGlyphRepositioner;
import com.itextpdf.text.pdf.languages.GlyphRepositioner;
import com.itextpdf.text.pdf.languages.IndicCompositeCharacterComparator;

import java.io.UnsupportedEncodingException;
import java.util.*;

/**
 * Each font in the document will have an instance of this class
 * where the characters used will be represented.
 * <p>
 * Depending on the font type reported by the {@link BaseFont}, the used
 * characters are recorded in {@link #shortTag}, {@link #longTag} or
 * {@link #cjkTag}; that record is later handed to the font when it is
 * written by {@link #writeFont(PdfWriter)}.
 *
 * @author  Paulo Soares
 */
class FontDetails {

    /**
     * The indirect reference to this font
     */
    PdfIndirectReference indirectReference;
    /**
     * The font name that appears in the document body stream
     */
    PdfName fontName;
    /**
     * The font
     */
    BaseFont baseFont;
    /**
     * The font if it's an instance of TrueTypeFontUnicode
     */
    TrueTypeFontUnicode ttu;
    /**
     * The font if it's an instance of CJKFont
     */
    CJKFont cjkFont;
    /**
     * The array used with single byte encodings
     */
    byte[] shortTag;
    /**
     * The map used with double byte encodings. The key is Integer(glyph) and
     * the value is int[]{glyph, width, Unicode code}
     */
    HashMap<Integer, int[]> longTag;
    /**
     * IntHashtable with CIDs of CJK glyphs that are used in the text.
     */
    IntHashtable cjkTag;
    /**
     * The font type
     */
    int fontType;
    /**
     * true if the font is symbolic
     */
    boolean symbolic;
    /**
     * Indicates if only a subset of the glyphs and widths for that particular
     * encoding should be included in the document.
     */
    protected boolean subset = true;

    /**
     * Each font used in a document has an instance of this class.
     * This class stores the characters used in the document and other
     * specifics unique to the current working document.
     * <p>
     * Only the bookkeeping structure matching the font type is allocated;
     * for font types not listed in the switch none is created.
     *
     * @param fontName the font name
     * @param indirectReference the indirect reference to the font
     * @param baseFont the BaseFont
     */
    FontDetails(PdfName fontName, PdfIndirectReference indirectReference, BaseFont baseFont) {
        this.fontName = fontName;
        this.indirectReference = indirectReference;
        this.baseFont = baseFont;
        fontType = baseFont.getFontType();
        switch (fontType) {
            case BaseFont.FONT_TYPE_T1:
            case BaseFont.FONT_TYPE_TT:
                shortTag = new byte[256];
                break;
            case BaseFont.FONT_TYPE_CJK:
                cjkTag = new IntHashtable();
                cjkFont = (CJKFont) baseFont;
                break;
            case BaseFont.FONT_TYPE_TTUNI:
                longTag = new HashMap<Integer, int[]>();
                ttu = (TrueTypeFontUnicode) baseFont;
                symbolic = baseFont.isFontSpecific();
                break;
        }
    }

    /**
     * Gets the indirect reference to this font.
     * @return the indirect reference to this font
     */
    PdfIndirectReference getIndirectReference() {
        return indirectReference;
    }

    /**
     * Gets the font name as it appears in the document body.
     * @return the font name
     */
    PdfName getFontName() {
        return fontName;
    }

    /**
     * Gets the BaseFont of this font.
     * @return the BaseFont of this font
     */
    BaseFont getBaseFont() {
        return baseFont;
    }

    /**
     * Converts a string whose chars are glyph ids into the bytes to be placed
     * in the document, recording every glyph id in {@link #longTag}.
     *
     * @param gids the glyph ids, one per char
     * @return a three element array: the encoded bytes ({@code byte[]}), the
     * text built from the characters the font maps the glyph ids to
     * ({@code String}; glyph ids mapped to 0 contribute nothing) and the
     * sum of the glyph widths ({@code Integer})
     * @throws IllegalArgumentException if the font type is not
     * {@link BaseFont#FONT_TYPE_TTUNI}
     * @throws ExceptionConverter wrapping any exception raised during the conversion
     */
    Object[] convertToBytesGid(String gids) {
        if (fontType != BaseFont.FONT_TYPE_TTUNI) {
            throw new IllegalArgumentException("GID require TT Unicode");
        }
        try {
            StringBuilder sb = new StringBuilder();
            int totalWidth = 0;
            for (char gid : gids.toCharArray()) {
                int width = ttu.getGlyphWidth(gid);
                totalWidth += width;
                int vchar = ttu.GetCharFromGlyphId(gid);
                if (vchar != 0) {
                    sb.append(Utilities.convertFromUtf32(vchar));
                }
                Integer gl = Integer.valueOf(gid);
                // keep the first entry recorded for a glyph
                if (!longTag.containsKey(gl)) {
                    longTag.put(gl, new int[]{gid, width, vchar});
                }
            }
            return new Object[]{gids.getBytes(CJKFont.CJK_ENCODING), sb.toString(), Integer.valueOf(totalWidth)};
        }
        catch (Exception e) {
            throw new ExceptionConverter(e);
        }
    }

    /**
     * Converts the text into bytes to be placed in the document.
     * The conversion is done according to the font and the encoding and the characters
     * used are stored.
     * @param text the text to convert
     * @return the conversion; <CODE>null</CODE> if the font type is none of
     * the types handled here
     */
    byte[] convertToBytes(String text) {
        byte[] b = null;
        switch (fontType) {
            case BaseFont.FONT_TYPE_T3:
                return baseFont.convertToBytes(text);
            case BaseFont.FONT_TYPE_T1:
            case BaseFont.FONT_TYPE_TT: {
                b = baseFont.convertToBytes(text);
                int len = b.length;
                // flag every byte code produced by the conversion as used
                for (int k = 0; k < len; ++k) {
                    shortTag[b[k] & 0xff] = 1;
                }
                break;
            }
            case BaseFont.FONT_TYPE_CJK: {
                int len = text.length();
                if (cjkFont.isIdentity()) {
                    // identity: the chars themselves are recorded
                    for (int k = 0; k < len; ++k) {
                        cjkTag.put(text.charAt(k), 0);
                    }
                }
                else {
                    for (int k = 0; k < len; ++k) {
                        int val;
                        if (Utilities.isSurrogatePair(text, k)) {
                            val = Utilities.convertToUtf32(text, k);
                            // the pair occupies two chars
                            k++;
                        }
                        else {
                            val = text.charAt(k);
                        }
                        cjkTag.put(cjkFont.getCidCode(val), 0);
                    }
                }
                b = cjkFont.convertToBytes(text);
                break;
            }
            case BaseFont.FONT_TYPE_DOCUMENT: {
                b = baseFont.convertToBytes(text);
                break;
            }
            case BaseFont.FONT_TYPE_TTUNI: {
                try {
                    int len = text.length();
                    int[] metrics = null;
                    char[] glyph = new char[len];
                    int i = 0;
                    if (symbolic) {
                        b = PdfEncodings.convertToBytes(text, "symboltt");
                        len = b.length;
                        for (int k = 0; k < len; ++k) {
                            metrics = ttu.getMetricsTT(b[k] & 0xff);
                            // no metrics for this code: skip it
                            if (metrics == null) {
                                continue;
                            }
                            longTag.put(Integer.valueOf(metrics[0]), new int[]{metrics[0], metrics[1], ttu.getUnicodeDifferences(b[k] & 0xff)});
                            glyph[i++] = (char) metrics[0];
                        }
                    } else if (canApplyGlyphSubstitution()) {
                        return convertToBytesAfterGlyphSubstitution(text);
                    } else {
                        for (int k = 0; k < len; ++k) {
                            int val;
                            if (Utilities.isSurrogatePair(text, k)) {
                                val = Utilities.convertToUtf32(text, k);
                                // the pair occupies two chars
                                k++;
                            }
                            else {
                                val = text.charAt(k);
                            }
                            metrics = ttu.getMetricsTT(val);
                            // no metrics for this character: skip it
                            if (metrics == null) {
                                continue;
                            }
                            int m0 = metrics[0];
                            Integer gl = Integer.valueOf(m0);
                            if (!longTag.containsKey(gl)) {
                                longTag.put(gl, new int[]{m0, metrics[1], val});
                            }
                            glyph[i++] = (char) m0;
                        }
                    }
                    // drop the unused tail left by skipped characters
                    glyph = Utilities.copyOfRange(glyph, 0, i);
                    b = StringUtils.convertCharsToBytes(glyph);
                }
                catch (UnsupportedEncodingException e) {
                    throw new ExceptionConverter(e);
                }
                break;
            }
        }
        return b;
    }

    /**
     * Tells whether glyph substitution can be applied.
     * @return true if the font type is {@link BaseFont#FONT_TYPE_TTUNI} and the
     * font supplies a glyph substitution map
     */
    private boolean canApplyGlyphSubstitution() {
        return (fontType == BaseFont.FONT_TYPE_TTUNI) && (ttu.getGlyphSubstitutionMap() != null);
    }

    /**
     * Converts the text into bytes after replacing the character sequences
     * found in the font's glyph substitution map by their substitute glyphs.
     * The glyphs used are recorded in {@link #longTag}.
     *
     * @param text the text to convert
     * @return the glyph codes encoded with {@link CJKFont#CJK_ENCODING}
     * @throws UnsupportedEncodingException if the encoding is not supported
     * @throws IllegalArgumentException if glyph substitution cannot be applied
     * to this font
     */
    private byte[] convertToBytesAfterGlyphSubstitution(final String text) throws UnsupportedEncodingException {

        if (!canApplyGlyphSubstitution()) {
            throw new IllegalArgumentException("Make sure the font type is TTF Unicode and a valid GlyphSubstitutionTable exists!");
        }

        Map<String, Glyph> glyphSubstitutionMap = ttu.getGlyphSubstitutionMap();

        // for Indic languages: push back the CompositeCharacters with smaller length
        Set<String> compositeCharacters = new TreeSet<String>(new IndicCompositeCharacterComparator());
        compositeCharacters.addAll(glyphSubstitutionMap.keySet());

        // convert the text to a list of Glyph, also take care of the substitution
        ArrayBasedStringTokenizer tokenizer = new ArrayBasedStringTokenizer(compositeCharacters.toArray(new String[0]));
        String[] tokens = tokenizer.tokenize(text);

        List<Glyph> glyphList = new ArrayList<Glyph>(50);

        for (String token : tokens) {

            // first check whether this is in the substitution map
            Glyph subsGlyph = glyphSubstitutionMap.get(token);

            if (subsGlyph != null) {
                glyphList.add(subsGlyph);
            } else {
                // break up the string into individual characters
                for (char c : token.toCharArray()) {
                    int[] metrics = ttu.getMetricsTT(c);
                    // skip characters without metrics, as the other
                    // conversion paths do, instead of failing on null
                    if (metrics == null) {
                        continue;
                    }
                    int glyphCode = metrics[0];
                    int glyphWidth = metrics[1];
                    glyphList.add(new Glyph(glyphCode, glyphWidth, String.valueOf(c)));
                }
            }

        }

        GlyphRepositioner glyphRepositioner = getGlyphRepositioner();

        if (glyphRepositioner != null) {
            glyphRepositioner.repositionGlyphs(glyphList);
        }

        char[] charEncodedGlyphCodes = new char[glyphList.size()];

        // process each Glyph thus obtained
        for (int i = 0; i < glyphList.size(); i++) {
            Glyph glyph = glyphList.get(i);
            charEncodedGlyphCodes[i] = (char) glyph.code;
            Integer glyphCode = Integer.valueOf(glyph.code);

            if (!longTag.containsKey(glyphCode)) {
                // FIXME: this is buggy as the 3rd arg. should be a String as a Glyph can represent more than 1 char
                longTag.put(glyphCode, new int[]{glyph.code, glyph.width, glyph.chars.charAt(0)});
            }
        }

        return new String(charEncodedGlyphCodes).getBytes(CJKFont.CJK_ENCODING);
    }

    /**
     * Selects the glyph repositioner for the language supported by the font.
     *
     * @return a {@link BanglaGlyphRepositioner} for Bengali,
     * <CODE>null</CODE> for every other language
     * @throws IllegalArgumentException if the font reports no supported language
     */
    private GlyphRepositioner getGlyphRepositioner() {
        Language language = ttu.getSupportedLanguage();

        if (language == null) {
            throw new IllegalArgumentException("The supported language field cannot be null in " + ttu.getClass().getName());
        }

        switch (language) {
            case BENGALI:
                return new BanglaGlyphRepositioner(Collections.unmodifiableMap(ttu.cmap31), ttu.getGlyphSubstitutionMap());
            default:
                return null;
        }
    }

    /**
     * Writes the font definition to the document.
     * The record of used characters matching the font type is passed along
     * to the font.
     * @param writer the PdfWriter of this document
     * @throws ExceptionConverter wrapping any exception raised while writing
     */
    public void writeFont(PdfWriter writer) {
        try {
            switch (fontType) {
                case BaseFont.FONT_TYPE_T3:
                    baseFont.writeFont(writer, indirectReference, null);
                    break;
                case BaseFont.FONT_TYPE_T1:
                case BaseFont.FONT_TYPE_TT: {
                    int firstChar;
                    int lastChar;
                    // lowest used code
                    for (firstChar = 0; firstChar < 256; ++firstChar) {
                        if (shortTag[firstChar] != 0) {
                            break;
                        }
                    }
                    // highest used code
                    for (lastChar = 255; lastChar >= firstChar; --lastChar) {
                        if (shortTag[lastChar] != 0) {
                            break;
                        }
                    }
                    // no code was used at all: collapse the range to 255..255
                    if (firstChar > 255) {
                        firstChar = 255;
                        lastChar = 255;
                    }
                    baseFont.writeFont(writer, indirectReference, new Object[]{Integer.valueOf(firstChar), Integer.valueOf(lastChar), shortTag, Boolean.valueOf(subset)});
                    break;
                }
                case BaseFont.FONT_TYPE_CJK:
                    baseFont.writeFont(writer, indirectReference, new Object[]{cjkTag});
                    break;
                case BaseFont.FONT_TYPE_TTUNI:
                    baseFont.writeFont(writer, indirectReference, new Object[]{longTag, Boolean.valueOf(subset)});
                    break;
            }
        }
        catch (Exception e) {
            throw new ExceptionConverter(e);
        }
    }

    /**
     * Indicates if all the glyphs and widths for that particular
     * encoding should be included in the document.
     * @return false to include all the glyphs and widths.
     */
    public boolean isSubset() {
        return subset;
    }

    /**
     * Indicates if all the glyphs and widths for that particular
     * encoding should be included in the document. Set to false
     * to include all.
     * @param subset new value of property subset
     */
    public void setSubset(boolean subset) {
        this.subset = subset;
    }

}