All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pdfbox.pdmodel.font.PDFont Maven / Gradle / Ivy

Go to download

The Apache PDFBox library is an open source Java tool for working with PDF documents.

There is a newer version: 3.0.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel.font;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.fontbox.afm.AFMParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.afm.FontMetric;
import org.apache.fontbox.cmap.CMapParser;
import org.apache.fontbox.cmap.CMap;

import org.apache.pdfbox.encoding.Encoding;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;

import org.apache.pdfbox.pdmodel.common.COSArrayList;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.PDMatrix;
import org.apache.pdfbox.pdmodel.common.PDRectangle;

import org.apache.pdfbox.util.ResourceLoader;

import java.awt.Graphics;
import java.awt.geom.AffineTransform;

/**
 * This is the base class for all PDF fonts.
 *
 * @author Ben Litchfield
 * 
 */
public abstract class PDFont implements COSObjectable
{

    /** Logger used by this class. */
    private static final Log LOG = LogFactory.getLog(PDFont.class);

    /** The COS dictionary that backs this font. */
    protected COSDictionary font;

    /** The font encoding; only used if this is a font object and it has an encoding. */
    private Encoding fontEncoding;

    /** The descriptor of the font, created on first access. */
    private PDFontDescriptor fontDescriptor;

    /** The font matrix, created on first access. */
    protected PDMatrix fontMatrix;

    /**
     * The CMap of the font; only used if this is a font object, it has an
     * encoding and it is a type0 font with a cmap.
     */
    protected CMap cmap;

    /** The CMap holding the ToUnicode mapping. */
    protected CMap toUnicodeCmap;
    
    /** Flag reported by {@code hasToUnicode()} and changed through {@code setHasToUnicode(boolean)}. */
    private boolean hasToUnicode = false;

    /** Set once the font dictionary was found to have no Widths entry, so the lookup is not repeated. */
    private boolean widthsAreMissing = false;

    /**
     * Static cache of parsed CMaps keyed by CMap name; shared by all fonts
     * and emptied by {@code clearResources()}.
     */
    protected static Map<String, CMap> cmapObjects =
        Collections.synchronizedMap( new HashMap<String, CMap>() );

    /**
     *  A list of floats representing the widths.
     */
    private List<Float> widths = null;

    /**
     * The static, unmodifiable map of the default Adobe font metrics keyed by font name.
     */
    private static final Map<String, FontMetric> afmObjects =
        Collections.unmodifiableMap( getAdobeFontMetrics() );

    // TODO move the Map to PDType1Font as these are the 14 Standard fonts
    // which are definitely Type 1 fonts
    /**
     * Builds the map of font metrics for the 14 standard fonts plus the
     * Arial aliases, which are served by the corresponding Helvetica metrics.
     * Fonts whose metrics cannot be loaded are left out of the map.
     *
     * @return a new mutable map from font name to its metrics.
     */
    private static Map<String, FontMetric> getAdobeFontMetrics()
    {
        Map<String, FontMetric> metrics = new HashMap<String, FontMetric>();
        String[] standardFonts =
        {
            "Courier-Bold", "Courier-BoldOblique", "Courier", "Courier-Oblique",
            "Helvetica", "Helvetica-Bold", "Helvetica-BoldOblique", "Helvetica-Oblique",
            "Symbol",
            "Times-Bold", "Times-BoldItalic", "Times-Italic", "Times-Roman",
            "ZapfDingbats"
        };
        for( String fontName : standardFonts )
        {
            addAdobeFontMetric( metrics, fontName );
        }

        // PDFBOX-239
        addAdobeFontMetric(metrics, "Arial", "Helvetica");
        addAdobeFontMetric(metrics, "Arial,Bold", "Helvetica-Bold");
        addAdobeFontMetric(metrics, "Arial,Italic", "Helvetica-Oblique");
        addAdobeFontMetric(metrics, "Arial,BoldItalic", "Helvetica-BoldOblique");

        return metrics;
    }

    /**
     * Resource path prefix for CMap resources.
     * NOTE(review): not referenced in the visible part of this class; presumably used by subclasses - confirm.
     */
    protected static final String resourceRootCMAP = "org/apache/pdfbox/resources/cmap/";
    /** Resource path prefix for the AFM files; a file prefix and ".afm" are appended when metrics are loaded. */
    private static final String resourceRootAFM = "org/apache/pdfbox/resources/afm/";

    /**
     * Loads the metrics of a font whose AFM file prefix equals its name.
     *
     * @param metrics the map the metrics are added to.
     * @param name the font name, used both as map key and as AFM file prefix.
     */
    private static void addAdobeFontMetric(
            Map<String, FontMetric> metrics, String name )
    {
        addAdobeFontMetric(metrics, name, name);
    }
    
    /**
     * Loads an AFM resource and registers its metrics under the given name.
     * Loading is best effort: a missing resource is skipped, and any failure
     * is logged instead of being propagated, because this runs while the
     * static metrics map is being built.
     *
     * @param metrics the map the metrics are added to.
     * @param name the key under which the metrics are stored.
     * @param filePrefix the AFM file name without the ".afm" extension.
     */
    private static void addAdobeFontMetric(Map<String, FontMetric> metrics, String name, String filePrefix)
    {
        String resource = resourceRootAFM + filePrefix + ".afm";
        try
        {
            InputStream afmStream = ResourceLoader.loadResource( resource );
            if( afmStream != null )
            {
                try
                {
                    AFMParser parser = new AFMParser( afmStream );
                    parser.parse();
                    metrics.put( name, parser.getResult() );
                }
                finally
                {
                    afmStream.close();
                }
            }
        }
        catch (Exception e)
        {
            // keep going without these metrics, but leave a trace of the failure
            LOG.warn( "Could not load the font metrics for '" + name + "' from " + resource, e );
        }
    }

    /**
     * This will clear the CMap objects that are cached statically in
     * {@code cmapObjects}; the AFM metrics map is not touched by this method.
     * This is usually not a problem unless you want to reclaim
     * resources for a long running process.
     *
     * SPECIAL NOTE: The font calculations are currently in COSObject, which
     * is where they will reside until PDFont is mature enough to take them over.
     * PDFont is the appropriate place for them and not in COSObject but we need font
     * calculations for text extraction.  THIS METHOD WILL BE MOVED OR REMOVED
     * TO ANOTHER LOCATION IN A FUTURE VERSION OF PDFBOX.
     */
    public static void clearResources()
    {
        cmapObjects.clear();
    }

    /**
     * Creates a font backed by a new dictionary whose Type entry is set to Font.
     */
    public PDFont()
    {
        COSDictionary dictionary = new COSDictionary();
        dictionary.setItem( COSName.TYPE, COSName.FONT );
        font = dictionary;
    }

    /**
     * Constructor that wraps an existing font dictionary and then determines
     * the encoding of the font.
     *
     * @param fontDictionary The font dictionary according to the PDF specification.
     */
    public PDFont( COSDictionary fontDictionary )
    {
        font = fontDictionary;
        // NOTE(review): determineEncoding() is abstract, so the subclass implementation
        // runs here, before the subclass constructor body has been executed.
        determineEncoding();
    }

    /**
     * This will get the font descriptor for this font. The descriptor is
     * created on first access, either from the FontDescriptor dictionary of
     * the font or, if there is no such dictionary, from the AFM metrics.
     *
     * @return The font descriptor for this font, or null if neither a
     * descriptor dictionary nor AFM metrics are available.
     *
     */
    public PDFontDescriptor getFontDescriptor()
    {
        if(fontDescriptor == null)
        {
            // check the type instead of casting blindly, so that an entry that
            // is not a dictionary falls back to the AFM metrics instead of failing
            COSBase fd = font.getDictionaryObject( COSName.FONT_DESC );
            if (fd instanceof COSDictionary)
            {
                fontDescriptor = new PDFontDescriptorDictionary( (COSDictionary)fd );
            }
            else
            {
                // called so that the afm field is looked up before it is read
                getAFM();
                if( afm != null )
                {
                    fontDescriptor = new PDFontDescriptorAFM( afm );
                }
            }
        }
        return fontDescriptor;
    }

    /**
     * Stores the given font descriptor in the font dictionary and caches it.
     * Passing null stores a null FontDescriptor item and resets the cached descriptor.
     *
     * @param fdDictionary The font descriptor.
     */
    public void setFontDescriptor( PDFontDescriptorDictionary fdDictionary )
    {
        // the local keeps the static type COSDictionary, also for the null case
        final COSDictionary descriptorDictionary =
            (fdDictionary == null) ? null : fdDictionary.getCOSDictionary();
        font.setItem( COSName.FONT_DESC, descriptorDictionary );
        fontDescriptor = fdDictionary;
    }

    /**
     * Determines the encoding for the font.
     * This method has to be overridden, as there are different
     * possibilities to define a mapping. It is called by the constructor
     * that takes a font dictionary.
     */
    protected abstract void determineEncoding();

    /**
     * {@inheritDoc}
     *
     * @return the COS dictionary that backs this font.
     */
    public COSBase getCOSObject()
    {
        return font;
    }

    /**
     * This will get the font width for a character.
     *
     * @param c The byte array holding the character code to get the width for.
     * @param offset The offset into the array.
     * @param length The length of the data.
     *
     * @return The width in 1000 units of text space, e.g. 333 or 777
     *
     * @throws IOException If an error occurs while parsing.
     */
    public abstract float getFontWidth( byte[] c, int offset, int length ) throws IOException;

    /**
     * This will get the font height for a character.
     *
     * @param c The byte array holding the character code to get the height for.
     * @param offset The offset into the array.
     * @param length The length of the data.
     *
     * @return The height in 1000 units of text space, e.g. 333 or 777
     *
     * @throws IOException If an error occurs while parsing.
     */
    public abstract float getFontHeight( byte[] c, int offset, int length ) throws IOException;

    /**
     * This will get the width of this string for this font.
     *
     * @param string The string to get the width of.
     *
     * @return The width of the string in 1000 units of text space, ie 333 567...
     *
     * @throws IOException If there is an error getting the width information.
     */
    public float getStringWidth( String string ) throws IOException
    {
        byte[] data = string.getBytes("ISO-8859-1");
        float totalWidth = 0;
        for( int i=0; i -1)
                {
                    name = name.substring(name.indexOf("+")+1);
                }

            }
            else if ( baseFont instanceof COSString )
            {
                COSString string = (COSString)baseFont;
                name = string.getString();
            }
            if ( name != null )
            {
                afm = afmObjects.get( name );
            }
        }
        return afm;
    }

    private FontMetric afm = null;

    private COSBase encoding = null;
    /**
     * cache the {@link COSName#ENCODING} object from
     * the font's dictionary since it is called so often.
     * 

* Use this method instead of *

     *   font.getDictionaryObject(COSName.ENCODING);
     * 
* @return the encoding */ protected COSBase getEncoding() { if (encoding == null) { encoding = font.getDictionaryObject( COSName.ENCODING ); } return encoding; } /** * Set the encoding object from the fonts dictionary. * @param encodingValue the given encoding. */ protected void setEncoding(COSBase encodingValue) { font.setItem( COSName.ENCODING, encodingValue ); encoding = encodingValue; } /** * Encode the given value using the CMap of the font. * * @param code the code to encode. * @param length the byte length of the given code. * @param isCIDFont indicates that the used font is a CID font. * * @return The value of the encoded character. * @throws IOException if something went wrong */ protected String cmapEncoding( int code, int length, boolean isCIDFont, CMap sourceCmap ) throws IOException { String retval = null; // there is not sourceCmap if this is a descendant font if (sourceCmap == null) { sourceCmap = cmap; } if (sourceCmap != null) { retval = sourceCmap.lookup(code, length); if (retval == null && isCIDFont) { retval = sourceCmap.lookupCID(code); } } return retval; } /** * This will perform the encoding of a character if needed. * * @param c The character to encode. * @param offset The offset into the array to get the data * @param length The number of bytes to read. * * @return The value of the encoded character. * * @throws IOException If there is an error during the encoding. 
*/ public String encode( byte[] c, int offset, int length ) throws IOException { String retval = null; int code = getCodeFromArray( c, offset, length ); if( toUnicodeCmap != null ) { retval = cmapEncoding(code, length, false, toUnicodeCmap); } if( retval == null && cmap != null ) { retval = cmapEncoding(code, length, false, cmap); } // there is no cmap but probably an encoding with a suitable mapping if( retval == null ) { if( fontEncoding != null ) { retval = fontEncoding.getCharacter( code ); } if( retval == null && (cmap == null || length == 2)) { retval = getStringFromArray( c, offset, length ); } } return retval; } public int encodeToCID( byte[] c, int offset, int length ) throws IOException { int code = -1; if (encode(c, offset, length) != null) { code = getCodeFromArray( c, offset, length ); } return code; } private static final String[] SINGLE_CHAR_STRING = new String[256]; private static final String[][] DOUBLE_CHAR_STRING = new String[256][256]; static { for( int i=0; i<256; i++ ) { try { SINGLE_CHAR_STRING[i] = new String( new byte[] {(byte)i}, "ISO-8859-1" ); } catch (UnsupportedEncodingException e) { // Nothing should happen here LOG.error(e,e); } for( int j=0; j<256; j++ ) { try { DOUBLE_CHAR_STRING[i][j] = new String( new byte[] {(byte)i, (byte)j}, "UTF-16BE" ); } catch (UnsupportedEncodingException e) { // Nothing should happen here LOG.error(e,e); } } } } /** * Map the given byte(s) to a string. 
* * @param c the byte array * @param offset the offset of the byte(s) * @param length the number of bytes, usually 1 or 2 * @return the mapped string */ protected static String getStringFromArray( byte[] c, int offset, int length ) { String retval = null; if( length == 1 ) { retval = SINGLE_CHAR_STRING[(c[offset]+256)%256]; } else if( length == 2 ) { retval = DOUBLE_CHAR_STRING[(c[offset]+256)%256][(c[offset+1]+256)%256]; } return retval; } protected CMap parseCmap( String cmapRoot, InputStream cmapStream) { CMap targetCmap = null; if( cmapStream != null ) { CMapParser parser = new CMapParser(); try { targetCmap = parser.parse( cmapRoot, cmapStream ); // limit the cache to external CMaps if (cmapRoot != null) { cmapObjects.put( targetCmap.getName(), targetCmap ); } } catch (IOException exception) { LOG.error("An error occurs while reading a CMap", exception); } } return targetCmap; } /** * The will set the encoding for this font. * * @param enc The font encoding. */ public void setFontEncoding( Encoding enc ) { fontEncoding = enc; } /** * This will get or create the encoder. * * @return The encoding to use. */ public Encoding getFontEncoding() { return fontEncoding; } /** * This will always return "Font" for fonts. * * @return The type of object that this is. */ public String getType() { return font.getNameAsString( COSName.TYPE ); } // Memorized values to avoid repeated dictionary lookups private String subtype = null; private boolean type1Font; private boolean type3Font; private boolean trueTypeFont; private boolean type0Font; /** * This will get the subtype of font, Type1, Type3, ... * * @return The type of font that this is. */ public String getSubType() { if (subtype == null) { subtype = font.getNameAsString( COSName.SUBTYPE ); type1Font = "Type1".equals(subtype); trueTypeFont = "TrueType".equals(subtype); type0Font = "Type0".equals(subtype); type3Font = "Type3".equals(subtype); } return subtype; } /** * Determines if the font is a type 1 font. 
* @return returns true if the font is a type 1 font */ protected boolean isType1Font() { getSubType(); return type1Font; } /** * Determines if the font is a type 3 font. * * @return returns true if the font is a type 3 font */ public boolean isType3Font() { getSubType(); return type3Font; } /** * Determines if the font is a type 0 font. * @return returns true if the font is a type 0 font */ protected boolean isType0Font() { getSubType(); return type0Font; } private boolean isTrueTypeFont() { getSubType(); return trueTypeFont; } /** * Determines if the font is a symbolic font. * * @return returns true if the font is a symbolic font */ public boolean isSymbolicFont() { if (getFontDescriptor() != null) { return getFontDescriptor().isSymbolic(); } return false; } /** * The PostScript name of the font. * * @return The postscript name of the font. */ public String getBaseFont() { return font.getNameAsString( COSName.BASE_FONT ); } /** * Set the PostScript name of the font. * * @param baseFont The postscript name for the font. */ public void setBaseFont( String baseFont ) { font.setName( COSName.BASE_FONT, baseFont ); } /** * The code for the first char or -1 if there is none. * * @return The code for the first character. */ public int getFirstChar() { return font.getInt( COSName.FIRST_CHAR, -1 ); } /** * Set the first character this font supports. * * @param firstChar The first character. */ public void setFirstChar( int firstChar ) { font.setInt( COSName.FIRST_CHAR, firstChar ); } /** * The code for the last char or -1 if there is none. * * @return The code for the last character. */ public int getLastChar() { return font.getInt( COSName.LAST_CHAR, -1 ); } /** * Set the last character this font supports. * * @param lastChar The last character. */ public void setLastChar( int lastChar ) { font.setInt( COSName.LAST_CHAR, lastChar ); } /** * The widths of the characters. This will be null for the standard 14 fonts. * * @return The widths of the characters. 
* */ public List getWidths() { if (widths == null && !widthsAreMissing) { COSArray array = (COSArray)font.getDictionaryObject( COSName.WIDTHS ); if (array != null) { widths = COSArrayList.convertFloatCOSArrayToList(array); } else { widthsAreMissing = true; } } return widths; } /** * Set the widths of the characters code. * * @param widthsList The widths of the character codes. */ public void setWidths(List widthsList) { widths = widthsList; font.setItem( COSName.WIDTHS, COSArrayList.converterToCOSArray( widths ) ); } /** * This will get the matrix that is used to transform glyph space to * text space. By default there are 1000 glyph units to 1 text space * unit, but type3 fonts can use any value. * * Note:If this is a type3 font then it can be modified via the PDType3Font.setFontMatrix, otherwise this * is a read-only property. * * @return The matrix to transform from glyph space to text space. */ public PDMatrix getFontMatrix() { if (fontMatrix == null) { COSArray array = (COSArray)font.getDictionaryObject( COSName.FONT_MATRIX ); if( array == null ) { array = new COSArray(); array.add( new COSFloat( 0.001f ) ); array.add( COSInteger.ZERO ); array.add( COSInteger.ZERO ); array.add( new COSFloat( 0.001f ) ); array.add( COSInteger.ZERO ); array.add( COSInteger.ZERO ); } fontMatrix = new PDMatrix(array); } return fontMatrix; } /** * This will get the fonts bounding box. * * @return The fonts bounding box. * * @throws IOException If there is an error getting the bounding box. */ public abstract PDRectangle getFontBoundingBox() throws IOException; /** * {@inheritDoc} */ public boolean equals( Object other ) { return other instanceof PDFont && ((PDFont)other).getCOSObject() == this.getCOSObject(); } /** * {@inheritDoc} */ public int hashCode() { return this.getCOSObject().hashCode(); } /** * Determines the width of the given character. 
* @param charCode the code of the given character * @return the width of the character */ public float getFontWidth( int charCode ) { float width = -1; int firstChar = getFirstChar(); int lastChar = getLastChar(); if (charCode >= firstChar && charCode <= lastChar) { // maybe the font doesn't provide any widths if (!widthsAreMissing) { getWidths(); if (widths != null) { Float w = widths.get(charCode - firstChar); width = w == null ? 0 : w; } } } else { PDFontDescriptor fd = getFontDescriptor(); if (fd instanceof PDFontDescriptorDictionary) { width = fd.getMissingWidth(); } } return width; } /** * Determines if a font as a ToUnicode entry. * @return true if the font has a ToUnicode entry */ protected boolean hasToUnicode() { return hasToUnicode; } /** * Sets hasToUnicode to the given value. * @param hasToUnicodeValue the given value for hasToUnicode */ protected void setHasToUnicode(boolean hasToUnicodeValue) { hasToUnicode = hasToUnicodeValue; } /** * Determines the width of the space character. * @return the width of the space character */ public abstract float getSpaceWidth(); /** * Returns the toUnicode mapping if present. * * @return the CMap representing the toUnicode mapping */ public CMap getToUnicodeCMap() { return toUnicodeCmap; } /** * Calling this will release all cached information. */ public void clear() { } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy