All downloads are free. The search and download features use the official Maven repository.

org.sejda.sambox.pdmodel.font.PDSimpleFont Maven / Gradle / Ivy

Go to download

An Apache PDFBox fork intended to be used as PDF processor for Sejda and PDFsam related projects

There is a newer version: 3.0.21
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sejda.sambox.pdmodel.font;

import static java.util.Objects.isNull;

import java.awt.geom.GeneralPath;
import java.io.IOException;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import org.apache.fontbox.FontBoxFont;
import org.sejda.sambox.cos.COSBase;
import org.sejda.sambox.cos.COSDictionary;
import org.sejda.sambox.cos.COSName;
import org.sejda.sambox.pdmodel.font.encoding.DictionaryEncoding;
import org.sejda.sambox.pdmodel.font.encoding.Encoding;
import org.sejda.sambox.pdmodel.font.encoding.GlyphList;
import org.sejda.sambox.pdmodel.font.encoding.MacRomanEncoding;
import org.sejda.sambox.pdmodel.font.encoding.StandardEncoding;
import org.sejda.sambox.pdmodel.font.encoding.WinAnsiEncoding;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A simple font. Simple fonts use a PostScript encoding vector.
 *
 * @author John Hewson
 */
public abstract class PDSimpleFont extends PDFont
{
    private static final Logger LOG = LoggerFactory.getLogger(PDSimpleFont.class);

    protected Encoding encoding;
    protected GlyphList glyphList;
    private Boolean isSymbolic;
    private final Set noUnicode = new HashSet<>(); // for logging

    /**
     * Constructor for embedding.
     */
    PDSimpleFont()
    {
    }

    /**
     * Constructor for Standard 14.
     */
    PDSimpleFont(String baseFont)
    {
        super(baseFont);
        assignGlyphList(baseFont);
    }

    /**
     * Constructor.
     *
     * @param fontDictionary Font dictionary.
     */
    PDSimpleFont(COSDictionary fontDictionary) throws IOException
    {
        super(fontDictionary);
    }

    /**
     * Reads the Encoding from the Font dictionary or the embedded or substituted font file. Must be
     * called at the end of any subclass constructors.
     *
     * @throws IOException if the font file could not be read
     */
    protected void readEncoding() throws IOException
    {
        COSBase encodingBase = dict.getDictionaryObject(COSName.ENCODING);
        if (encodingBase instanceof COSName encodingName)
        {
            this.encoding = Encoding.getInstance(encodingName);
            if (this.encoding == null)
            {
                LOG.warn("Unknown encoding: " + encodingName.getName());
                this.encoding = readEncodingFromFont(); // fallback
            }
        }
        else if (encodingBase instanceof COSDictionary encodingDict)
        {
            Encoding builtIn = null;
            Boolean symbolic = getSymbolicFlag();

            COSName baseEncoding = encodingDict.getCOSName(COSName.BASE_ENCODING);

            boolean hasValidBaseEncoding =
                    baseEncoding != null && Encoding.getInstance(baseEncoding) != null;

            if (!hasValidBaseEncoding && Boolean.TRUE.equals(symbolic))
            {
                builtIn = readEncodingFromFont();
            }

            if (symbolic == null)
            {
                symbolic = false;
            }
            this.encoding = new DictionaryEncoding(encodingDict, !symbolic, builtIn);
        }
        else
        {
            this.encoding = readEncodingFromFont();
        }

        // normalise the standard 14 name, e.g "Symbol,Italic" -> "Symbol"
        String standard14Name = Standard14Fonts.getMappedFontName(getName());
        assignGlyphList(standard14Name);
    }

    /**
     * Called by readEncoding() if the encoding needs to be extracted from the font file.
     *
     * @throws IOException if the font file could not be read.
     */
    protected abstract Encoding readEncodingFromFont() throws IOException;

    /**
     * Returns the Encoding vector.
     */
    public Encoding getEncoding()
    {
        return encoding;
    }

    /**
     * Returns the Encoding vector.
     */
    public GlyphList getGlyphList()
    {
        return glyphList;
    }

    /**
     * Returns true the font is a symbolic (that is, it does not use the Adobe Standard Roman
     * character set).
     */
    public final boolean isSymbolic()
    {
        if (isSymbolic == null)
        {
            Boolean result = isFontSymbolic();
            // unless we can prove that the font is symbolic, we assume that it is not
            isSymbolic = Objects.requireNonNullElse(result, true);
        }
        return isSymbolic;
    }

    /**
     * Internal implementation of isSymbolic, allowing for the fact that the result may be
     * indeterminate.
     */
    protected Boolean isFontSymbolic()
    {
        Boolean result = getSymbolicFlag();
        if (result != null)
        {
            return result;
        }
        if (isStandard14())
        {
            String mappedName = Standard14Fonts.getMappedFontName(getName());
            return mappedName.equals("Symbol") || mappedName.equals("ZapfDingbats");
        }
        if (encoding == null)
        {
            // sanity check, should never happen
            if (!(this instanceof PDTrueTypeFont))
            {
                throw new IllegalStateException("Encoding should not be null!");
            }

            // TTF without its non-symbolic flag set must be symbolic
            return true;
        }
        if (encoding instanceof WinAnsiEncoding || encoding instanceof MacRomanEncoding
                || encoding instanceof StandardEncoding)
        {
            return false;
        }
        if (encoding instanceof DictionaryEncoding)
        {
            // each name in Differences array must also be in the latin character set
            for (String name : ((DictionaryEncoding) encoding).getDifferences().values())
            {
                if (".notdef".equals(name))
                {
                    // skip
                }
                else if (!(WinAnsiEncoding.INSTANCE.contains(name)
                        && MacRomanEncoding.INSTANCE.contains(name)
                        && StandardEncoding.INSTANCE.contains(name)))
                {
                    return true;
                }

            }
            return false;
        }
        // we don't know
        return null;
    }

    /**
     * Returns the value of the symbolic flag, allowing for the fact that the result may be
     * indeterminate.
     */
    protected final Boolean getSymbolicFlag()
    {
        if (getFontDescriptor() != null)
        {
            // fixme: isSymbolic() defaults to false if the flag is missing so we can't trust this
            return getFontDescriptor().isSymbolic();
        }
        return null;
    }

    @Override
    public String toUnicode(int code) throws IOException
    {
        return toUnicode(code, GlyphList.getAdobeGlyphList());
    }

    @Override
    public String toUnicode(int code, GlyphList customGlyphList) throws IOException
    {
        // allow the glyph list to be overridden for the purpose of extracting Unicode
        // we only do this when the font's glyph list is the AGL, to avoid breaking Zapf Dingbats
        GlyphList unicodeGlyphList;
        if (this.glyphList == GlyphList.getAdobeGlyphList())
        {
            unicodeGlyphList = customGlyphList;
        }
        else
        {
            unicodeGlyphList = this.glyphList;
        }

        // first try to use a ToUnicode CMap
        String unicode = super.toUnicode(code);
        if (unicode != null)
        {
            return unicode;
        }

        // if the font is a "simple font" and uses MacRoman/MacExpert/WinAnsi[Encoding]
        // or has Differences with names from only Adobe Standard and/or Symbol, then:
        //
        // a) Map the character codes to names
        // b) Look up the name in the Adobe Glyph List to obtain the Unicode value

        String name = null;
        if (encoding != null)
        {
            name = encoding.getName(code);
            unicode = unicodeGlyphList.toUnicode(name);
            if (unicode != null)
            {
                return unicode;
            }
        }

        // if no value has been produced, there is no way to obtain Unicode for the character.
        if (LOG.isWarnEnabled() && !noUnicode.contains(code))
        {
            // we keep track of which warnings have been issued, so we don't log multiple times
            noUnicode.add(code);
            if (name != null)
            {
                LOG.warn("No Unicode mapping for " + name + " (" + code + ") in font " + getName());
            }
            else
            {
                LOG.warn("No Unicode mapping for character code " + code + " in font " + getName());
            }
        }

        return null;
    }

    @Override
    public boolean isVertical()
    {
        return false;
    }

    @Override
    protected final float getStandard14Width(int code)
    {
        if (getStandard14AFM() != null)
        {
            String nameInAFM = getEncoding().getName(code);

            // the Adobe AFMs don't include .notdef, but Acrobat uses 250, test with PDFBOX-2334
            if (".notdef".equals(nameInAFM))
            {
                return 250f;
            }

            if ("nbspace".equals(nameInAFM))
            {
                // PDFBOX-4944: nbspace is missing in AFM files,
                // but PDF specification tells "it shall be typographically the same as SPACE"
                nameInAFM = "space";
            }
            else if ("sfthyphen".equals(nameInAFM))
            {
                // PDFBOX-5115: sfthyphen is missing in AFM files,
                // but PDF specification tells "it shall be typographically the same as hyphen"
                nameInAFM = "hyphen";
            }

            return getStandard14AFM().getCharacterWidth(nameInAFM);
        }
        throw new IllegalStateException("No AFM");
    }

    @Override
    public boolean isStandard14()
    {
        // this logic is based on Acrobat's behaviour, see see PDFBOX-2372
        // the Encoding entry cannot have Differences if we want "standard 14" font handling
        if (getEncoding() instanceof DictionaryEncoding dictionary)
        {
            if (dictionary.getDifferences().size() > 0)
            {
                // we also require that the differences are actually different, see PDFBOX-1900 with
                // the file from PDFBOX-2192 on Windows
                Encoding baseEncoding = dictionary.getBaseEncoding();
                if (isNull(baseEncoding))
                {
                    return false;
                }
                for (Map.Entry entry : dictionary.getDifferences().entrySet())
                {
                    if (!entry.getValue().equals(baseEncoding.getName(entry.getKey())))
                    {
                        return false;
                    }
                }
            }
        }
        return super.isStandard14();
    }

    /**
     * Returns the path for the character with the given name. For some fonts, GIDs may be used
     * instead of names when calling this method.
     *
     * @return glyph path
     * @throws IOException if the path could not be read
     */
    public abstract GeneralPath getPath(String name) throws IOException;

    /**
     * Returns true if the font contains the character with the given name.
     *
     * @throws IOException if the path could not be read
     */
    public abstract boolean hasGlyph(String name) throws IOException;

    /**
     * Returns the embedded or system font used for rendering. This is never null.
     */
    public abstract FontBoxFont getFontBoxFont();

    @Override
    public void addToSubset(int codePoint)
    {
        throw new UnsupportedOperationException();
    }

    @Override
    public void subset() throws IOException
    {
        // only TTF subsetting via PDType0Font is currently supported
        throw new UnsupportedOperationException();
    }

    @Override
    public boolean willBeSubset()
    {
        return false;
    }

    @Override
    public boolean hasExplicitWidth(int code) throws IOException
    {
        if (dict.containsKey(COSName.WIDTHS))
        {
            int firstChar = dict.getInt(COSName.FIRST_CHAR, -1);
            return code >= firstChar && code - firstChar < getWidths().size();
        }
        return false;
    }

    private void assignGlyphList(String baseFont)
    {
        // assign the glyph list based on the font
        if ("ZapfDingbats".equals(baseFont))
        {
            glyphList = GlyphList.getZapfDingbats();
        }
        else
        {
            glyphList = GlyphList.getAdobeGlyphList();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy