All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.sejda.sambox.pdmodel.font.FileSystemFontProvider Maven / Gradle / Ivy

Go to download

An Apache PDFBox fork intended to be used as PDF processor for Sejda and PDFsam related projects

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sejda.sambox.pdmodel.font;

import static java.util.Objects.nonNull;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.security.AccessControlException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;

import org.apache.fontbox.FontBoxFont;
import org.apache.fontbox.cff.CFFCIDFont;
import org.apache.fontbox.cff.CFFFont;
import org.apache.fontbox.ttf.NamingTable;
import org.apache.fontbox.ttf.OS2WindowsMetricsTable;
import org.apache.fontbox.ttf.OTFParser;
import org.apache.fontbox.ttf.OpenTypeFont;
import org.apache.fontbox.ttf.TTFParser;
import org.apache.fontbox.ttf.TrueTypeCollection;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.fontbox.type1.Type1Font;
import org.apache.fontbox.util.autodetect.FontFileFinder;
import org.sejda.commons.util.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A FontProvider which searches for fonts on the local filesystem.
 *
 * @author John Hewson
 */
public class FileSystemFontProvider extends FontProvider
{
    /** Cache of parsed fonts; static, hence shared by every instance of this provider. */
    private static final FontCache FONT_CACHE = new FontCache(); // todo: static cache isn't ideal
    /** Delimiter placed between the fields of one line of the on-disk font metadata cache. */
    private static final String FONT_CACHE_SEPARATOR = "|";

    private static final Logger LOG = LoggerFactory.getLogger(FileSystemFontProvider.class);

    /** Font metadata gathered either by scanning font files or by reading the on-disk cache. */
    private final List<FSFontInfo> fontInfoList = new ArrayList<>();

    /**
     * Metadata describing one font found on the file system. The font itself is only parsed when
     * {@link #getFont()} is called, and the parsed instance is then kept in the shared font cache.
     */
    private static class FSFontInfo extends FontInfo
    {
        private final String postScriptName;
        private final FontFormat format;
        private final CIDSystemInfo cidSystemInfo;
        private final int usWeightClass;
        private final int sFamilyClass;
        private final int ulCodePageRange1;
        private final int ulCodePageRange2;
        private final int macStyle;
        private final PDPanoseClassification panose;
        private final File file;

        /**
         * @param file the font file (possibly a collection containing several fonts)
         * @param format the format used to pick the parser in {@link #getFont()}
         * @param postScriptName the PostScript name of the font
         * @param cidSystemInfo registry/ordering/supplement, or null when not available
         * @param usWeightClass weight class, -1 when unknown
         * @param sFamilyClass family class, -1 when unknown
         * @param ulCodePageRange1 first code page range bit field
         * @param ulCodePageRange2 second code page range bit field
         * @param macStyle mac style flags, -1 when unknown
         * @param panose raw PANOSE bytes, may be null
         */
        private FSFontInfo(File file, FontFormat format, String postScriptName,
                CIDSystemInfo cidSystemInfo, int usWeightClass, int sFamilyClass,
                int ulCodePageRange1, int ulCodePageRange2, int macStyle, byte[] panose)
        {
            this.file = file;
            this.format = format;
            this.postScriptName = postScriptName;
            this.cidSystemInfo = cidSystemInfo;
            this.usWeightClass = usWeightClass;
            this.sFamilyClass = sFamilyClass;
            this.ulCodePageRange1 = ulCodePageRange1;
            this.ulCodePageRange2 = ulCodePageRange2;
            this.macStyle = macStyle;
            // PANOSE data is kept only when the array is long enough to be classified
            this.panose = panose != null && panose.length >= PDPanoseClassification.LENGTH
                    ? new PDPanoseClassification(panose)
                    : null;
        }

        @Override
        public String getPostScriptName()
        {
            return postScriptName;
        }

        @Override
        public FontFormat getFormat()
        {
            return format;
        }

        @Override
        public CIDSystemInfo getCIDSystemInfo()
        {
            return cidSystemInfo;
        }

        /**
         * Returns the parsed font, loading it from the file on first use.
         *
         * @return the font, or null when the file could not be loaded
         */
        @Override
        public synchronized FontBoxFont getFont()
        {
            FontBoxFont cached = FONT_CACHE.getFont(this);
            if (cached != null)
            {
                return cached;
            }
            LOG.debug("Loading {} from {}", postScriptName, file);
            FontBoxFont font = switch (format)
            {
                case PFB -> getType1Font(postScriptName, file);
                case TTF -> getTrueTypeFont(postScriptName, file);
                case OTF -> getOTFFont(postScriptName, file);
                default -> throw new RuntimeException("can't happen");
            };
            if (font != null)
            {
                // failed loads are not cached, so a later call tries again
                FONT_CACHE.addFont(this, font);
            }
            return font;
        }

        @Override
        public int getFamilyClass()
        {
            return sFamilyClass;
        }

        @Override
        public int getWeightClass()
        {
            return usWeightClass;
        }

        @Override
        public int getCodePageRange1()
        {
            return ulCodePageRange1;
        }

        @Override
        public int getCodePageRange2()
        {
            return ulCodePageRange2;
        }

        @Override
        public int getMacStyle()
        {
            return macStyle;
        }

        @Override
        public PDPanoseClassification getPanose()
        {
            return panose;
        }

        @Override
        public String toString()
        {
            return super.toString() + " " + file;
        }

        /**
         * Tells whether the file is a font collection. Both extensions that the scanner registers
         * as collections (.ttc and .otc) must be recognised here, otherwise a font found inside an
         * .otc file could never be loaded afterwards.
         */
        private static boolean isFontCollection(File file)
        {
            String name = file.getName().toLowerCase();
            return name.endsWith(".ttc") || name.endsWith(".otc");
        }

        /**
         * Loads a TrueType font, logging and returning null on failure.
         */
        private TrueTypeFont getTrueTypeFont(String postScriptName, File file)
        {
            try
            {
                TrueTypeFont ttf = readTrueTypeFont(postScriptName, file);
                LOG.debug("Loaded {} from {}", postScriptName, file);
                return ttf;
            }
            catch (NullPointerException | IOException e) // TTF parser is buggy
            {
                LOG.warn("Could not load font file: " + file, e);
            }
            return null;
        }

        /**
         * Reads a TrueType font either from a collection (looked up by PostScript name) or from a
         * standalone font file.
         *
         * @throws IOException if the file cannot be parsed or the collection lacks the font
         */
        private TrueTypeFont readTrueTypeFont(String postScriptName, File file) throws IOException
        {
            if (isFontCollection(file))
            {
                @SuppressWarnings("squid:S2095")
                // ttc not closed here because it is needed later when ttf is accessed,
                // e.g. rendering PDF with non-embedded font which is in ttc file in our font directory
                TrueTypeCollection ttc = new TrueTypeCollection(file);
                TrueTypeFont ttf = null;
                try
                {
                    ttf = ttc.getFontByName(postScriptName);
                }
                catch (IOException ex)
                {
                    ttc.close();
                    throw ex;
                }
                if (ttf == null)
                {
                    ttc.close();
                    throw new IOException("Font " + postScriptName + " not found in " + file);
                }
                return ttf;
            }
            TTFParser ttfParser = new TTFParser(false, true);
            return ttfParser.parse(file);
        }

        /**
         * Loads an OpenType font, from a collection or a standalone file, logging and returning
         * null on failure.
         */
        private static OpenTypeFont getOTFFont(String postScriptName, File file)
        {
            try
            {
                if (isFontCollection(file))
                {
                    @SuppressWarnings("squid:S2095")
                    // ttc not closed here because it is needed later when ttf is accessed,
                    // e.g. rendering PDF with non-embedded font which is in ttc file in our font directory
                    TrueTypeCollection ttc = new TrueTypeCollection(file);
                    TrueTypeFont ttf = null;
                    try
                    {
                        ttf = ttc.getFontByName(postScriptName);
                    }
                    catch (IOException ex)
                    {
                        LOG.error(ex.getMessage(), ex);
                        ttc.close();
                        return null;
                    }
                    if (ttf == null)
                    {
                        ttc.close();
                        throw new IOException("Font " + postScriptName + " not found in " + file);
                    }
                    if (!(ttf instanceof OpenTypeFont))
                    {
                        // e.g. a stale cache entry: report it like any other unreadable font
                        // instead of failing with a ClassCastException
                        ttc.close();
                        throw new IOException(
                                "Font " + postScriptName + " in " + file + " is not an OpenType font");
                    }
                    return (OpenTypeFont) ttf;
                }
                OTFParser parser = new OTFParser(false, true);
                OpenTypeFont otf = parser.parse(file);
                LOG.debug("Loaded {} from {}", postScriptName, file);
                return otf;
            }
            catch (IOException e)
            {
                LOG.warn("Could not load font file: " + file, e);
            }
            return null;
        }

        /**
         * Loads a Type 1 font from a PFB file, logging and returning null on failure.
         */
        private static Type1Font getType1Font(String postScriptName, File file)
        {
            try (InputStream input = new FileInputStream(file))
            {
                Type1Font type1 = Type1Font.createWithPFB(input);
                LOG.debug("Loaded {} from {}", postScriptName, file);
                return type1;
            }
            catch (IOException e)
            {
                LOG.warn("Could not load font file " + file, e);
            }
            return null;
        }

    }

    /**
     * Placeholder entry for a font file that was examined but must not be used, e.g. a bitmap
     * font, a font without a name, or a font that failed to parse. All numeric metadata is zero
     * and no CID or PANOSE information is attached.
     */
    private static final class FSIgnored extends FSFontInfo
    {
        private FSIgnored(File file, FontFormat format, String postScriptName)
        {
            super(file, format, postScriptName, null, 0, 0, 0, 0, 0, null);
        }
    }
    
    /**
     * Locates the font files available on the local system.
     *
     * @return one {@link File} per location reported by {@link FontFileFinder}
     */
    protected List<File> findFontFiles()
    {
        return new FontFileFinder().find().stream().map(File::new).collect(Collectors.toList());
    }


    /**
     * Fills {@code fontInfoList}: font files are located first, then their metadata is taken from
     * the on-disk cache when it is usable, otherwise every file is scanned and the cache is
     * written again on a background thread.
     */
    private void initialize()
    {
        try
        {
            LOG.trace("Will search the local system for fonts");

            // scan the local system for font files
            List<File> files = new ArrayList<>(findFontFiles());

            LOG.trace("Found {} fonts on the local system", files.size());

            if (files.isEmpty())
            {
                return;
            }

            // load cached FontInfo objects
            List<FSFontInfo> cachedInfos = loadDiskCache(files);
            if (cachedInfos != null && !cachedInfos.isEmpty())
            {
                fontInfoList.addAll(cachedInfos);
                return;
            }

            LOG.warn("Building on-disk font cache, this may take a while");
            scanFonts(files);
            // the cache file is written asynchronously so the caller does not wait for disk I/O
            var executor = Executors.newSingleThreadExecutor();
            executor.execute(this::saveDiskCache);
            executor.shutdown();
            LOG.info("Finished building on-disk font cache, found {} fonts",
                    fontInfoList.size());
        }
        catch (SecurityException e)
        {
            // covers AccessControlException (a subclass) as well as any other denial raised by
            // the file system calls, matching what the cache load/save methods catch
            LOG.error("Error accessing the file system", e);
        }
    }

    /**
     * Parses every given font file and records its metadata. The parser is chosen from the file
     * extension; files with any other extension are skipped. A failure on one file is logged and
     * does not stop the scan.
     *
     * @param files the font files to examine
     */
    private void scanFonts(List<File> files)
    {
        for (File file : files)
        {
            try
            {
                // lower-cased once so every extension check below is case-insensitive
                String filePath = file.getPath().toLowerCase();
                if (filePath.endsWith(".ttf") || filePath.endsWith(".otf"))
                {
                    addTrueTypeFont(file);
                }
                else if (filePath.endsWith(".ttc") || filePath.endsWith(".otc"))
                {
                    addTrueTypeCollection(file);
                }
                else if (filePath.endsWith(".pfb"))
                {
                    addType1Font(file);
                }
            }
            catch (Exception e) // TTF parser is buggy
            {
                LOG.warn("Unable to load font file: " + file, e);
            }
        }
    }

    /**
     * Picks the location of the font cache file. The directory named by the
     * {@code org.sambox.fontcache} system property is preferred, then {@code user.home}; the
     * first one that is a writable directory wins. When neither qualifies,
     * {@code java.io.tmpdir} is used without further checks.
     *
     * @return the {@code .sambox.cache} file inside the selected directory
     */
    protected File getDiskCacheFile()
    {
        String[] preferredProperties = { "org.sambox.fontcache", "user.home" };
        for (String property : preferredProperties)
        {
            String directory = System.getProperty(property);
            if (!isBadPath(directory))
            {
                return new File(directory, ".sambox.cache");
            }
        }
        return new File(System.getProperty("java.io.tmpdir"), ".sambox.cache");
    }

    /**
     * Tells whether a path is unusable as cache location.
     *
     * @param path the candidate directory, may be null
     * @return true when the path is null, is not a directory, or is not writable
     */
    private static boolean isBadPath(String path)
    {
        if (path == null)
        {
            return true;
        }
        File candidate = new File(path);
        return !(candidate.isDirectory() && candidate.canWrite());
    }

    /**
     * Saves the font metadata cache to disk, one line per font. Fields are separated by
     * {@code FONT_CACHE_SEPARATOR} in this order: PostScript name (with '|' escaped as "\|"),
     * format, CID system info (registry-ordering-supplement), weight class, family class, code
     * page range 1, code page range 2, mac style, PANOSE bytes (two hex digits each), absolute
     * file path. Numbers are written in hexadecimal; optional values are left empty.
     * Failures are logged and otherwise ignored.
     */
    private void saveDiskCache()
    {
        try
        {
            File file = getDiskCacheFile();
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(file)))
            {
                for (FSFontInfo fontInfo : fontInfoList)
                {
                    writer.write(fontInfo.postScriptName.trim().replace("|", "\\|"));
                    writer.write(FONT_CACHE_SEPARATOR);
                    writer.write(fontInfo.format.toString());
                    writer.write(FONT_CACHE_SEPARATOR);
                    if (fontInfo.cidSystemInfo != null)
                    {
                        writer.write(fontInfo.cidSystemInfo.getRegistry() + '-'
                                + fontInfo.cidSystemInfo.getOrdering() + '-'
                                + fontInfo.cidSystemInfo.getSupplement());
                    }
                    writer.write(FONT_CACHE_SEPARATOR);
                    if (fontInfo.usWeightClass > -1)
                    {
                        writer.write(Integer.toHexString(fontInfo.usWeightClass));
                    }
                    writer.write(FONT_CACHE_SEPARATOR);
                    if (fontInfo.sFamilyClass > -1)
                    {
                        writer.write(Integer.toHexString(fontInfo.sFamilyClass));
                    }
                    writer.write(FONT_CACHE_SEPARATOR);
                    writer.write(Integer.toHexString(fontInfo.ulCodePageRange1));
                    writer.write(FONT_CACHE_SEPARATOR);
                    writer.write(Integer.toHexString(fontInfo.ulCodePageRange2));
                    writer.write(FONT_CACHE_SEPARATOR);
                    if (fontInfo.macStyle > -1)
                    {
                        writer.write(Integer.toHexString(fontInfo.macStyle));
                    }
                    writer.write(FONT_CACHE_SEPARATOR);
                    if (fontInfo.panose != null)
                    {
                        byte[] bytes = fontInfo.panose.getBytes();
                        for (int i = 0; i < 10; i++)
                        {
                            // Mask to an unsigned value first: without it a byte >= 0x80 is
                            // sign-extended and printed as eight digits ("ffffffXX"), which
                            // breaks the fixed two-digits-per-byte layout of this field.
                            String str = Integer.toHexString(bytes[i] & 0xFF);
                            if (str.length() == 1)
                            {
                                writer.write('0');
                            }
                            writer.write(str);
                        }
                    }
                    writer.write(FONT_CACHE_SEPARATOR);
                    writer.write(fontInfo.file.getAbsolutePath());
                    writer.newLine();
                }
            }
        }
        catch (IOException | SecurityException e)
        {
            LOG.error("Could not write to font cache", e);
        }
    }

    /**
     * Loads the font metadata cache from disk.
     */
    private List loadDiskCache(List files)
    {
        Set pending = new HashSet<>();
        for (File file : files)
        {
            pending.add(file.getAbsolutePath());
        }

        List results = new ArrayList<>();
        File file = getDiskCacheFile();
        boolean fileExists = false;

        try
        {
            fileExists = file.exists();
        }
        catch (SecurityException e)
        {
        }

        if (fileExists)
        {
            try (BufferedReader reader = new BufferedReader(new FileReader(file)))
            {
                String line;
                while ((line = reader.readLine()) != null)
                {
                    String[] parts = line.split("(? 0)
                    {
                        String[] ros = parts[2].split("-");
                        cidSystemInfo = new CIDSystemInfo(ros[0], ros[1], Integer.parseInt(ros[2]));
                    }
                    if (parts[3].length() > 0)
                    {
                        usWeightClass = (int) Long.parseLong(parts[3], 16);
                    }
                    if (parts[4].length() > 0)
                    {
                        sFamilyClass = (int) Long.parseLong(parts[4], 16);
                    }
                    ulCodePageRange1 = (int) Long.parseLong(parts[5], 16);
                    ulCodePageRange2 = (int) Long.parseLong(parts[6], 16);
                    if (parts[7].length() > 0)
                    {
                        macStyle = (int) Long.parseLong(parts[7], 16);
                    }
                    if (parts[8].length() > 0)
                    {
                        panose = new byte[10];
                        for (int i = 0; i < 10; i++)
                        {
                            String str = parts[8].substring(i * 2, i * 2 + 2);
                            int b = Integer.parseInt(str, 16);
                            panose[i] = (byte) (b & 0xff);
                        }
                    }
                    fontFile = new File(parts[9]);

                    if (fontFile.exists())
                    {

                        FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName,
                                cidSystemInfo, usWeightClass, sFamilyClass, ulCodePageRange1,
                                ulCodePageRange2, macStyle, panose);
                        results.add(info);
                    }
                    else
                    {
                        LOG.debug("Font file {} not found, skipped", fontFile.getAbsolutePath());
                    }
                    pending.remove(fontFile.getAbsolutePath());
                }
            }
            catch (IOException e)
            {
                LOG.warn("Error loading font cache, will be re-built", e);
                return null;
            }
        }

        if (!pending.isEmpty())
        {
            // re-build the entire cache if we encounter un-cached fonts (could be optimised)
            LOG.warn("New fonts found, font cache will be re-built");
            return null;
        }

        return results;
    }

    /**
     * Records the metadata of every font contained in a TTC or OTC collection. The collection is
     * closed again once all of its fonts have been processed.
     *
     * @param ttcFile the collection file
     * @throws IOException if the collection cannot be opened or read
     */
    private void addTrueTypeCollection(final File ttcFile) throws IOException
    {
        try (TrueTypeCollection collection = new TrueTypeCollection(ttcFile))
        {
            collection.processAllFonts(font -> addTrueTypeFontImpl(font, ttcFile));
        }
    }

    /**
     * Parses a standalone OTF or TTF file and records its metadata. Files ending in ".otf"
     * (case-insensitive) go through the OpenType parser, all others through the TrueType parser.
     *
     * @param ttfFile the font file
     * @throws IOException if the file cannot be parsed
     */
    private void addTrueTypeFont(File ttfFile) throws IOException
    {
        boolean openType = ttfFile.getPath().toLowerCase().endsWith(".otf");
        final TrueTypeFont font;
        if (openType)
        {
            font = new OTFParser(false, true).parse(ttfFile);
        }
        else
        {
            font = new TTFParser(false, true).parse(ttfFile);
        }
        addTrueTypeFontImpl(font, ttfFile);
    }

    /**
     * Records the metadata of a parsed OTF or TTF font. Fonts without a name, bitmap fonts
     * (no header) and fonts that fail with an I/O error are recorded as ignored entries. The
     * font is always closed before returning; to reduce memory, the parsed font is not cached.
     *
     * @param ttf the parsed font, closed by this method
     * @param file the file the font was read from
     * @throws IOException if closing the font fails
     */
    private void addTrueTypeFontImpl(TrueTypeFont ttf, File file) throws IOException
    {
        try
        {
            // read PostScript name, if any
            String postScriptName = ttf.getName();
            if (postScriptName == null)
            {
                fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skipnoname*"));
                LOG.warn("Missing 'name' entry for PostScript name in font " + file);
                return;
            }
            // ignore bitmap fonts
            if (ttf.getHeader() == null)
            {
                fontInfoList.add(new FSIgnored(file, FontFormat.TTF, postScriptName));
                return;
            }
            int macStyle = ttf.getHeader().getMacStyle();

            int sFamilyClass = -1;
            int usWeightClass = -1;
            int ulCodePageRange1 = 0;
            int ulCodePageRange2 = 0;
            byte[] panose = null;

            OS2WindowsMetricsTable os2WindowsMetricsTable = ttf.getOS2Windows();

            // Apple's AAT fonts don't have an OS/2 table
            if (nonNull(os2WindowsMetricsTable))
            {
                sFamilyClass = os2WindowsMetricsTable.getFamilyClass();
                usWeightClass = os2WindowsMetricsTable.getWeightClass();
                ulCodePageRange1 = (int) os2WindowsMetricsTable.getCodePageRange1();
                ulCodePageRange2 = (int) os2WindowsMetricsTable.getCodePageRange2();
                panose = os2WindowsMetricsTable.getPanose();
            }

            FontFormat format;
            CIDSystemInfo ros = null;
            if (ttf instanceof OpenTypeFont otf && otf.isPostScript())
            {
                format = FontFormat.OTF;
                CFFFont cff = otf.getCFF().getFont();
                if (cff instanceof CFFCIDFont cidFont)
                {
                    String registry = cidFont.getRegistry();
                    String ordering = cidFont.getOrdering();
                    int supplement = cidFont.getSupplement();
                    ros = new CIDSystemInfo(registry, ordering, supplement);
                }
            }
            else
            {
                format = FontFormat.TTF;
                ros = readGcidSystemInfo(ttf);
            }
            fontInfoList.add(new FSFontInfo(file, format, postScriptName, ros, usWeightClass,
                    sFamilyClass, ulCodePageRange1, ulCodePageRange2, macStyle, panose));

            if (LOG.isTraceEnabled())
            {
                NamingTable name = ttf.getNaming();
                if (name != null)
                {
                    LOG.trace(format.name() + ": '" + name.getPostScriptName() + "' / '"
                            + name.getFontFamily() + "' / '" + name.getFontSubFamily() + "'");
                }
            }
        }
        catch (IOException e)
        {
            fontInfoList.add(new FSIgnored(file, FontFormat.TTF, "*skipexception*"));
            LOG.warn("Could not load font file: " + file, e);
        }
        finally
        {
            IOUtils.close(ttf);
        }
    }

    /**
     * Reads the CID system info from the "gcid" table found in Apple's AAT fonts.
     *
     * @return the registry/ordering/supplement triple, or null when the font has no such table
     */
    private static CIDSystemInfo readGcidSystemInfo(TrueTypeFont ttf) throws IOException
    {
        if (!ttf.getTableMap().containsKey("gcid"))
        {
            return null;
        }
        // Apple's AAT fonts have a "gcid" table with CID info
        byte[] bytes = ttf.getTableBytes(ttf.getTableMap().get("gcid"));
        String registryName = readNullTerminated(bytes, 10, 64);
        String orderName = readNullTerminated(bytes, 76, 64);
        // Big-endian unsigned 16-bit value: the two bytes must be OR-ed together (AND-ing the
        // shifted high byte with the low byte always yields 0) and each masked against sign
        // extension.
        int supplementVersion = (bytes[140] & 0xFF) << 8 | (bytes[141] & 0xFF);
        return new CIDSystemInfo(registryName, orderName, supplementVersion);
    }

    /**
     * Decodes a fixed-size ASCII field up to its first NUL byte; the whole field is returned
     * when it contains no NUL at all.
     */
    private static String readNullTerminated(byte[] bytes, int offset, int length)
    {
        String field = new String(bytes, offset, length, StandardCharsets.US_ASCII);
        int end = field.indexOf('\0');
        return end < 0 ? field : field.substring(0, end);
    }

    /**
     * Parses a Type 1 (PFB) file and records its metadata. Only the font name is kept; weight,
     * family class and mac style are stored as unknown (-1). The parsed font is not cached.
     *
     * @param pfbFile the PFB file
     * @throws IOException if the file cannot be read or parsed
     */
    private void addType1Font(File pfbFile) throws IOException
    {
        try (InputStream pfbStream = new FileInputStream(pfbFile))
        {
            Type1Font font = Type1Font.createWithPFB(pfbStream);
            FSFontInfo info = new FSFontInfo(pfbFile, FontFormat.PFB, font.getName(), null, -1,
                    -1, 0, 0, -1, null);
            fontInfoList.add(info);

            if (LOG.isTraceEnabled())
            {
                LOG.trace("PFB: '" + font.getName() + "' / '" + font.getFamilyName() + "' / '"
                        + font.getWeight() + "'");
            }
        }
    }

    /**
     * Returns the metadata of the fonts found on the local system, searching for them on the
     * first call.
     *
     * @return the list of font entries held by this provider
     */
    @Override
    public List getFontInfo()
    {
        // initialize() appends to fontInfoList, so running it again on a populated list would
        // duplicate every entry; an empty list (nothing found so far) is searched again
        if (fontInfoList.isEmpty())
        {
            initialize();
        }
        return fontInfoList;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy