All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.sejda.impl.sambox.util.FontUtils Maven / Gradle / Ivy

There is a newer version: 5.1.7
Show newest version
/*
 * Copyright 2015 by Andrea Vacondio ([email protected]).
 * 
 * This file is part of the Sejda source code
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.sejda.impl.sambox.util;

import static java.util.Objects.nonNull;
import static java.util.Optional.ofNullable;
import static org.sejda.sambox.util.BidiUtils.visualToLogical;
import static org.sejda.util.RequireUtils.requireNotNullArg;

import java.awt.geom.GeneralPath;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.sejda.fonts.OptionalUnicodeType0Font;
import org.sejda.fonts.UnicodeType0Font;
import org.sejda.impl.sambox.component.TextWithFont;
import org.sejda.model.exception.TaskIOException;
import org.sejda.model.pdf.FontResource;
import org.sejda.model.pdf.StandardType1Font;
import org.sejda.sambox.cos.COSDictionary;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.common.PDRectangle;
import org.sejda.sambox.pdmodel.font.FontMappers;
import org.sejda.sambox.pdmodel.font.FontMapping;
import org.sejda.sambox.pdmodel.font.PDFont;
import org.sejda.sambox.pdmodel.font.PDFontDescriptor;
import org.sejda.sambox.pdmodel.font.PDSimpleFont;
import org.sejda.sambox.pdmodel.font.PDType0Font;
import org.sejda.sambox.pdmodel.font.PDType1Font;
import org.sejda.sambox.pdmodel.font.PDType3CharProc;
import org.sejda.sambox.pdmodel.font.PDType3Font;
import org.sejda.sambox.pdmodel.font.PDVectorFont;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility to map from Sejda font definition to PDFBox.
 *
 * @author Andrea Vacondio
 */
public final class FontUtils {

    // Logger shared by all the static helpers of this class (and by the nested FontSubsetting helper).
    private static final Logger LOG = LoggerFactory.getLogger(FontUtils.class);

    /**
     * Private constructor: this class only exposes static members and is not meant to be instantiated.
     */
    private FontUtils() {
        // hide
    }

    /**
     * Read-only lookup table from the Sejda standard Type 1 font identifiers to the matching font instances.
     */
    private static final Map<StandardType1Font, PDType1Font> STANDARD_TYPE1_FONTS;

    static {
        // the table is filled once at class initialization and then wrapped so it cannot be modified
        Map<StandardType1Font, PDType1Font> fontsCache = new EnumMap<>(StandardType1Font.class);
        fontsCache.put(StandardType1Font.CURIER, PDType1Font.COURIER);
        fontsCache.put(StandardType1Font.CURIER_BOLD, PDType1Font.COURIER_BOLD);
        fontsCache.put(StandardType1Font.CURIER_BOLD_OBLIQUE, PDType1Font.COURIER_BOLD_OBLIQUE);
        fontsCache.put(StandardType1Font.CURIER_OBLIQUE, PDType1Font.COURIER_OBLIQUE);
        fontsCache.put(StandardType1Font.HELVETICA, PDType1Font.HELVETICA);
        fontsCache.put(StandardType1Font.HELVETICA_BOLD, PDType1Font.HELVETICA_BOLD);
        fontsCache.put(StandardType1Font.HELVETICA_BOLD_OBLIQUE, PDType1Font.HELVETICA_BOLD_OBLIQUE);
        fontsCache.put(StandardType1Font.HELVETICA_OBLIQUE, PDType1Font.HELVETICA_OBLIQUE);
        fontsCache.put(StandardType1Font.SYMBOL, PDType1Font.SYMBOL);
        fontsCache.put(StandardType1Font.ZAPFDINGBATS, PDType1Font.ZAPF_DINGBATS);
        fontsCache.put(StandardType1Font.TIMES_BOLD, PDType1Font.TIMES_BOLD);
        fontsCache.put(StandardType1Font.TIMES_BOLD_ITALIC, PDType1Font.TIMES_BOLD_ITALIC);
        fontsCache.put(StandardType1Font.TIMES_ITALIC, PDType1Font.TIMES_ITALIC);
        fontsCache.put(StandardType1Font.TIMES_ROMAN, PDType1Font.TIMES_ROMAN);
        STANDARD_TYPE1_FONTS = Collections.unmodifiableMap(fontsCache);
    }

    /**
     * Helvetica; used by {@link #removeUnsupportedCharacters(String, PDDocument)} as the base font.
     */
    public static PDFont HELVETICA = PDType1Font.HELVETICA;

    /**
     * Mapping between Sejda and PDFBox standard type 1 fonts implementation
     *
     * @param st1Font
     *            the Sejda identifier of the standard Type 1 font
     * @return the PDFBox font, or null when the lookup table has no entry for the given identifier.
     */
    public static PDType1Font getStandardType1Font(StandardType1Font st1Font) {
        return STANDARD_TYPE1_FONTS.get(st1Font);
    }

    /**
     * Checks the text can be written with the given font, find a fallback font otherwise.
     *
     * @param text
     *            the text to write
     * @param font
     *            the preferred font; may be null, in which case a fallback is always searched
     * @param document
     *            the document the fallback font is loaded into
     * @return the given font if it can display the text, otherwise the result of {@link #findFontFor(PDDocument, String)}, which can be null
     */
    public static PDFont fontOrFallback(String text, PDFont font, PDDocument document) {
        if (canDisplay(text, font)) {
            return font;
        }
        PDFont fallback = findFontFor(document, text);
        // canDisplay accepts a null font, so the name must be resolved null-safely before logging
        String fontName = nonNull(font) ? font.getName() : null;
        String fallbackName = nonNull(fallback) ? fallback.getName() : null;
        LOG.debug("Text '{}' cannot be written with font {}, using fallback {}", text, fontName, fallbackName);
        return fallback;
    }

    // caches fonts, PER DOCUMENT
    // has no auto-magical way to clear the cache when doc processing is done
    // if you use this in a long lived process, call the cache clear method to avoid leaking memory
    // if we get some issue we could consider something like com.twelvemonkeys.util.WeakWeakMap
    private static Map> loadedFontCache = new HashMap<>();

    public static void clearLoadedFontCache() {
        loadedFontCache.clear();
    }

    public static void clearLoadedFontCache(PDDocument document) {
        loadedFontCache.remove(document);
    }

    public static PDFont loadFont(PDDocument document, FontResource font) {
        if (!loadedFontCache.containsKey(document)) {
            loadedFontCache.put(document, new HashMap<>());
        }

        Map docCache = loadedFontCache.get(document);
        if (docCache.containsKey(font.getResource())) {
            return docCache.get(font.getResource());
        }

        InputStream in = font.getFontStream();
        try {
            PDType0Font loaded = PDType0Font.load(document, in);
            LOG.trace("Loaded font {}", loaded.getName());
            docCache.put(font.getResource(), loaded);
            return loaded;
        } catch (IOException e) {
            LOG.warn("Failed to load font " + font, e);
            return null;
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Searches the bundled fallback fonts for one able to display the text: the {@code UnicodeType0Font} set is tried first, then the {@code OptionalUnicodeType0Font} set.
     *
     * @param document
     *            the document the candidate fonts are loaded into
     * @param text
     *            the text to display
     * @return a font capable of displaying the given string or null
     */
    public static final PDFont findFontFor(PDDocument document, String text) {
        PDFont match = null;
        try {
            // probe by name first, so a missing fonts jar is reported as a warning
            Class.forName("org.sejda.fonts.UnicodeType0Font");
            match = findFontAmong(document, text, UnicodeType0Font.values());
            if (match == null) {
                Class.forName("org.sejda.fonts.OptionalUnicodeType0Font");
                match = findFontAmong(document, text, OptionalUnicodeType0Font.values());
            }
        } catch (ClassNotFoundException missing) {
            LOG.warn("Fallback fonts not available");
        }
        return match;
    }

    /**
     * Loads the candidate fonts one at a time, in the given order, and returns the first one able to display the text, or null if none can.
     */
    private static PDFont findFontAmong(PDDocument document, String text, FontResource... fonts) {
        // the stream is lazy: candidates after the first match are never loaded
        Optional<PDFont> match = Arrays.stream(fonts).map(candidate -> loadFont(document, candidate))
                .filter(candidate -> canDisplay(text, candidate)).findFirst();
        match.ifPresent(loaded -> LOG.debug("Found suitable font {} to display '{}'", loaded, text));
        return match.orElse(null);
    }

    /**
     * Tells whether the text is made only of Unicode space separators (general category {@code Zs}). Other blank characters, such as tabs and line breaks, do not count as
     * whitespace here.
     *
     * @param text
     *            the text to inspect
     * @return true if the text is empty or contains nothing but {@code Zs} characters
     */
    public static boolean isOnlyWhitespace(String text) {
        return text.codePoints().allMatch(codePoint -> Character.getType(codePoint) == Character.SPACE_SEPARATOR);
    }

    /**
     * Strips the Unicode space separators (general category {@code Zs}) from the text, then the {@code \r\n} sequences and finally the remaining {@code \n} characters. Other
     * characters, a lone {@code \r} or a tab for instance, are left in place.
     *
     * @param text
     *            the text to clean up
     * @return the text without space separators and line breaks
     */
    public static String removeWhitespace(String text) {
        String withoutSpaces = text.replaceAll("\\p{Zs}", "");
        String withoutWindowsBreaks = withoutSpaces.replaceAll("\\r\\n", "");
        return withoutWindowsBreaks.replaceAll("\\n", "");
    }

    /**
     * Tells whether the font is able to encode a single space character.
     *
     * @param font
     *            the font to probe
     * @return true if encoding " " succeeds, false if the attempt fails with one of the handled exceptions (a null font ends up here too)
     */
    public static boolean canDisplaySpace(PDFont font) {
        boolean encodable;
        try {
            font.encode(" ");
            encodable = true;
        } catch (IllegalArgumentException | IOException | UnsupportedOperationException | NullPointerException e) {
            // the space cannot be encoded with this font
            encodable = false;
        }
        return encodable;
    }

    /**
     * Returns true if the given font can display the given text. IMPORTANT: Ignores all whitespace in text.
     *
     * The text, stripped of its whitespace, must be encodable by the font. For vector fonts every encoded code must also resolve to a glyph path whose bounds have a non-zero
     * width.
     *
     * @param text
     *            the text to check
     * @param font
     *            the font to check against, can be null
     * @return false if the font is null, the text cannot be encoded or a glyph is missing or has zero width, true otherwise
     */
    public static boolean canDisplay(String text, PDFont font) {
        if (font == null) {
            return false;
        }

        try {
            // whitespace is dropped first, only the remaining characters are checked against the font
            byte[] encoded = font.encode(removeWhitespace(text));
            if (!(font instanceof PDVectorFont)) {
                // being encodable is all that can be verified for non vector fonts
                return true;
            }

            PDVectorFont vectorFont = (PDVectorFont) font;
            InputStream codes = new ByteArrayInputStream(encoded);
            while (codes.available() > 0) {
                GeneralPath glyph = vectorFont.getPath(font.readCode(codes));
                boolean blank = glyph == null || glyph.getBounds2D().getWidth() == 0;
                if (blank) {
                    return false;
                }
            }
            return true;
        } catch (IllegalArgumentException | IOException | UnsupportedOperationException | NullPointerException e) {
            // any of these means the text cannot be written with this font
            return false;
        }
    }

    /**
     * Computes the height of the tallest glyph needed to write the text with the given font.
     *
     * @param text
     *            the text to measure
     * @param font
     *            the font, cannot be null
     * @return the greatest glyph bounds height found. If an I/O error occurs it is logged and the value computed so far is returned.
     */
    public static double calculateBBoxHeight(String text, PDFont font) {
        requireNotNullArg(font, "Font cannot be null");
        double tallest = 0;
        try {
            InputStream codes = new ByteArrayInputStream(font.encode(text));
            while (codes.available() > 0) {
                tallest = Math.max(tallest, glyphHeight(font, font.readCode(codes)));
            }
        } catch (IOException e) {
            LOG.warn("An error occured while calculating the highest glyph bbox", e);
        }
        return tallest;
    }

    /**
     * Height of the bounds of the glyph for the given code, 0 when the glyph cannot be resolved or the font type is not handled.
     */
    private static double glyphHeight(PDFont font, int code) throws IOException {
        // the Type 3 check comes first, ahead of the more general vector and simple font checks
        if (font instanceof PDType3Font) {
            return ofNullable(((PDType3Font) font).getCharProc(code)).map(PDType3CharProc::getGlyphBBox)
                    .map(PDRectangle::toGeneralPath).map(p -> p.getBounds2D().getHeight()).orElse(0d);
        }
        if (font instanceof PDVectorFont) {
            return ofNullable(((PDVectorFont) font).getPath(code)).map(p -> p.getBounds2D().getHeight())
                    .orElse(0d);
        }
        if (font instanceof PDSimpleFont) {
            PDSimpleFont simpleFont = (PDSimpleFont) font;
            // simple fonts are looked up by glyph name, resolved through the font encoding
            String name = ofNullable(simpleFont.getEncoding()).map(e -> e.getName(code)).orElse(null);
            if (nonNull(name)) {
                return simpleFont.getPath(name).getBounds2D().getHeight();
            }
        }
        return 0d;
    }

    /**
     * Guesses from its name whether the font is a bold one.
     *
     * @param font
     *            the font to inspect
     * @return true if the font name contains "bold" (case insensitive), false otherwise or when the font has no name
     */
    public static boolean isBold(PDFont font) {
        String name = font.getName();
        if (name == null) {
            return false;
        }
        // Locale.ROOT keeps the lowercasing independent from the default locale of the JVM
        return name.toLowerCase(Locale.ROOT).contains("bold");
    }

    /**
     * Guesses from its name whether the font is an italic (or oblique) one.
     *
     * @param font
     *            the font to inspect
     * @return true if the font name contains "italic" or "oblique" (case insensitive), false otherwise or when the font has no name
     */
    public static boolean isItalic(PDFont font) {
        String name = font.getName();
        if (name == null) {
            return false;
        }
        // Locale.ROOT is required: with a Turkish default locale "ITALIC" would lowercase to a dotless i and never match
        String lowercasedName = name.toLowerCase(Locale.ROOT);
        return lowercasedName.contains("italic") || lowercasedName.contains("oblique");
    }

    /**
     * Helper for subset fonts. Determines if a font is subset, computes original font name. Provides methods for loading the original full font from the system, if available, or
     * loading a fallback font.
     */
    public static class FontSubsetting {
        /** Name of the original font (the part after the subset tag), null when the font is not a subset. */
        public final String fontName;
        /** True when the font name has the form of a subset name, a six characters tag followed by '+'. */
        public final boolean isSubset;
        /** The font this helper was created for. */
        public final PDFont subsetFont;

        /**
         * @param subsetFont
         *            the font to analyse, cannot be null
         */
        public FontSubsetting(PDFont subsetFont) {
            this.subsetFont = subsetFont;

            // is it a subset font? ABCDEF+Verdana
            String rawName = StringUtils.trimToEmpty(subsetFont.getName());
            String[] fragments = rawName.split("\\+");

            if (fragments.length == 2 && fragments[0].length() == 6) {
                this.isSubset = true;
                this.fontName = fragments[1];
            } else {
                this.isSubset = false;
                this.fontName = null;
            }
        }

        /**
         * Tries {@link #loadOriginal(PDDocument)} first and falls back to {@link #loadSimilar(PDDocument)}.
         *
         * @return the loaded font or null if neither could be loaded
         */
        public PDFont loadOriginalOrSimilar(PDDocument document) {
            PDFont original = loadOriginal(document);
            if (original == null) {
                return loadSimilar(document);
            }
            return original;
        }

        /**
         * Tries to load the original full font from the system
         *
         * @return the loaded font, or null if the font is not a subset, no exact (non fallback) match is found on the system or loading fails
         */
        public PDFont loadOriginal(PDDocument document) {
            if (fontName == null) {
                // not a subset font, there is no original name to search for
                return null;
            }
            String lookupName = fontName.replace("-", " ");

            LOG.debug("Searching the system for a font matching name '{}'", lookupName);

            FontMapping<TrueTypeFont> fontMapping = FontMappers.instance().getTrueTypeFont(lookupName, null);
            if (fontMapping != null && fontMapping.getFont() != null && !fontMapping.isFallback()) {
                TrueTypeFont mappedFont = fontMapping.getFont();

                try {
                    LOG.debug("Original font available on the system: {}", fontName);
                    return PDType0Font.load(document, mappedFont.getOriginalData());
                } catch (IOException ioe) {
                    LOG.warn("Failed to load font from system", ioe);
                    try {
                        mappedFont.close();
                    } catch (IOException e) {
                        LOG.warn("Failed closing font", e);
                    }
                }
            }

            return null;
        }

        /**
         * Tries to load a similar full font from the system
         *
         * @return the loaded font, possibly a fallback one, or null if the font is not a subset, nothing is found on the system or loading fails
         */
        public PDFont loadSimilar(PDDocument document) {
            if (fontName == null) {
                // not a subset font, there is no original name to search for
                return null;
            }
            String lookupName = fontName.replace("-", " ");

            // Eg: Arial-BoldMT
            PDFontDescriptor descriptor = new PDFontDescriptor(new COSDictionary());
            descriptor.setFontName(fontName.split("-")[0]);
            descriptor.setForceBold(FontUtils.isBold(subsetFont));
            descriptor.setItalic(FontUtils.isItalic(subsetFont));

            LOG.debug(
                    "Searching the system for a font matching name '{}' and description [name:{}, bold:{}, italic:{}]",
                    lookupName, descriptor.getFontName(), descriptor.isForceBold(), descriptor.isItalic());

            FontMapping<TrueTypeFont> fontMapping = FontMappers.instance().getTrueTypeFont(lookupName, descriptor);
            if (fontMapping != null && fontMapping.getFont() != null) {
                TrueTypeFont mappedFont = fontMapping.getFont();

                try {
                    if (fontMapping.isFallback()) {
                        LOG.debug("Fallback font available on the system: {} (for {})", mappedFont.getName(), fontName);
                    } else {
                        LOG.debug("Original font available on the system: {}", fontName);
                    }

                    return PDType0Font.load(document, mappedFont.getOriginalData());
                } catch (IOException ioe) {
                    LOG.warn("Failed to load font from system", ioe);
                    try {
                        mappedFont.close();
                    } catch (Exception e) {
                        LOG.warn("Failed closing font", e);
                    }
                }
            }

            return null;
        }
    }

    /**
     * Wraps the given text on multiple lines, if it does not fit within the given maxWidth
     * It will try to determine if all text can be written with given font and find a fallback for parts that are not supported.
     *
     * @param rawLabel
     *            the text to wrap, its whitespace is normalized first
     * @param font
     *            the preferred font
     * @param fontSize
     *            the font size used to measure the text
     * @param maxWidth
     *            the maximum width of a line
     * @param document
     *            the document fallback fonts are loaded into
     * @return the trimmed lines, in order
     * @throws TaskIOException
     *             if measuring the text fails with an I/O error
     */
    public static List<String> wrapLines(String rawLabel, PDFont font, float fontSize, double maxWidth,
            PDDocument document) throws TaskIOException {
        List<String> lines = new ArrayList<>();

        String label = org.sejda.core.support.util.StringUtils.normalizeWhitespace(rawLabel);

        StringBuilder currentString = new StringBuilder();
        double currentWidth = 0;

        List<TextWithFont> resolvedStringsToFonts = FontUtils.resolveFonts(label, font, document);

        for (TextWithFont stringAndFont : resolvedStringsToFonts) {
            try {
                PDFont resolvedFont = stringAndFont.getFont();
                String resolvedLabel = stringAndFont.getText();

                // split at word boundaries, keeping the separators as separate items
                String[] words = visualToLogical(resolvedLabel).split("(?<=\\b)");
                for (String word : words) {
                    double textWidth = getSimpleStringWidth(word, resolvedFont, fontSize);

                    if (textWidth > maxWidth || word.length() > 10) {
                        // this is a giant word that has no breaks and exceeds max width (or is longer than 10 chars)

                        // check for each char if it can be added to current line, wrap on new line if not
                        for (int codePoint : word.codePoints().toArray()) {
                            String ch = new String(Character.toChars(codePoint));
                            double chWidth = getSimpleStringWidth(ch, resolvedFont, fontSize);
                            if (currentWidth + chWidth > maxWidth) {
                                // the word is broken in the middle, mark it with a hyphen
                                currentString.append("-");
                                lines.add(currentString.toString().trim());
                                currentString = new StringBuilder();
                                currentWidth = 0;
                            }

                            currentWidth += chWidth;
                            currentString.append(ch);
                        }
                    } else {
                        // regular scenario: check if word can be added to current line, wrap on new line if not
                        if (currentWidth + textWidth > maxWidth) {
                            lines.add(currentString.toString().trim());
                            currentString = new StringBuilder();
                            currentWidth = 0;
                        }

                        currentWidth += textWidth;
                        currentString.append(word);
                    }
                }

            } catch (IOException e) {
                throw new TaskIOException(e);
            }
        }

        // flush what is left on the last line
        if (currentString.length() > 0) {
            lines.add(currentString.toString().trim());
        }

        return lines;
    }

    /**
     * Calculates the width of the string using the given font.
     * Does not try to find out if the text can actually be written with the given font and find fallback
     *
     * @param text
     *            the text to measure
     * @param font
     *            the font used to measure
     * @param fontSize
     *            the font size
     * @return the string width scaled to the font size, or the scaled average font width when the string width is reported as zero
     * @throws IOException
     *             if the font fails to compute the width
     */
    public static double getSimpleStringWidth(String text, PDFont font, double fontSize) throws IOException {
        double measured = font.getStringWidth(text) / 1000 * fontSize;
        if (measured != 0) {
            return measured;
        }
        // sometimes the string width is reported incorrectly, too small. when writing ' ' (space) it leads to missing spaces.
        // fall back to the font average width in that case
        // TODO: replace zero with heuristic based "small value"
        return font.getAverageFontWidth() / 1000 * fontSize;
    }

    /**
     * Supports writing labels which require multiple fonts (eg: mixing thai and english words) Returns a list of text with associated font.
     *
     * @param label
     *            the text to split in runs of characters sharing the same font
     * @param font
     *            the preferred font, used for every character it can display
     * @param document
     *            the document fallback fonts are loaded into
     * @return the text runs in order, each with the font to use for it. The font of a run is null when no font able to display it was found. The last run is always added,
     *         even when its text is empty.
     * @throws TaskIOException
     */
    public static List<TextWithFont> resolveFonts(String label, PDFont font, PDDocument document)
            throws TaskIOException {
        PDFont currentFont = font;
        StringBuilder currentString = new StringBuilder();

        // we want to keep the insertion order
        List<TextWithFont> result = new ArrayList<>();
        for (int codePoint : visualToLogical(label).codePoints().toArray()) {
            String s = new String(Character.toChars(codePoint));

            PDFont f = fontOrFallback(s, font, document);
            if (s.equals(" ")) {
                // we want space to be a separate text item
                // because some fonts are missing the space glyph
                // so we'll handle it separate from the other chars

                // some fonts don't have glyphs for space.
                // figure out if that's the case and switch to a standard font as fallback
                if (!FontUtils.canDisplaySpace(f)) {
                    f = FontUtils.getStandardType1Font(StandardType1Font.HELVETICA);
                }

                // fonts are compared by identity on purpose: a run ends when the font instance changes
                if (f != currentFont) {
                    // end current string, before space
                    if (currentString.length() > 0) {
                        result.add(new TextWithFont(currentString.toString(), currentFont));
                    }

                    // add space
                    result.add(new TextWithFont(" ", f));
                    currentString = new StringBuilder();
                    currentFont = f;
                } else {
                    currentString.append(s);
                }
            } else if (currentFont == f) {
                currentString.append(s);
            } else {
                if (currentString.length() > 0) {
                    result.add(new TextWithFont(currentString.toString(), currentFont));
                }

                currentString = new StringBuilder(s);
                currentFont = f;
            }
        }

        result.add(new TextWithFont(currentString.toString(), currentFont));

        // logged after the last run has been added, so that the trace covers every run
        for (TextWithFont each : result) {
            LOG.trace("Will write '{}' with {}", each.getText(), each.getFont());
        }

        return result;
    }

    /**
     * Removes from the text the parts that cannot be written with Helvetica nor with any of the fallback fonts.
     *
     * @param text
     *            the text to clean up
     * @param doc
     *            the document fallback fonts are loaded into
     * @return the text without the runs {@link #resolveFonts(String, PDFont, PDDocument)} could not associate a font to
     * @throws TaskIOException
     */
    public static String removeUnsupportedCharacters(String text, PDDocument doc) throws TaskIOException {
        // a run with a null font is one that no available font can display
        Set<String> unsupported = new HashSet<>();
        for (TextWithFont resolved : resolveFonts(text, HELVETICA, doc)) {
            if (resolved.getFont() == null) {
                unsupported.add(resolved.getText());
            }
        }

        String result = text;
        for (String s : unsupported) {
            // literal replacement, no regex involved
            result = result.replace(s, "");
        }

        return result;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy