org.sejda.impl.sambox.util.FontUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sejda-sambox Show documentation
Show all versions of sejda-sambox Show documentation
Package containing tasks implemented using sambox.
/*
* Copyright 2015 by Andrea Vacondio ([email protected]).
*
* This file is part of the Sejda source code
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.sejda.impl.sambox.util;
import static java.util.Objects.nonNull;
import static java.util.Optional.ofNullable;
import static org.sejda.sambox.util.BidiUtils.visualToLogical;
import static org.sejda.util.RequireUtils.requireNotNullArg;
import java.awt.geom.GeneralPath;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.sejda.fonts.OptionalUnicodeType0Font;
import org.sejda.fonts.UnicodeType0Font;
import org.sejda.impl.sambox.component.TextWithFont;
import org.sejda.model.exception.TaskIOException;
import org.sejda.model.pdf.FontResource;
import org.sejda.model.pdf.StandardType1Font;
import org.sejda.sambox.cos.COSDictionary;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.common.PDRectangle;
import org.sejda.sambox.pdmodel.font.FontMappers;
import org.sejda.sambox.pdmodel.font.FontMapping;
import org.sejda.sambox.pdmodel.font.PDFont;
import org.sejda.sambox.pdmodel.font.PDFontDescriptor;
import org.sejda.sambox.pdmodel.font.PDSimpleFont;
import org.sejda.sambox.pdmodel.font.PDType0Font;
import org.sejda.sambox.pdmodel.font.PDType1Font;
import org.sejda.sambox.pdmodel.font.PDType3CharProc;
import org.sejda.sambox.pdmodel.font.PDType3Font;
import org.sejda.sambox.pdmodel.font.PDVectorFont;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility to map from Sejda font definition to PDFBox.
*
* @author Andrea Vacondio
*/
public final class FontUtils {
// Logger shared by every static helper of this class.
private static final Logger LOG = LoggerFactory.getLogger(FontUtils.class);
// Private constructor: the class only exposes static members and is not meant to be instantiated.
private FontUtils() {
// hide
}
/**
 * Read-only lookup table from each Sejda {@link StandardType1Font} constant to the corresponding
 * {@link PDType1Font} constant. Populated once at class initialization and wrapped so it cannot be
 * modified afterwards.
 */
private static final Map<StandardType1Font, PDType1Font> STANDARD_TYPE1_FONTS;
static {
    Map<StandardType1Font, PDType1Font> fontsCache = new EnumMap<>(StandardType1Font.class);
    // the Sejda enum constants for Courier are spelled CURIER*
    fontsCache.put(StandardType1Font.CURIER, PDType1Font.COURIER);
    fontsCache.put(StandardType1Font.CURIER_BOLD, PDType1Font.COURIER_BOLD);
    fontsCache.put(StandardType1Font.CURIER_BOLD_OBLIQUE, PDType1Font.COURIER_BOLD_OBLIQUE);
    fontsCache.put(StandardType1Font.CURIER_OBLIQUE, PDType1Font.COURIER_OBLIQUE);
    fontsCache.put(StandardType1Font.HELVETICA, PDType1Font.HELVETICA);
    fontsCache.put(StandardType1Font.HELVETICA_BOLD, PDType1Font.HELVETICA_BOLD);
    fontsCache.put(StandardType1Font.HELVETICA_BOLD_OBLIQUE, PDType1Font.HELVETICA_BOLD_OBLIQUE);
    fontsCache.put(StandardType1Font.HELVETICA_OBLIQUE, PDType1Font.HELVETICA_OBLIQUE);
    fontsCache.put(StandardType1Font.SYMBOL, PDType1Font.SYMBOL);
    fontsCache.put(StandardType1Font.ZAPFDINGBATS, PDType1Font.ZAPF_DINGBATS);
    fontsCache.put(StandardType1Font.TIMES_BOLD, PDType1Font.TIMES_BOLD);
    fontsCache.put(StandardType1Font.TIMES_BOLD_ITALIC, PDType1Font.TIMES_BOLD_ITALIC);
    fontsCache.put(StandardType1Font.TIMES_ITALIC, PDType1Font.TIMES_ITALIC);
    fontsCache.put(StandardType1Font.TIMES_ROMAN, PDType1Font.TIMES_ROMAN);
    STANDARD_TYPE1_FONTS = Collections.unmodifiableMap(fontsCache);
}
/**
 * Shared Helvetica instance, used in this class as the starting font when resolving fonts for a text.
 * Declared final so the shared reference cannot be reassigned by callers.
 */
public static final PDFont HELVETICA = PDType1Font.HELVETICA;
/**
 * Translates a Sejda standard type 1 font into its PDFBox counterpart.
 *
 * @param st1Font
 *            the Sejda standard type 1 font to look up
 * @return the PDFBox font registered for {@code st1Font} in the lookup table, or null when the table has no entry
 *         for it.
 */
public static PDType1Font getStandardType1Font(StandardType1Font st1Font) {
    PDType1Font mapped = STANDARD_TYPE1_FONTS.get(st1Font);
    return mapped;
}
/**
 * Checks the text can be written with the given font, finds a fallback font otherwise.
 *
 * @param text
 *            the text to write
 * @param font
 *            the preferred font; may be null, in which case a fallback is always looked up
 * @param document
 *            the document the fallback font is loaded into
 * @return {@code font} when it can display {@code text}, otherwise the result of the fallback lookup, which is null
 *         when no suitable font is found
 */
public static PDFont fontOrFallback(String text, PDFont font, PDDocument document) {
    if (canDisplay(text, font)) {
        return font;
    }
    PDFont fallback = findFontFor(document, text);
    // canDisplay answers false for a null font, so both names must be resolved null-safely before logging
    String fontName = font == null ? null : font.getName();
    String fallbackName = fallback == null ? null : fallback.getName();
    LOG.debug("Text '{}' cannot be written with font {}, using fallback {}", text, fontName, fallbackName);
    return fallback;
}
// caches fonts, PER DOCUMENT
// has no auto-magical way to clear the cache when doc processing is done
// if you use this in a long lived process, call the cache clear method to avoid leaking memory
// if we get some issue we could consider something like com.twelvemonkeys.util.WeakWeakMap
private static Map> loadedFontCache = new HashMap<>();
public static void clearLoadedFontCache() {
loadedFontCache.clear();
}
public static void clearLoadedFontCache(PDDocument document) {
loadedFontCache.remove(document);
}
public static PDFont loadFont(PDDocument document, FontResource font) {
if (!loadedFontCache.containsKey(document)) {
loadedFontCache.put(document, new HashMap<>());
}
Map docCache = loadedFontCache.get(document);
if (docCache.containsKey(font.getResource())) {
return docCache.get(font.getResource());
}
InputStream in = font.getFontStream();
try {
PDType0Font loaded = PDType0Font.load(document, in);
LOG.trace("Loaded font {}", loaded.getName());
docCache.put(font.getResource(), loaded);
return loaded;
} catch (IOException e) {
LOG.warn("Failed to load font " + font, e);
return null;
} finally {
IOUtils.closeQuietly(in);
}
}
/**
 * Looks for a fallback font able to display the text, trying the {@link UnicodeType0Font} set first and the
 * {@link OptionalUnicodeType0Font} set only when the first one has no match.
 *
 * @param document
 *            the document candidate fonts are loaded into
 * @param text
 *            the text that must be displayable
 * @return a font capable of displaying the given string or null
 */
public static final PDFont findFontFor(PDDocument document, String text) {
    PDFont match = null;
    try {
        // probe the class first: the fonts jar may not be in the classpath
        Class.forName("org.sejda.fonts.UnicodeType0Font");
        match = findFontAmong(document, text, UnicodeType0Font.values());
        if (match == null) {
            Class.forName("org.sejda.fonts.OptionalUnicodeType0Font");
            match = findFontAmong(document, text, OptionalUnicodeType0Font.values());
        }
    } catch (ClassNotFoundException clf) {
        LOG.warn("Fallback fonts not available");
    }
    return match;
}
/**
 * Loads the candidates one by one, in the given order, and stops at the first one that can display the text.
 *
 * @return the first loaded font able to display {@code text}, or null if none of the candidates can
 */
private static PDFont findFontAmong(PDDocument document, String text, FontResource... fonts) {
    for (FontResource candidate : fonts) {
        PDFont candidateFont = loadFont(document, candidate);
        if (!canDisplay(text, candidateFont)) {
            continue;
        }
        LOG.debug("Found suitable font {} to display '{}'", candidateFont, text);
        return candidateFont;
    }
    return null;
}
/**
 * Tells whether every code point of the text belongs to the Unicode "space separator" (Zs) category. Line breaks
 * and tabs are not in that category, so a text containing them is not considered whitespace only.
 *
 * @param text
 *            the text to inspect, must not be null
 * @return true if the text is empty or made only of Zs characters
 */
public static boolean isOnlyWhitespace(String text) {
    return text.codePoints().allMatch(cp -> Character.getType(cp) == Character.SPACE_SEPARATOR);
}
/**
 * Strips the input in three successive passes: Unicode space separators (Zs) first, then CR+LF pairs, then the
 * remaining line feeds. A lone carriage return or a tab is left in place.
 *
 * @param text
 *            the text to strip, must not be null
 * @return the text without space separators and line breaks
 */
public static String removeWhitespace(String text) {
    String withoutSeparators = text.replaceAll("\\p{Zs}", "");
    String withoutCrLf = withoutSeparators.replaceAll("\\r\\n", "");
    String withoutLineFeeds = withoutCrLf.replaceAll("\\n", "");
    return withoutLineFeeds;
}
/**
 * Tells whether the font is able to encode a single space character.
 *
 * @param font
 *            the font to probe; a null font yields false
 * @return true if encoding " " succeeds, false if it fails with one of the handled exceptions
 */
public static boolean canDisplaySpace(PDFont font) {
    boolean encodable;
    try {
        font.encode(" ");
        encodable = true;
    } catch (IllegalArgumentException | IOException | UnsupportedOperationException | NullPointerException e) {
        // the font cannot encode a space (or there is no font at all)
        encodable = false;
    }
    return encodable;
}
/**
 * Returns true if the given font can display the given text. IMPORTANT: whitespace is stripped from the text before
 * the check, so it is ignored.
 *
 * For vector fonts every encoded code must also resolve to a glyph path whose bounds have a non-zero width.
 *
 * @param text
 *            the text to check
 * @param font
 *            the font to check against; null yields false
 * @return true if the text (whitespace excluded) can be encoded and, for vector fonts, has visible glyphs
 */
public static boolean canDisplay(String text, PDFont font) {
    if (font == null) {
        return false;
    }
    try {
        // only the non-whitespace characters are checked against the font
        byte[] encoded = font.encode(removeWhitespace(text));
        if (!(font instanceof PDVectorFont)) {
            return true;
        }
        PDVectorFont vectorFont = (PDVectorFont) font;
        InputStream codes = new ByteArrayInputStream(encoded);
        while (codes.available() > 0) {
            GeneralPath glyph = vectorFont.getPath(font.readCode(codes));
            // a missing or zero-width outline means nothing would be drawn for this code
            if (glyph == null || glyph.getBounds2D().getWidth() == 0) {
                return false;
            }
        }
        return true;
    } catch (IllegalArgumentException | IOException | UnsupportedOperationException | NullPointerException e) {
        // the font is not able to encode the text
        return false;
    }
}
/**
 * Finds the height of the tallest glyph used to write the text with the given font.
 *
 * The height comes from the char proc glyph bounding box for type 3 fonts, from the glyph path bounds for vector
 * fonts and from the path of the encoded glyph name for the remaining simple fonts. Codes for which no height can
 * be determined count as zero.
 *
 * @param text
 *            the text to measure
 * @param font
 *            the font, cannot be null
 * @return the maximum glyph height found; if an {@link IOException} occurs it is logged and the maximum found so far
 *         is returned
 */
public static double calculateBBoxHeight(String text, PDFont font) {
    requireNotNullArg(font, "Font cannot be null");
    double tallest = 0;
    try {
        InputStream codes = new ByteArrayInputStream(font.encode(text));
        while (codes.available() > 0) {
            int code = font.readCode(codes);
            double height = 0d;
            if (font instanceof PDType3Font) {
                PDType3Font type3 = (PDType3Font) font;
                height = ofNullable(type3.getCharProc(code)).map(PDType3CharProc::getGlyphBBox)
                        .map(PDRectangle::toGeneralPath).map(p -> p.getBounds2D().getHeight()).orElse(0d);
            } else if (font instanceof PDVectorFont) {
                PDVectorFont vector = (PDVectorFont) font;
                height = ofNullable(vector.getPath(code)).map(p -> p.getBounds2D().getHeight()).orElse(0d);
            } else if (font instanceof PDSimpleFont) {
                PDSimpleFont simple = (PDSimpleFont) font;
                String glyphName = ofNullable(simple.getEncoding()).map(e -> e.getName(code)).orElse(null);
                if (glyphName != null) {
                    height = simple.getPath(glyphName).getBounds2D().getHeight();
                }
            }
            tallest = Math.max(tallest, height);
        }
    } catch (IOException e) {
        LOG.warn("An error occured while calculating the highest glyph bbox", e);
    }
    return tallest;
}
/**
 * Guesses from the font name whether the font is bold.
 *
 * @param font
 *            the font to inspect
 * @return true if the font name contains "bold" (case insensitive), false otherwise or when the font has no name
 */
public static boolean isBold(PDFont font) {
    // the name can be missing (the subsetting helper in this file guards against that as well)
    String name = font.getName();
    if (name == null) {
        return false;
    }
    // Locale.ROOT keeps the lower-casing independent from the JVM default locale
    return name.toLowerCase(Locale.ROOT).contains("bold");
}
/**
 * Guesses from the font name whether the font is italic.
 *
 * @param font
 *            the font to inspect
 * @return true if the font name contains "italic" or "oblique" (case insensitive), false otherwise or when the font
 *         has no name
 */
public static boolean isItalic(PDFont font) {
    // the name can be missing (the subsetting helper in this file guards against that as well)
    String name = font.getName();
    if (name == null) {
        return false;
    }
    // Locale.ROOT matters here: with a Turkish default locale an upper case 'I' lower-cases to a dotless 'ı',
    // so names like "ITALIC" or "OBLIQUE" would not match
    String lowercasedName = name.toLowerCase(Locale.ROOT);
    return lowercasedName.contains("italic") || lowercasedName.contains("oblique");
}
/**
 * Helper for subset fonts. Determines if a font is subset, computes original font name. Provides methods for loading
 * the original full font from the system, if available, or loading a fallback font.
 *
 * A font is considered a subset when its trimmed name has the form {@code ABCDEF+Verdana}: exactly two fragments
 * around a '+', the first one being 6 characters long.
 */
public static class FontSubsetting {
    /** Name of the original font (the part after the '+'), null when the font is not a subset. */
    public final String fontName;
    /** True when the font name matches the subset naming pattern. */
    public final boolean isSubset;
    /** The font this helper was created for. */
    public final PDFont subsetFont;

    public FontSubsetting(PDFont subsetFont) {
        this.subsetFont = subsetFont;
        // is it a subset font? ABCDEF+Verdana
        String fontName = StringUtils.trimToEmpty(subsetFont.getName());
        String[] fontNameFragments = fontName.split("\\+");
        if (fontNameFragments.length == 2 && fontNameFragments[0].length() == 6) {
            this.isSubset = true;
            this.fontName = fontNameFragments[1];
        } else {
            this.isSubset = false;
            this.fontName = null;
        }
    }

    /**
     * Tries to load the original font and falls back to a similar one when the original is not available.
     *
     * @return the loaded font or null if neither could be loaded
     */
    public PDFont loadOriginalOrSimilar(PDDocument document) {
        PDFont original = loadOriginal(document);
        return original != null ? original : loadSimilar(document);
    }

    /**
     * Tries to load the original full font from the system
     *
     * @return the loaded font, or null if the font is not a subset, the system has no exact (non fallback) match or
     *         loading failed
     */
    public PDFont loadOriginal(PDDocument document) {
        if (fontName == null) {
            // not a subset font, there is no original name to search for
            return null;
        }
        String lookupName = fontName.replace("-", " ");
        LOG.debug("Searching the system for a font matching name '{}'", lookupName);
        FontMapping<TrueTypeFont> fontMapping = FontMappers.instance().getTrueTypeFont(lookupName, null);
        if (fontMapping == null || fontMapping.getFont() == null || fontMapping.isFallback()) {
            return null;
        }
        return loadMapped(document, fontMapping);
    }

    /**
     * Tries to load a similar full font from the system
     *
     * @return the loaded font (an exact match or a fallback picked by the font mapper), or null if the font is not a
     *         subset, nothing was mapped or loading failed
     */
    public PDFont loadSimilar(PDDocument document) {
        if (fontName == null) {
            // not a subset font, there is no original name to search for
            return null;
        }
        String lookupName = fontName.replace("-", " ");
        // Eg: Arial-BoldMT
        PDFontDescriptor descriptor = new PDFontDescriptor(new COSDictionary());
        descriptor.setFontName(fontName.split("-")[0]);
        descriptor.setForceBold(FontUtils.isBold(subsetFont));
        descriptor.setItalic(FontUtils.isItalic(subsetFont));
        LOG.debug(
                "Searching the system for a font matching name '{}' and description [name:{}, bold:{}, italic:{}]",
                lookupName, descriptor.getFontName(), descriptor.isForceBold(), descriptor.isItalic());
        FontMapping<TrueTypeFont> fontMapping = FontMappers.instance().getTrueTypeFont(lookupName, descriptor);
        if (fontMapping == null || fontMapping.getFont() == null) {
            return null;
        }
        return loadMapped(document, fontMapping);
    }

    /**
     * Loads the mapped system font into the document. On failure the error is logged, the mapped font is closed and
     * null is returned.
     */
    private PDFont loadMapped(PDDocument document, FontMapping<TrueTypeFont> fontMapping) {
        TrueTypeFont mappedFont = fontMapping.getFont();
        try {
            if (fontMapping.isFallback()) {
                LOG.debug("Fallback font available on the system: {} (for {})", mappedFont.getName(), fontName);
            } else {
                LOG.debug("Original font available on the system: {}", fontName);
            }
            return PDType0Font.load(document, mappedFont.getOriginalData());
        } catch (IOException ioe) {
            LOG.warn("Failed to load font from system", ioe);
            try {
                mappedFont.close();
            } catch (Exception e) {
                LOG.warn("Failed closing font", e);
            }
        }
        return null;
    }
}
/**
 * Wraps the given text on multiple lines, if it does not fit within the given maxWidth. It will try to determine if
 * all text can be written with given font and find a fallback for parts that are not supported.
 *
 * The text is split at word boundaries. A word that is wider than maxWidth, or longer than 10 characters, is laid
 * out character by character and broken with a trailing "-" where the line is full. Every emitted line is trimmed.
 *
 * @param rawLabel
 *            the text to wrap, its whitespace is normalized first
 * @param font
 *            the preferred font
 * @param fontSize
 *            the font size used to measure the text
 * @param maxWidth
 *            the maximum width of a line
 * @param document
 *            the document fallback fonts are loaded into
 * @return the lines, in order
 * @throws TaskIOException
 *             if measuring the text fails with an {@link IOException}
 */
public static List<String> wrapLines(String rawLabel, PDFont font, float fontSize, double maxWidth,
        PDDocument document) throws TaskIOException {
    List<String> lines = new ArrayList<>();
    String label = org.sejda.core.support.util.StringUtils.normalizeWhitespace(rawLabel);
    StringBuilder currentString = new StringBuilder();
    double currentWidth = 0;
    List<TextWithFont> resolvedStringsToFonts = FontUtils.resolveFonts(label, font, document);
    for (TextWithFont stringAndFont : resolvedStringsToFonts) {
        try {
            PDFont resolvedFont = stringAndFont.getFont();
            String resolvedLabel = stringAndFont.getText();
            String[] words = visualToLogical(resolvedLabel).split("(?<=\\b)");
            for (String word : words) {
                double textWidth = getSimpleStringWidth(word, resolvedFont, fontSize);
                if (textWidth > maxWidth || word.length() > 10) {
                    // this is a giant word that has no breaks and exceeds max width
                    // check for each char if it can be added to current line, wrap on new line if not
                    for (int codePoint : word.codePoints().toArray()) {
                        String ch = new String(Character.toChars(codePoint));
                        double chWidth = getSimpleStringWidth(ch, resolvedFont, fontSize);
                        if (currentWidth + chWidth > maxWidth) {
                            currentString.append("-");
                            lines.add(currentString.toString().trim());
                            currentString = new StringBuilder();
                            currentWidth = 0;
                        }
                        currentWidth += chWidth;
                        currentString.append(ch);
                    }
                } else {
                    // regular scenario: check if word can be added to current line, wrap on new line if not
                    if (currentWidth + textWidth > maxWidth) {
                        lines.add(currentString.toString().trim());
                        currentString = new StringBuilder();
                        currentWidth = 0;
                    }
                    currentWidth += textWidth;
                    currentString.append(word);
                }
            }
        } catch (IOException e) {
            throw new TaskIOException(e);
        }
    }
    // flush what is left on the last line
    if (currentString.length() > 0) {
        lines.add(currentString.toString().trim());
    }
    return lines;
}
/**
 * Measures the text with the given font at the given size. No check is made that the font can actually write the
 * text and no fallback font is searched.
 *
 * @param text
 *            the text to measure
 * @param font
 *            the font used to measure
 * @param fontSize
 *            the font size
 * @return the string width scaled to the font size; when that is zero, the font average width scaled to the font
 *         size is returned instead
 * @throws IOException
 *             if the font fails to provide the string width
 */
public static double getSimpleStringWidth(String text, PDFont font, double fontSize) throws IOException {
    double measured = font.getStringWidth(text) / 1000 * fontSize;
    // sometimes the string width is reported incorrectly, too small. when writing ' ' (space) it leads to missing
    // spaces, so a zero width is replaced with the font average width
    // TODO: replace zero with heuristic based "small value"
    return measured == 0 ? font.getAverageFontWidth() / 1000 * fontSize : measured;
}
/**
 * Supports writing labels which require multiple fonts (eg: mixing thai and english words). Returns a list of text
 * with associated font.
 *
 * The label is walked one code point at a time and consecutive code points resolved to the same font are grouped
 * together. The last group is always appended, even when it is empty. A segment has a null font when no font able
 * to display it was found.
 *
 * @param label
 *            the text to split
 * @param font
 *            the preferred font
 * @param document
 *            the document fallback fonts are loaded into
 * @return the text segments with their font, in writing order
 */
public static List<TextWithFont> resolveFonts(String label, PDFont font, PDDocument document)
        throws TaskIOException {
    PDFont currentFont = font;
    StringBuilder currentString = new StringBuilder();
    // we want to keep the insertion order
    List<TextWithFont> result = new ArrayList<>();
    for (int codePoint : visualToLogical(label).codePoints().toArray()) {
        String s = new String(Character.toChars(codePoint));
        PDFont f = fontOrFallback(s, font, document);
        if (s.equals(" ")) {
            // we want space to be a separate text item
            // because some fonts are missing the space glyph
            // so we'll handle it separate from the other chars
            // some fonts don't have glyphs for space.
            // figure out if that's the case and switch to a standard font as fallback
            if (!FontUtils.canDisplaySpace(f)) {
                f = FontUtils.getStandardType1Font(StandardType1Font.HELVETICA);
            }
            if (f != currentFont) {
                // end current string, before space
                if (currentString.length() > 0) {
                    result.add(new TextWithFont(currentString.toString(), currentFont));
                }
                // add space
                result.add(new TextWithFont(" ", f));
                currentString = new StringBuilder();
                currentFont = f;
            } else {
                currentString.append(s);
            }
        } else if (currentFont == f) {
            currentString.append(s);
        } else {
            if (currentString.length() > 0) {
                result.add(new TextWithFont(currentString.toString(), currentFont));
            }
            currentString = new StringBuilder(s);
            currentFont = f;
        }
    }
    // append the trailing segment before tracing, so that it shows up in the log too
    result.add(new TextWithFont(currentString.toString(), currentFont));
    for (TextWithFont each : result) {
        LOG.trace("Will write '{}' with {}", each.getText(), each.getFont());
    }
    return result;
}
/**
 * Removes from the text the parts that cannot be written with any available font.
 *
 * The text is resolved to fonts starting from {@link #HELVETICA}; every segment that ends up without a font is
 * removed, as a literal, from the original text.
 *
 * @param text
 *            the text to clean
 * @param doc
 *            the document fallback fonts are loaded into
 * @return the text without the unsupported segments
 */
public static String removeUnsupportedCharacters(String text, PDDocument doc) throws TaskIOException {
    List<TextWithFont> resolved = resolveFonts(text, HELVETICA, doc);
    Set<String> unsupported = new HashSet<>();
    for (TextWithFont tf : resolved) {
        // a null font means neither the preferred font nor a fallback could display the segment
        if (tf.getFont() == null) {
            unsupported.add(tf.getText());
        }
    }
    String result = text;
    for (String s : unsupported) {
        result = result.replaceAll(Pattern.quote(s), "");
    }
    return result;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy