
/*
 * org.docx4j.fonts.PhysicalFonts (Maven / Gradle / Ivy)
 * Go to download
 * Show more of this group. Show more artifacts with this name.
 * Show all versions of docx4j. Show documentation.
 *
 * docx4j is a library which helps you to work with the Office Open XML
 * file format as used in docx documents, pptx presentations, and xlsx
 * spreadsheets.
 */
package org.docx4j.fonts;
import java.io.File;
import java.net.URL;
import java.util.*;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.docx4j.fonts.fop.fonts.EmbedFontInfo;
import org.docx4j.fonts.fop.fonts.FontCache;
import org.docx4j.fonts.fop.fonts.FontResolver;
import org.docx4j.fonts.fop.fonts.FontSetup;
import org.docx4j.fonts.fop.fonts.FontTriplet;
import org.docx4j.fonts.fop.fonts.autodetect.FontFileFinder;
import org.docx4j.fonts.fop.fonts.autodetect.FontInfoFinder;
import org.docx4j.fonts.microsoft.MicrosoftFonts;
import org.docx4j.fonts.microsoft.MicrosoftFontsRegistry;
import org.docx4j.openpackaging.packages.OpcPackage;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.ObfuscatedFontPart;
//import com.lowagie.text.pdf.BaseFont;
/**
* The fonts which are physically installed on the system.
*
* They can be discovered automatically, or you can
* just add specific fonts.
*
* Do NOT add fonts embedded in a docx to physicalFontMap!
*
* @author dev
*
*/
public class PhysicalFonts {
protected static Logger log = LoggerFactory.getLogger(PhysicalFonts.class);
protected static FontCache fontCache;
/** These are the physical fonts on the system which we have discovered.
* Do NOT add fonts embedded in a docx to physicalFontMap! */
private final static Map physicalFontMap;
@Deprecated // want to enforce case insensitive
public static Map getPhysicalFonts() {
return physicalFontMap;
}
/**
* Get a PhysicalFont
* by case-insensitive name. (Although Word always
* uses Title Case for font names, it is actually
* case insensitive; the spec is silent on this.)
*
* @param key
* @return
*/
public static PhysicalFont get(String key) {
return physicalFontMap.get(key.toLowerCase());
}
/**
* Put a PhysicalFont
* by case-insensitive name. (Although Word always
* uses Title Case for font names, it is actually
* case insensitive; the spec is silent on this.)
*
* @param key
* @param pf
*/
public static void put(String key, PhysicalFont pf) {
if (physicalFontMap.get(key.toLowerCase())!=null) {
log.warn("Overwriting existing physicalFontMap entry: " + key.toLowerCase());
}
physicalFontMap.put(key.toLowerCase(), pf);
}
private final static Map physicalFontMapByFilenameLowercase;
// private final static Map physicalFontFamiliesMap;
// int lastSeenNumberOfPhysicalFonts = 0;
//
//
// /** Max difference for it to be considered an acceptable match.
// * Note that this value will depend on the weights in the
// * difference function.
// */
// public static final int MATCH_THRESHOLD = 30;
private static FontResolver fontResolver;
// parse font to ascertain font info
private static FontInfoFinder fontInfoFinder;
private static String osName;
static {
try {
osName = System.getProperty("os.name");
fontCache = FontCache.load();
if (fontCache == null) {
fontCache = new FontCache();
}
physicalFontMap = new HashMap();
physicalFontMapByFilenameLowercase
= new HashMap();
// physicalFontFamiliesMap = new HashMap();
fontResolver = FontSetup.createMinimalFontResolver();
// parse font to ascertain font info
fontInfoFinder = new FontInfoFinder();
// setupPhysicalFonts();
} catch (Exception exc) {
throw new RuntimeException(exc);
}
}
private static String regex;
public static String getRegex() {
return regex;
}
/**
* Set a regex to limit to the common fonts in order to lower memory use.
* eg on Mac regex=".*(Courier New|Arial|Times New Roman|Comic Sans|Georgia|Impact|Lucida Console|Lucida Sans Unicode|Palatino Linotype|Tahoma|Trebuchet|Verdana|Symbol|Webdings|Wingdings|MS Sans Serif|MS Serif).*";
* on Windows: regex=".*(calibri|cour|arial|times|comic|georgia|impact|LSANS|pala|tahoma|trebuc|verdana|symbol|webdings|wingding).*";
*
* If you want to use this, set it before instantiating a Mapper.
*
* @since 2.8.1
*/
public static void setRegex(String regex) {
PhysicalFonts.regex = regex;
}
/**
* Autodetect fonts available on the system.
*
*/
public final static void discoverPhysicalFonts() throws Exception {
// Currently we use FOP - inspired by org.apache.fop.render.PrintRendererConfigurator
// iText also has a font discoverer (which we could use
// instead, but don't, since in docx4j we're settled on
// PDF output via XSL FO)
FontFileFinder fontFileFinder = new FontFileFinder();
// Automagically finds a list of font files on local system
// based on os.name
List fontFileList = fontFileFinder.find();
if (regex==null) {
for (Iterator iter = fontFileList.iterator(); iter.hasNext();) {
URL fontUrl = getURL(iter.next());
// parse font to ascertain font info
addPhysicalFont( fontUrl);
}
} else {
Pattern pattern = Pattern.compile(regex);
for (Iterator iter = fontFileList.iterator(); iter.hasNext();) {
URL fontUrl = getURL(iter.next());
// parse font to ascertain font info
if (pattern.matcher(fontUrl.toString()).matches()){
addPhysicalFont( fontUrl);
} else {
// log.debug("Ignoring " + fontUrl.toString() );
}
}
}
// docx4j 3.2.2: no, these are document specific, so don't belong in PhysicalFonts
// // Add fonts from our Temporary Embedded Fonts dir
// fontFileList = fontFileFinder.find( ObfuscatedFontPart.getTemporaryEmbeddedFontsDir() );
// for (Iterator iter = fontFileList.iterator(); iter.hasNext();) {
// URL fontUrl = getURL(iter.next());
// addPhysicalFont( fontUrl);
// }
fontCache.save();
}
private static URL getURL(Object o) throws Exception {
if (o instanceof java.io.File) {
// Running in Tomcat
java.io.File f = (java.io.File)o;
return f.toURL();
} else if (o instanceof java.net.URL) {
return (URL)o;
} else {
throw new Exception("Unexpected object:" + o.getClass().getName() );
}
}
private static boolean loggedWarningAlready = false;
/**
* Add a physical font's EmbedFontInfo object. Not to be used for embedded fonts.
*
* @param fontUrl eg new java.net.URL("file:" + path)
*/
public static void addPhysicalFont(URL fontUrl) {
addPhysicalFonts(null, fontUrl);
}
/**
* Add a physical font's EmbedFontInfo object. Not to be used for embedded fonts.
*
* @param fontUrl eg new java.net.URL("file:" + path)
*/
public static void addPhysicalFonts(String nameAsInFontTablePart, URL fontUrl) {
List physicalFonts = getPhysicalFont( nameAsInFontTablePart, fontUrl);
if (physicalFonts==null) return;
for (PhysicalFont pf : physicalFonts) {
if (pf!=null) {
// Add it to the map
put(pf.getName(), pf);
log.debug("Added '" + pf.getName() + "' -> " + pf.getEmbeddedFile());
if (nameAsInFontTablePart != null
&& get(nameAsInFontTablePart)==null) {
put(nameAsInFontTablePart, pf);
log.debug("Added '" + nameAsInFontTablePart + "' -> " + pf.getEmbeddedFile());
}
// We also need to add it to map by filename
String filename = pf.getEmbeddedFile();
// eg on Windows: file:/C:/Windows/FONTS/cour.ttf
filename = filename.substring( filename.lastIndexOf("/")+1).toLowerCase();
if (osName.startsWith("Mac")) {
filename = filename.replace("%20", " ");
/* there are a few like this on Windows as well, but they're exotic,
* eg biondi%20light.ttf catriel ligurino *Tiger* tandelle
*/
}
physicalFontMapByFilenameLowercase.put(filename, pf);
log.debug("added to filename map: " + filename);
// String familyName = triplet.getName();
// pf.setFamilyName(familyName);
//
// PhysicalFontFamily pff;
// if (physicalFontFamiliesMap.get(familyName)==null) {
// pff = new PhysicalFontFamily(familyName);
// physicalFontFamiliesMap.put(familyName, pff);
// } else {
// pff = physicalFontFamiliesMap.get(familyName);
// }
// pff.addFont(pf);
}
}
}
/**
* Get a physical font's EmbedFontInfo object.
*
* @param fontUrl eg new java.net.URL("file:" + path)
*/
public static List getPhysicalFont(String nameAsInFontTablePart, URL fontUrl) {
List pfList = new ArrayList();
log.debug(nameAsInFontTablePart);
//List embedFontInfoList = fontInfoFinder.find(fontUrl, fontResolver, fontCache);
EmbedFontInfo[] embedFontInfoList = fontInfoFinder.find(fontUrl, fontResolver, fontCache);
/* FOP r644208 (Bugzilla #44737) 3/04/08 made this an array,
// so if you are using non-patched FOP, it needs to be at least this revision
// (but doesn't seem to be in FOP 0.95 binary?!) */
if (embedFontInfoList==null) {
// Quite a few fonts exist that we can't seem to get
// EmbedFontInfo for. To be investigated.
log.warn("Aborting: " + fontUrl.toString() + " (can't get EmbedFontInfo[] .. try deleting fop-fonts.cache?)");
return null;
}
StringBuffer debug = new StringBuffer();
for ( EmbedFontInfo fontInfo : embedFontInfoList ) {
/* EmbedFontInfo has:
* - subFontName (if the underlying CustomFont is a TTC)
* - PostScriptName = CustomFont.getFontName()
* - FontTriplets named:
* - CustomFont.getFullName() with quotes stripped
* - CustomFont.getFontName() with whitespace stripped
* - each family name (with quotes stripped)
*
* By creating one PhysicalFont object
* per triplet, each referring to the same
* EmbedFontInfo, we increase the chances
* of a match
*
ComicSansMS
.. triplet Comic Sans MS (priority + 0
.. triplet ComicSansMS (priority + 0
ComicSansMS-Bold
.. triplet Comic Sans MS Bold (priority + 0
.. triplet ComicSansMS-Bold (priority + 0
.. triplet Comic Sans MS (priority + 5
*
* but the second triplet is what FOP creates where its
* getPostScriptName()
* does FontUtil.stripWhiteSpace(getFullName());.
*
* and the third is just the family name.
*
* So we only get the first.
*
*/
if (fontInfo == null) {
// return;
continue;
}
debug.append("------- \n");
try {
debug.append(fontInfo.getPostScriptName() + "\n" );
if (!fontInfo.isEmbeddable() ) {
// log.info(tokens[x] + " is not embeddable; skipping.");
/*
* No point looking at this font, since if we tried to use it,
* later, we'd get:
*
* com.lowagie.text.DocumentException: file:/usr/share/fonts/truetype/ttf-tamil-fonts/lohit_ta.ttf cannot be embedded due to licensing restrictions.
at com.lowagie.text.pdf.TrueTypeFont.(TrueTypeFont.java:364)
at com.lowagie.text.pdf.TrueTypeFont.(TrueTypeFont.java:335)
at com.lowagie.text.pdf.BaseFont.createFont(BaseFont.java:399)
at com.lowagie.text.pdf.BaseFont.createFont(BaseFont.java:345)
at org.xhtmlrenderer.pdf.ITextFontResolver.addFont(ITextFontResolver.java:164)
will be thrown if os_2.fsType == 2
*/
log.warn(fontInfo.getEmbedFile() + " is not embeddable; ignoring this font.");
//return;
continue;
}
} catch (Exception e1) {
// NB isEmbeddable() only exists in our patched FOP
if (!loggedWarningAlready) {
log.warn("Not using patched FOP; isEmbeddable() method missing.");
loggedWarningAlready = true;
}
}
PhysicalFont pf;
// for (Iterator iterIn = fontInfo.getFontTriplets().iterator() ; iterIn.hasNext();) {
// FontTriplet triplet = (FontTriplet)iterIn.next();
FontTriplet triplet = (FontTriplet)fontInfo.getFontTriplets().get(0);
// There is one triplet for each of the font family names
// this font has, and we create a PhysicalFont object
// for each of them. For our purposes though, each of
// these physical font objects contains the same info
String lower = fontInfo.getEmbedFile().toLowerCase();
log.debug("Processing physical font: " + lower);
debug.append(".. triplet " + triplet.getName()
+ " (priority " + triplet.getPriority() +"\n" );
pf = null;
// xhtmlrenderer's org.xhtmlrenderer.pdf.ITextFontResolver.addFont
// can handle
// .otf, .ttf, .ttc, .pfb
if (lower.endsWith(".otf") || lower.endsWith(".ttf") || lower.endsWith(".ttc") ) {
pf = new PhysicalFont(triplet.getName(), fontInfo, fontResolver);
} else if (lower.endsWith(".pfb") ) {
// See whether we have everything org.xhtmlrenderer.pdf.ITextFontResolver.addFont
// will need - for a .pfb file, it needs a corresponding .afm or .pfm
String afm = FontUtils.pathFromURL(lower);
afm = afm.substring(0, afm.length()-4 ) + ".afm"; // drop the 'file:'
log.debug("Looking for: " + afm);
File f = new File(afm);
if (f.exists()) {
log.debug(".. found");
// Uncomment if you want to use the iText stuff in docx4j-extras
// // We're only interested if this font supports UTF-8 encoding
// // since otherwise iText can't use it (at least on a
// // UTF8 encoded XHTML document)
// try {
// BaseFont bf = BaseFont.createFont(afm,
// BaseFont.IDENTITY_H,
// BaseFont.NOT_EMBEDDED);
// } catch (java.io.UnsupportedEncodingException uee) {
// log.error(afm + " does not support UTF encoding, so ignoring");
// continue;
// } catch (Exception e) {
// log.error(e.getMessage(), e);
// continue;
// }
pf = new PhysicalFont(triplet.getName(),fontInfo, fontResolver);
} else {
// Should we be doing afm first, or pfm?
String pfm = FontUtils.pathFromURL(lower);
pfm = pfm.substring(0, pfm.length()-4 ) + ".pfm"; // drop the 'file:'
log.debug("Looking for: " + pfm);
f = new File(pfm);
if (f.exists()) {
log.debug(".. found");
// Uncomment if you want to use the iText stuff in docx4j-extras
// // We're only interested if this font supports UTF-8 encoding
// try {
// BaseFont bf = BaseFont.createFont(pfm,
// BaseFont.IDENTITY_H,
// BaseFont.NOT_EMBEDDED);
// } catch (java.io.UnsupportedEncodingException uee) {
// log.error(pfm + " does not support UTF encoding, so ignoring");
// continue;
// } catch (Exception e) {
// log.error(e.getMessage(), e);
// continue;
// }
pf = new PhysicalFont(triplet.getName(), fontInfo, fontResolver);
} else {
log.warn("Skipping " + triplet.getName() + "; couldn't find .afm or .pfm for : " + fontInfo.getEmbedFile());
}
}
} else {
log.warn("Skipping " + triplet.getName() + "; unsupported type: " + fontInfo.getEmbedFile());
}
if (pf!=null) {
pfList.add(pf);
}
}
log.debug(debug.toString() );
return pfList;
}
public static PhysicalFont getBoldForm( PhysicalFont pf) {
// look up the font in MicrosoftFontsRegistry
MicrosoftFonts.Font msFont = MicrosoftFontsRegistry.getMsFonts().get(pf.getName() );
if (msFont==null) {
log.warn("No entry in MicrosoftFontsRegistry for: " + pf.getName());
return null;
}
if (msFont.getBold()==null) {
log.debug("No bold form for: " + pf.getName());
return null;
} else {
// We have to go via the file name, grrr..
// since MicrosoftFonts.xml doesn't give the associate font name
String filename;
if (osName.startsWith("Mac")) {
if (msFont.getBold().getMac()==null) {
log.debug("No bold form for mac for: " + pf.getName());
return null;
}
filename = msFont.getBold().getMac().toLowerCase();
} else {
filename = msFont.getBold().getFilename().toLowerCase();
}
log.debug("Fetching: " + filename);
return physicalFontMapByFilenameLowercase.get(filename);
}
}
public static PhysicalFont getBoldItalicForm( PhysicalFont pf) {
// look up the font in MicrosoftFontsRegistry
MicrosoftFonts.Font msFont = MicrosoftFontsRegistry.getMsFonts().get(pf.getName() );
if (msFont==null) {
log.warn("No entry in MicrosoftFontsRegistry for: " + pf.getName());
return null;
}
if (msFont.getBolditalic()==null) {
log.debug("No Bolditalic form for: " + pf.getName());
return null;
} else {
// We have to go via the file name, grrr..
// since MicrosoftFonts.xml doesn't give the associate font name
String filename;
if (osName.startsWith("Mac")) {
if (msFont.getBolditalic().getMac()==null) {
log.debug("No Bolditalic form for mac for: " + pf.getName());
return null;
}
filename = msFont.getBolditalic().getMac().toLowerCase();
} else {
filename = msFont.getBolditalic().getFilename().toLowerCase();
}
log.debug("Fetching: " + filename);
return physicalFontMapByFilenameLowercase.get(filename);
}
}
public static PhysicalFont getItalicForm( PhysicalFont pf) {
// look up the font in MicrosoftFontsRegistry
MicrosoftFonts.Font msFont = MicrosoftFontsRegistry.getMsFonts().get(pf.getName() );
if (msFont==null) {
log.debug("No entry in MicrosoftFontsRegistry for: " + pf.getName());
return null;
}
if (msFont.getItalic()==null) {
log.info("No italic form for: " + pf.getName());
return null;
} else {
// We have to go via the file name, grrr..
// since MicrosoftFonts.xml doesn't give the associate font name
String filename;
if (osName.startsWith("Mac")) {
if (msFont.getItalic().getMac()==null) {
log.info("No italic form for mac for: " + pf.getName());
return null;
}
filename = msFont.getItalic().getMac().toLowerCase();
} else {
filename = msFont.getItalic().getFilename().toLowerCase();
}
log.debug("Fetching: " + filename);
return physicalFontMapByFilenameLowercase.get(filename);
}
}
public static String getPhysicalFont(OpcPackage wmlPackage, String fontName) {
log.debug("looking for: " + fontName);
if (!(wmlPackage instanceof WordprocessingMLPackage)) {
log.error("Implement me for pptx4j");
return null;
}
PhysicalFont pf = ((WordprocessingMLPackage)wmlPackage).getFontMapper().get(fontName);
if (pf!=null) {
log.debug("Font '" + fontName + "' maps to " + pf.getName() );
return pf.getName();
} else {
log.warn("Font '" + fontName + "' is not mapped to a physical font. " );
return null;
}
}
public static void main(String[] args) throws Exception {
discoverPhysicalFonts();
System.out.println("That should have listed your physical fonts (provided you have logging enabled).");
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy