org.docx4j.fonts.PhysicalFonts Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of docx4j-core Show documentation
docx4j is a library which helps you to work with the Office Open
XML file format as used in docx
documents, pptx presentations, and xlsx spreadsheets.
package org.docx4j.fonts;
import java.io.File;
import java.net.URL;
import java.util.*;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.docx4j.fonts.fop.fonts.EmbedFontInfo;
import org.docx4j.fonts.fop.fonts.FontCache;
import org.docx4j.fonts.fop.fonts.FontResolver;
import org.docx4j.fonts.fop.fonts.FontSetup;
import org.docx4j.fonts.fop.fonts.FontTriplet;
import org.docx4j.fonts.fop.fonts.autodetect.FontFileFinder;
import org.docx4j.fonts.fop.fonts.autodetect.FontInfoFinder;
import org.docx4j.fonts.microsoft.MicrosoftFonts;
import org.docx4j.fonts.microsoft.MicrosoftFontsRegistry;
import org.docx4j.openpackaging.packages.OpcPackage;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.ObfuscatedFontPart;
//import com.lowagie.text.pdf.BaseFont;
/**
* The fonts which are physically installed on the system.
*
* They can be discovered automatically, or you can
* just add specific fonts.
*
* Do NOT add fonts embedded in a docx to physicalFontMap!
*
* @author dev
*
*/
public class PhysicalFonts {
/** Logger shared by all static methods of this class. */
protected static Logger log = LoggerFactory.getLogger(PhysicalFonts.class);
/**
 * Font cache passed to the font info finder whenever a font file is parsed.
 * It is loaded (or created) by the static initialiser and saved at the end
 * of {@link #discoverPhysicalFonts()}.
 */
protected static FontCache fontCache;
/**
 * These are the physical fonts on the system which we have discovered,
 * keyed by lower-cased font name.
 * Do NOT add fonts embedded in a docx to physicalFontMap!
 */
private final static Map<String, PhysicalFont> physicalFontMap;

/**
 * Exposes the live (mutable) map of discovered physical fonts.
 *
 * @return the backing map itself, not a copy; its keys are lower-cased font names
 * @deprecated callers that use the map directly bypass the case-insensitive
 *             key handling; use {@link #get(String)} and
 *             {@link #put(String, PhysicalFont)} instead
 */
@Deprecated // want to enforce case insensitive
public static Map<String, PhysicalFont> getPhysicalFonts() {
    return physicalFontMap;
}
/**
 * Get a PhysicalFont
 * by case-insensitive name. (Although Word always
 * uses Title Case for font names, it is actually
 * case insensitive; the spec is silent on this.)
 *
 * @param key the font name, in any case; may be null
 * @return the font registered under that name, or null if key is null
 *         or nothing is registered for it
 */
public static PhysicalFont get(String key) {
    if (key == null) {
        // Nothing can be registered under a null name; report "not found"
        // rather than failing with a NullPointerException.
        return null;
    }
    return physicalFontMap.get(normalizeFontName(key));
}

/**
 * Put a PhysicalFont
 * by case-insensitive name. (Although Word always
 * uses Title Case for font names, it is actually
 * case insensitive; the spec is silent on this.)
 *
 * An existing entry with the same (case-insensitive) name is overwritten,
 * and a warning is logged when that happens.
 *
 * @param key the font name, in any case; must not be null
 * @param pf the physical font to register under that name
 */
public static void put(String key, PhysicalFont pf) {
    String normalizedKey = normalizeFontName(key);
    if (physicalFontMap.get(normalizedKey) != null) {
        log.warn("Overwriting existing physicalFontMap entry: " + normalizedKey);
    }
    physicalFontMap.put(normalizedKey, pf);
}

/**
 * Lower-cases a font name for use as a map key.
 *
 * Locale.ROOT is used so that the key does not depend on the JVM's default
 * locale (under a Turkish locale, for example, "I" would otherwise
 * lower-case to a dotless i and "Impact" would no longer match "impact").
 */
private static String normalizeFontName(String name) {
    return name.toLowerCase(Locale.ROOT);
}
/**
 * The same physical fonts, keyed by the lower-cased file name (last path
 * segment) of the font file. Used to find the bold / italic variants of a
 * font, which are only known by file name.
 */
private final static Map<String, PhysicalFont> physicalFontMapByFilenameLowercase;
// private final static Map physicalFontFamiliesMap;
// int lastSeenNumberOfPhysicalFonts = 0;
//
//
// /** Max difference for it to be considered an acceptable match.
// * Note that this value will depend on the weights in the
// * difference function.
// */
// public static final int MATCH_THRESHOLD = 30;
/**
 * Resolver passed to the font info finder and to every PhysicalFont created
 * by this class; set up once in the static initialiser.
 */
private static FontResolver fontResolver;
/** Parses a font file to ascertain its font info (EmbedFontInfo). */
private static FontInfoFinder fontInfoFinder;
/** Value of the "os.name" system property; used to enable Mac-specific file name handling. */
private static String osName;
// One-off class initialisation: records the OS name, obtains a font cache,
// creates the two (initially empty) font maps and the FOP helper objects.
// Any failure is rethrown as a RuntimeException, which makes class
// initialisation fail.
static {
try {
osName = System.getProperty("os.name");
// load() may return null, in which case we start with a fresh cache
fontCache = FontCache.load();
if (fontCache == null) {
fontCache = new FontCache();
}
// Both maps start empty; they are filled by addPhysicalFonts / put
physicalFontMap = new HashMap();
physicalFontMapByFilenameLowercase
= new HashMap();
// physicalFontFamiliesMap = new HashMap();
fontResolver = FontSetup.createMinimalFontResolver();
// parse font to ascertain font info
fontInfoFinder = new FontInfoFinder();
// Fonts are NOT discovered here; callers invoke discoverPhysicalFonts() explicitly
// setupPhysicalFonts();
} catch (Exception exc) {
throw new RuntimeException(exc);
}
}
/**
 * Optional filter applied by discoverPhysicalFonts(): when non-null, only
 * font files whose URL matches this regular expression are registered.
 */
private static String regex;
/**
 * @return the regular expression currently used to filter discovered font
 *         files, or null if no filter has been set
 */
public static String getRegex() {
return regex;
}
/**
* Set a regex to limit to the common fonts in order to lower memory use.
* eg on Mac regex=".*(Courier New|Arial|Times New Roman|Comic Sans|Georgia|Impact|Lucida Console|Lucida Sans Unicode|Palatino Linotype|Tahoma|Trebuchet|Verdana|Symbol|Webdings|Wingdings|MS Sans Serif|MS Serif).*";
* on Windows: regex=".*(calibri|cour|arial|times|comic|georgia|impact|LSANS|pala|tahoma|trebuc|verdana|symbol|webdings|wingding).*";
*
* The expression is matched against the whole font file URL (hence the
* leading and trailing ".*" in the examples above), and only while
* discoverPhysicalFonts() runs; fonts which have already been registered
* are not affected.
*
* If you want to use this, set it before instantiating a Mapper.
*
* @param regex the regular expression a font file URL must match in full,
*        or null to register every font file found
* @since 2.8.1
*/
public static void setRegex(String regex) {
PhysicalFonts.regex = regex;
}
/**
 * Autodetect fonts available on the system and register each of them as a
 * physical font.
 *
 * If a regex has been set (see setRegex), only font files whose URL matches
 * it in full are registered. The font cache is saved once all files have
 * been processed.
 *
 * @throws Exception if the font file list contains an entry which is neither
 *         a File nor a URL, or if anything else goes wrong during discovery
 */
public final static void discoverPhysicalFonts() throws Exception {
    // Font discovery is borrowed from FOP (inspired by
    // org.apache.fop.render.PrintRendererConfigurator). iText has a font
    // discoverer too, but docx4j has settled on PDF output via XSL FO.
    final FontFileFinder finder = new FontFileFinder();

    // The finder works out where to look for font files from os.name
    final List<?> candidates = finder.find();

    // No regex means no filtering at all
    final Pattern filter = (regex == null) ? null : Pattern.compile(regex);

    for (Object candidate : candidates) {
        final URL location = getURL(candidate);
        if (filter == null || filter.matcher(location.toString()).matches()) {
            // parse font to ascertain font info
            addPhysicalFont(location);
        }
    }

    // docx4j 3.2.2: fonts from the temporary embedded fonts dir are document
    // specific, so they deliberately are NOT added to PhysicalFonts here.

    fontCache.save();
}
/**
 * Converts an entry of the discovered font file list to a URL.
 *
 * @param o a java.io.File or a java.net.URL
 * @return o itself if it already is a URL, otherwise the URL form of the file
 * @throws Exception if o is null or of any other type, or if the file cannot
 *         be expressed as a URL
 */
private static URL getURL(Object o) throws Exception {
    if (o instanceof URL) {
        return (URL) o;
    }
    if (o instanceof File) {
        // Running in Tomcat
        // NOTE(review): File.toURL() is deprecated because it does not escape
        // characters which are illegal in URLs. It is kept on purpose: moving to
        // toURI().toURL() would change the URL string that the regex filter in
        // discoverPhysicalFonts() is matched against. Confirm before changing.
        return ((File) o).toURL();
    }
    // A null entry used to fail with a bare NullPointerException while this
    // message was being built; report it like any other unexpected entry.
    String type = (o == null) ? "null" : o.getClass().getName();
    throw new Exception("Unexpected object:" + type);
}
/**
 * Set once the "Not using patched FOP" warning has been logged, so that
 * getPhysicalFont emits it only a single time.
 */
private static boolean loggedWarningAlready = false;
/**
* Add a physical font's EmbedFontInfo object. Not to be used for embedded fonts.
*
* Shorthand for addPhysicalFonts(null, fontUrl), ie the font is registered
* without an additional font table name.
*
* @param fontUrl eg new java.net.URL("file:" + path)
*/
public static void addPhysicalFont(URL fontUrl) {
addPhysicalFonts(null, fontUrl);
}
/**
 * Add a physical font's EmbedFontInfo object. Not to be used for embedded fonts.
 *
 * Every font found in the file is registered under its own name and under
 * the lower-cased name of the font file. If nameAsInFontTablePart is given
 * and not yet registered, the font is registered under that name as well.
 * Nothing is registered if no font info can be obtained for the file.
 *
 * @param nameAsInFontTablePart additional name to register the font under;
 *        may be null
 * @param fontUrl eg new java.net.URL("file:" + path)
 */
public static void addPhysicalFonts(String nameAsInFontTablePart, URL fontUrl) {

    List<PhysicalFont> physicalFonts = getPhysicalFont(nameAsInFontTablePart, fontUrl);
    if (physicalFonts == null) {
        return;
    }

    for (PhysicalFont pf : physicalFonts) {
        if (pf == null) {
            continue;
        }

        // Add it to the map
        put(pf.getName(), pf);
        log.debug("Added '" + pf.getName() + "' -> " + pf.getEmbeddedFile());

        // An existing entry for the font table name is never replaced
        if (nameAsInFontTablePart != null
                && get(nameAsInFontTablePart) == null) {
            put(nameAsInFontTablePart, pf);
            log.debug("Added '" + nameAsInFontTablePart + "' -> " + pf.getEmbeddedFile());
        }

        // We also need to add it to map by filename
        String filename = pf.getEmbeddedFile();
        // eg on Windows: file:/C:/Windows/FONTS/cour.ttf
        filename = filename.substring(filename.lastIndexOf("/") + 1).toLowerCase();
        if (osName.startsWith("Mac")) {
            filename = filename.replace("%20", " ");
            /* there are a few like this on Windows as well, but they're exotic,
             * eg biondi%20light.ttf catriel ligurino *Tiger* tandelle
             */
        }
        physicalFontMapByFilenameLowercase.put(filename, pf);
        log.debug("added to filename map: " + filename);

//      String familyName = triplet.getName();
//      pf.setFamilyName(familyName);
//
//      PhysicalFontFamily pff;
//      if (physicalFontFamiliesMap.get(familyName)==null) {
//          pff = new PhysicalFontFamily(familyName);
//          physicalFontFamiliesMap.put(familyName, pff);
//      } else {
//          pff = physicalFontFamiliesMap.get(familyName);
//      }
//      pff.addFont(pf);
    }
}
/**
 * Get the physical font(s) contained in a font file.
 *
 * The file is parsed into EmbedFontInfo objects, and one PhysicalFont is
 * created for each usable one, named after its first font triplet. A font
 * is skipped (with a warning) if it is reported as not embeddable, if it
 * has no font triplet, if its file type is not .otf/.ttf/.ttc/.pfb, or if
 * it is a .pfb without an accompanying .afm or .pfm file.
 *
 * @param nameAsInFontTablePart only written to the debug log here; may be null
 * @param fontUrl eg new java.net.URL("file:" + path)
 * @return the fonts found (possibly an empty list), or null if no
 *         EmbedFontInfo could be obtained for the file at all
 */
public static List<PhysicalFont> getPhysicalFont(String nameAsInFontTablePart, URL fontUrl) {

    List<PhysicalFont> pfList = new ArrayList<PhysicalFont>();

    log.debug(nameAsInFontTablePart);

    /* FOP r644208 (Bugzilla #44737) 3/04/08 made this an array,
     * so if you are using non-patched FOP, it needs to be at least this revision
     * (but doesn't seem to be in FOP 0.95 binary?!) */
    EmbedFontInfo[] embedFontInfoList = fontInfoFinder.find(fontUrl, fontResolver, fontCache);

    if (embedFontInfoList == null) {
        // Quite a few fonts exist that we can't seem to get
        // EmbedFontInfo for. To be investigated.
        log.warn("Aborting: " + fontUrl.toString() + " (can't get EmbedFontInfo[] .. try deleting fop-fonts.cache?)");
        return null;
    }

    // Collects a per-font summary which is written to the debug log at the end
    StringBuilder debug = new StringBuilder();

    for (EmbedFontInfo fontInfo : embedFontInfoList) {

        /* EmbedFontInfo has:
         * - subFontName (if the underlying CustomFont is a TTC)
         * - PostScriptName = CustomFont.getFontName()
         * - FontTriplets named:
         *     - CustomFont.getFullName() with quotes stripped
         *     - CustomFont.getFontName() with whitespace stripped
         *     - each family name (with quotes stripped)
         *
         * eg
         *   ComicSansMS-Bold
         *   .. triplet Comic Sans MS Bold (priority + 0
         *   .. triplet ComicSansMS-Bold (priority + 0
         *   .. triplet Comic Sans MS (priority + 5
         *
         * The second triplet is just what FOP derives by stripping whitespace,
         * and the third is the family name, so we only use the first.
         */
        if (fontInfo == null) {
            continue;
        }

        debug.append("------- \n");

        try {
            debug.append(fontInfo.getPostScriptName() + "\n");
            if (!fontInfo.isEmbeddable()) {
                /* No point looking at this font, since trying to use it later
                 * would fail, eg with iText:
                 *   com.lowagie.text.DocumentException: ... cannot be embedded
                 *   due to licensing restrictions.
                 * (thrown if os_2.fsType == 2)
                 */
                log.warn(fontInfo.getEmbedFile() + " is not embeddable; ignoring this font.");
                continue;
            }
        } catch (Exception e1) {
            // NB isEmbeddable() only exists in our patched FOP
            if (!loggedWarningAlready) {
                log.warn("Not using patched FOP; isEmbeddable() method missing.");
                loggedWarningAlready = true;
            }
        }

        // There is one triplet for each of the names this font has; for our
        // purposes they all carry the same info, so only the first is used.
        // Guard against a font without any triplet, which would otherwise
        // abort processing with an IndexOutOfBoundsException.
        List<?> triplets = fontInfo.getFontTriplets();
        if (triplets == null || triplets.isEmpty()) {
            log.warn("Skipping " + fontInfo.getEmbedFile() + "; it has no font triplets.");
            continue;
        }
        FontTriplet triplet = (FontTriplet) triplets.get(0);

        String embedFile = fontInfo.getEmbedFile();
        // Lower case is used for the extension checks only
        String lower = embedFile.toLowerCase();
        log.debug("Processing physical font: " + lower);
        debug.append(".. triplet " + triplet.getName()
                + " (priority " + triplet.getPriority() + "\n");

        PhysicalFont pf = null;
        // xhtmlrenderer's org.xhtmlrenderer.pdf.ITextFontResolver.addFont
        // can handle
        // .otf, .ttf, .ttc, .pfb
        if (lower.endsWith(".otf") || lower.endsWith(".ttf") || lower.endsWith(".ttc")) {
            pf = new PhysicalFont(triplet.getName(), fontInfo, fontResolver);
        } else if (lower.endsWith(".pfb")) {
            // See whether we have everything org.xhtmlrenderer.pdf.ITextFontResolver.addFont
            // will need - for a .pfb file, it needs a corresponding .afm or .pfm
            // (.afm is tried first; should we be doing afm first, or pfm?)
            // The optional iText UTF-8 encoding check from docx4j-extras is not done here.
            if (hasMetricsFile(embedFile, ".afm") || hasMetricsFile(embedFile, ".pfm")) {
                pf = new PhysicalFont(triplet.getName(), fontInfo, fontResolver);
            } else {
                log.warn("Skipping " + triplet.getName() + "; couldn't find .afm or .pfm for : " + fontInfo.getEmbedFile());
            }
        } else {
            log.warn("Skipping " + triplet.getName() + "; unsupported type: " + fontInfo.getEmbedFile());
        }

        if (pf != null) {
            pfList.add(pf);
        }
    }

    log.debug(debug.toString());
    return pfList;
}

/**
 * Tells whether the metrics file accompanying a .pfb font exists, ie the
 * file with the same path but with its 4 character extension replaced.
 *
 * The path is used in its original case: lower-casing it first (as was done
 * previously) makes the lookup fail on case-sensitive file systems whenever
 * the path contains an upper case character.
 *
 * @param embedFile location of the .pfb file, as reported by EmbedFontInfo
 * @param extension replacement extension including the dot, eg ".afm"
 */
private static boolean hasMetricsFile(String embedFile, String extension) {
    // presumably pathFromURL turns the URL string into a file system path - verify against FontUtils
    String path = FontUtils.pathFromURL(embedFile);
    path = path.substring(0, path.length() - 4) + extension;
    log.debug("Looking for: " + path);
    boolean found = new File(path).exists();
    if (found) {
        log.debug(".. found");
    }
    return found;
}
/**
 * Finds the physical font holding the bold form of the given font.
 *
 * The font is looked up by name in MicrosoftFontsRegistry; the file name
 * recorded there for its bold form (the Mac one when running on a Mac) is
 * then looked up in the map of physical fonts by file name.
 *
 * @param pf the regular font
 * @return the bold font, or null if the registry has no entry or no bold
 *         form for this font, or no physical font with that file name is known
 */
public static PhysicalFont getBoldForm( PhysicalFont pf) {
    final String fontName = pf.getName();

    // look up the font in MicrosoftFontsRegistry
    final MicrosoftFonts.Font registryEntry = MicrosoftFontsRegistry.getMsFonts().get(fontName);
    if (registryEntry == null) {
        log.warn("No entry in MicrosoftFontsRegistry for: " + fontName);
        return null;
    }
    if (registryEntry.getBold() == null) {
        log.debug("No bold form for: " + fontName);
        return null;
    }

    final boolean onMac = osName.startsWith("Mac");
    if (onMac && registryEntry.getBold().getMac() == null) {
        log.debug("No bold form for mac for: " + fontName);
        return null;
    }

    // We have to go via the file name, since MicrosoftFonts.xml
    // doesn't give the associated font name
    final String fileKey = onMac
            ? registryEntry.getBold().getMac().toLowerCase()
            : registryEntry.getBold().getFilename().toLowerCase();
    log.debug("Fetching: " + fileKey);
    return physicalFontMapByFilenameLowercase.get(fileKey);
}
/**
 * Finds the physical font holding the bold italic form of the given font.
 *
 * The font is looked up by name in MicrosoftFontsRegistry; the file name
 * recorded there for its bold italic form (the Mac one when running on a
 * Mac) is then looked up in the map of physical fonts by file name.
 *
 * @param pf the regular font
 * @return the bold italic font, or null if the registry has no entry or no
 *         bold italic form for this font, or no physical font with that
 *         file name is known
 */
public static PhysicalFont getBoldItalicForm( PhysicalFont pf) {
    final String fontName = pf.getName();

    // look up the font in MicrosoftFontsRegistry
    final MicrosoftFonts.Font registryEntry = MicrosoftFontsRegistry.getMsFonts().get(fontName);
    if (registryEntry == null) {
        log.warn("No entry in MicrosoftFontsRegistry for: " + fontName);
        return null;
    }
    if (registryEntry.getBolditalic() == null) {
        log.debug("No Bolditalic form for: " + fontName);
        return null;
    }

    final boolean onMac = osName.startsWith("Mac");
    if (onMac && registryEntry.getBolditalic().getMac() == null) {
        log.debug("No Bolditalic form for mac for: " + fontName);
        return null;
    }

    // We have to go via the file name, since MicrosoftFonts.xml
    // doesn't give the associated font name
    final String fileKey = onMac
            ? registryEntry.getBolditalic().getMac().toLowerCase()
            : registryEntry.getBolditalic().getFilename().toLowerCase();
    log.debug("Fetching: " + fileKey);
    return physicalFontMapByFilenameLowercase.get(fileKey);
}
/**
 * Finds the physical font holding the italic form of the given font.
 *
 * The font is looked up by name in MicrosoftFontsRegistry; the file name
 * recorded there for its italic form (the Mac one when running on a Mac)
 * is then looked up in the map of physical fonts by file name.
 *
 * @param pf the regular font
 * @return the italic font, or null if the registry has no entry or no
 *         italic form for this font, or no physical font with that file
 *         name is known
 */
public static PhysicalFont getItalicForm( PhysicalFont pf) {
    final String fontName = pf.getName();

    // look up the font in MicrosoftFontsRegistry
    final MicrosoftFonts.Font registryEntry = MicrosoftFontsRegistry.getMsFonts().get(fontName);
    if (registryEntry == null) {
        log.debug("No entry in MicrosoftFontsRegistry for: " + fontName);
        return null;
    }
    if (registryEntry.getItalic() == null) {
        log.info("No italic form for: " + fontName);
        return null;
    }

    final boolean onMac = osName.startsWith("Mac");
    if (onMac && registryEntry.getItalic().getMac() == null) {
        log.info("No italic form for mac for: " + fontName);
        return null;
    }

    // We have to go via the file name, since MicrosoftFonts.xml
    // doesn't give the associated font name
    final String fileKey = onMac
            ? registryEntry.getItalic().getMac().toLowerCase()
            : registryEntry.getItalic().getFilename().toLowerCase();
    log.debug("Fetching: " + fileKey);
    return physicalFontMapByFilenameLowercase.get(fileKey);
}
/**
 * Looks up the name of the physical font which a document font is mapped to
 * by the package's font mapper.
 *
 * @param wmlPackage the package whose font mapper is consulted; only
 *        WordprocessingMLPackage is supported
 * @param fontName the font name as used in the document
 * @return the name of the mapped physical font, or null if the package is
 *         not a WordprocessingMLPackage or the font is not mapped
 */
public static String getPhysicalFont(OpcPackage wmlPackage, String fontName) {
    log.debug("looking for: " + fontName);

    if (wmlPackage instanceof WordprocessingMLPackage) {
        final WordprocessingMLPackage wordPackage = (WordprocessingMLPackage) wmlPackage;
        final PhysicalFont mapped = wordPackage.getFontMapper().get(fontName);
        if (mapped == null) {
            log.warn("Font '" + fontName + "' is not mapped to a physical font. " );
            return null;
        }
        log.debug("Font '" + fontName + "' maps to " + mapped.getName() );
        return mapped.getName();
    }

    // Presentations (pptx4j) are not handled yet
    log.error("Implement me for pptx4j");
    return null;
}
/**
 * Command line entry point: runs font discovery once. The fonts found are
 * only reported through the logger (mostly at debug level), so logging must
 * be enabled to see them.
 *
 * @param args not used
 * @throws Exception if font discovery fails
 */
public static void main(String[] args) throws Exception {
discoverPhysicalFonts();
System.out.println("That should have listed your physical fonts (provided you have logging enabled).");
}
}