org.docx4j.fonts.PhysicalFonts Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of docx4j-core Show documentation
docx4j is a library which helps you to work with the Office Open XML file format as used in docx documents, pptx presentations, and xlsx spreadsheets.
There is a newer version: 11.5.0
Show newest version
package org.docx4j.fonts;

import java.io.File;
import java.net.URL;
import java.util.*;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.docx4j.fonts.fop.fonts.EmbedFontInfo;
import org.docx4j.fonts.fop.fonts.FontCache;
import org.docx4j.fonts.fop.fonts.FontResolver;
import org.docx4j.fonts.fop.fonts.FontSetup;
import org.docx4j.fonts.fop.fonts.FontTriplet;
import org.docx4j.fonts.fop.fonts.autodetect.FontFileFinder;
import org.docx4j.fonts.fop.fonts.autodetect.FontInfoFinder;
import org.docx4j.fonts.microsoft.MicrosoftFonts;
import org.docx4j.fonts.microsoft.MicrosoftFontsRegistry;
import org.docx4j.openpackaging.packages.OpcPackage;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.ObfuscatedFontPart;

//import com.lowagie.text.pdf.BaseFont;

/**
 * The fonts which are physically installed on the system.
 * 
 * They can be discovered automatically, or you can
 * just add specific fonts.
 * 
 * Do NOT add fonts embedded in a docx to physicalFontMap!
 * 
 * @author dev
 *
 */
public class PhysicalFonts {

	protected static Logger log = LoggerFactory.getLogger(PhysicalFonts.class);
	
	protected static FontCache fontCache;

	
	/** These are the physical fonts on the system which we have discovered. 
	 * Do NOT add fonts embedded in a docx to physicalFontMap! */ 
	private final static Map physicalFontMap;
	
	@Deprecated // want to enforce case insensitive
	public static Map getPhysicalFonts() {
		return physicalFontMap;
	}
	
	/**
	 * Get a PhysicalFont 
	 * by case-insensitive name.  (Although Word always
	 * uses Title Case for font names, it is actually
	 * case insensitive; the spec is silent on this.)  
	 * 
	 * @param key
	 * @return
	 */
	public static PhysicalFont get(String key) {
		return physicalFontMap.get(key.toLowerCase());
	}
	/**
	 * Put a PhysicalFont 
	 * by case-insensitive name.  (Although Word always
	 * uses Title Case for font names, it is actually
	 * case insensitive; the spec is silent on this.)  
	 * 
	 * @param key
	 * @param pf
	 */
	public static void put(String key, PhysicalFont pf) {
		if (physicalFontMap.get(key.toLowerCase())!=null) {
			log.warn("Overwriting existing physicalFontMap entry: " + key.toLowerCase());
		}
		physicalFontMap.put(key.toLowerCase(), pf);
	}

	private final static Map physicalFontMapByFilenameLowercase;
	
	
	//	private final static Map physicalFontFamiliesMap;
//	int lastSeenNumberOfPhysicalFonts = 0;
//	
//    
//    /** Max difference for it to be considered an acceptable match.
//     *  Note that this value will depend on the weights in the
//     *  difference function.
//     */ 
//    public static final int MATCH_THRESHOLD = 30;

    private static FontResolver fontResolver;        
	
    // parse font to ascertain font info
    private static FontInfoFinder fontInfoFinder; 

    private static String osName;
    
    static {
		
		try {

		    osName = System.getProperty("os.name");

	        fontCache = FontCache.load();
	        if (fontCache == null) {
	            fontCache = new FontCache();
	        }			
			
			physicalFontMap = new HashMap();
			physicalFontMapByFilenameLowercase 
							= new HashMap();
			
//			physicalFontFamiliesMap = new HashMap();
			
			fontResolver = FontSetup.createMinimalFontResolver();
			
            // parse font to ascertain font info
			fontInfoFinder = new FontInfoFinder();			
			
			// setupPhysicalFonts();
			
		} catch (Exception exc) {
			throw new RuntimeException(exc);
		}
	}

    
    private static String regex;
	public static String getRegex() {
		return regex;
	}
	
	/**
	 *  Set a regex to limit to the common fonts in order to lower memory use.
	 *  eg on Mac regex=".*(Courier New|Arial|Times New Roman|Comic Sans|Georgia|Impact|Lucida Console|Lucida Sans Unicode|Palatino Linotype|Tahoma|Trebuchet|Verdana|Symbol|Webdings|Wingdings|MS Sans Serif|MS Serif).*";
	 *  on Windows:  regex=".*(calibri|cour|arial|times|comic|georgia|impact|LSANS|pala|tahoma|trebuc|verdana|symbol|webdings|wingding).*";
	 *  
	 *  If you want to use this, set it before instantiating a Mapper. 
	 *  
	 *  @since 2.8.1
	 */
	public static void setRegex(String regex) {
		PhysicalFonts.regex = regex;
	}
    
	
	/**
	 * Autodetect fonts available on the system.
	 * 
	 */ 
	public final static void discoverPhysicalFonts() throws Exception {
		
		// Currently we use FOP - inspired by org.apache.fop.render.PrintRendererConfigurator
		// iText also has a font discoverer (which we could use
		// instead, but don't, since in docx4j we're settled on
		// PDF output via XSL FO)		
		
        FontFileFinder fontFileFinder = new FontFileFinder();
        
        // Automagically finds a list of font files on local system
        // based on os.name
        List fontFileList = fontFileFinder.find();      
        
        
        if (regex==null) {
            for (Iterator iter = fontFileList.iterator(); iter.hasNext();) {
            	
            	URL fontUrl = getURL(iter.next());
                
                // parse font to ascertain font info
            	addPhysicalFont( fontUrl);
            }
        } else {
        	Pattern pattern = Pattern.compile(regex);
            for (Iterator iter = fontFileList.iterator(); iter.hasNext();) {
            	
            	URL fontUrl = getURL(iter.next());
                
            	
                // parse font to ascertain font info
            	if (pattern.matcher(fontUrl.toString()).matches()){
            		addPhysicalFont( fontUrl);        		
            	} else {
//                	log.debug("Ignoring " + fontUrl.toString() );

            	}
            }
        }
        

// docx4j 3.2.2: no, these are document specific, so don't belong in PhysicalFonts        
//        // Add fonts from our Temporary Embedded Fonts dir
//        fontFileList = fontFileFinder.find( ObfuscatedFontPart.getTemporaryEmbeddedFontsDir() );
//        for (Iterator iter = fontFileList.iterator(); iter.hasNext();) {
//            URL fontUrl = getURL(iter.next());
//            addPhysicalFont( fontUrl);
//        }
        
        fontCache.save();
        
	}
	
	private static URL getURL(Object o) throws Exception {
		
    	if (o instanceof java.io.File) {
    		// Running in Tomcat
    		java.io.File f = (java.io.File)o;
    		return f.toURL();
    	} else if (o instanceof java.net.URL) {
    		return (URL)o;
    	} else {
    		throw new Exception("Unexpected object:" + o.getClass().getName() );
    	}        	
	}

	private static boolean loggedWarningAlready = false;
	
	/**
	 * Add a physical font's EmbedFontInfo object.  Not to be used for embedded fonts.
	 *
	 * @param fontUrl eg new java.net.URL("file:" + path)
	 */
	public static void addPhysicalFont(URL fontUrl) {
		addPhysicalFonts(null, fontUrl);
	}

	/**
	 * Add a physical font's EmbedFontInfo object.  Not to be used for embedded fonts.
	 * 
	 * @param fontUrl eg new java.net.URL("file:" + path)
	 */
	public static void addPhysicalFonts(String nameAsInFontTablePart, URL fontUrl) {

		List physicalFonts = getPhysicalFont( nameAsInFontTablePart,  fontUrl);
		if (physicalFonts==null) return;
		for (PhysicalFont pf : physicalFonts) {
			
	        if (pf!=null) {
	        		        	
	        	// Add it to the map
	        	put(pf.getName(), pf);
	    		log.debug("Added '" + pf.getName() + "' -> " + pf.getEmbeddedFile());

				if (nameAsInFontTablePart != null 
						&& get(nameAsInFontTablePart)==null) {
					
					put(nameAsInFontTablePart, pf);
					log.debug("Added '" + nameAsInFontTablePart + "' -> " + pf.getEmbeddedFile());
				}
	    		
	    		// We also need to add it to map by filename
	    		String filename = pf.getEmbeddedFile();
	    		// eg on Windows:  file:/C:/Windows/FONTS/cour.ttf
	    				    		
	    		filename = filename.substring( filename.lastIndexOf("/")+1).toLowerCase();
	    		
	    		if (osName.startsWith("Mac")) {
	    			filename = filename.replace("%20", " "); 
	    			/* there are a few like this on Windows as well, but they're exotic, 
	    			 * eg  biondi%20light.ttf catriel ligurino *Tiger* tandelle
	    			 */
	    		}
	    		physicalFontMapByFilenameLowercase.put(filename, pf);
	    		log.debug("added to filename map: " + filename);
	        	
//	        	String familyName = triplet.getName();
//	        	pf.setFamilyName(familyName);
//	        	
//	        	PhysicalFontFamily pff;
//	        	if (physicalFontFamiliesMap.get(familyName)==null) {
//	        		pff = new PhysicalFontFamily(familyName);
//	        		physicalFontFamiliesMap.put(familyName, pff);
//	        	} else {
//	        		pff = physicalFontFamiliesMap.get(familyName);
//	        	}
//	        	pff.addFont(pf);
	        	
	        }			
		}
		
	}
	
	/**
	 * Get a physical font's EmbedFontInfo object.
	 * 
	 * @param fontUrl eg new java.net.URL("file:" + path)
	 */
	public static List getPhysicalFont(String nameAsInFontTablePart, URL fontUrl) {

		List pfList = new ArrayList();
		
		log.debug(nameAsInFontTablePart);

		
		//List embedFontInfoList = fontInfoFinder.find(fontUrl, fontResolver, fontCache);		
		EmbedFontInfo[] embedFontInfoList = fontInfoFinder.find(fontUrl, fontResolver, fontCache);
		/* FOP r644208 (Bugzilla #44737) 3/04/08 made this an array,
		// so if you are using non-patched FOP, it needs to be at least this revision
		// (but doesn't seem to be in FOP 0.95 binary?!) */ 
		
		if (embedFontInfoList==null) {
			// Quite a few fonts exist that we can't seem to get
			// EmbedFontInfo for. To be investigated.
			log.warn("Aborting: " + fontUrl.toString() +  " (can't get EmbedFontInfo[] .. try deleting fop-fonts.cache?)");
			return null;
		}
		
		StringBuffer debug = new StringBuffer();
		for ( EmbedFontInfo fontInfo : embedFontInfoList ) {
			
			/* EmbedFontInfo has:
			 * - subFontName (if the underlying CustomFont is a TTC)
			 * - PostScriptName = CustomFont.getFontName()
			 * - FontTriplets named:
			 * 		- CustomFont.getFullName() with quotes stripped
			 * 		- CustomFont.getFontName() with whitespace stripped
			 * 		- each family name        (with quotes stripped)
			 * 
			 * By creating one PhysicalFont object 
			 * per triplet, each referring to the same
			 * EmbedFontInfo, we increase the chances 
			 * of a match
			 * 
				ComicSansMS
				.. triplet Comic Sans MS (priority + 0
				.. triplet ComicSansMS (priority + 0
				
				ComicSansMS-Bold
				.. triplet Comic Sans MS Bold (priority + 0
				.. triplet ComicSansMS-Bold (priority + 0
				.. triplet Comic Sans MS (priority + 5
			 * 
			 * but the second triplet is what FOP creates where its
			 * getPostScriptName() 
			 * does FontUtil.stripWhiteSpace(getFullName());.
			 * 
			 * and the third is just the family name.
			 * 
			 * So we only get the first.
			 * 
			 */
			
			
			if (fontInfo == null) {
//				return;
				continue;
			}
			
			debug.append("------- \n");
			
			 try {
				debug.append(fontInfo.getPostScriptName() + "\n" );
				if (!fontInfo.isEmbeddable() ) {			        	
//	        	log.info(tokens[x] + " is not embeddable; skipping.");
					 
						/*
						 * No point looking at this font, since if we tried to use it,
						 * later, we'd get:
						 *  
						 * com.lowagie.text.DocumentException: file:/usr/share/fonts/truetype/ttf-tamil-fonts/lohit_ta.ttf cannot be embedded due to licensing restrictions.
							at com.lowagie.text.pdf.TrueTypeFont.(TrueTypeFont.java:364)
							at com.lowagie.text.pdf.TrueTypeFont.(TrueTypeFont.java:335)
							at com.lowagie.text.pdf.BaseFont.createFont(BaseFont.java:399)
							at com.lowagie.text.pdf.BaseFont.createFont(BaseFont.java:345)
							at org.xhtmlrenderer.pdf.ITextFontResolver.addFont(ITextFontResolver.java:164)
							
							will be thrown if os_2.fsType == 2
							
						 */
				    	log.warn(fontInfo.getEmbedFile() + " is not embeddable; ignoring this font.");
					 
					 //return;
				    continue;
				 }
			} catch (Exception e1) {
				// NB isEmbeddable() only exists in our patched FOP
				if (!loggedWarningAlready) {
					log.warn("Not using patched FOP; isEmbeddable() method missing.");
					loggedWarningAlready = true;
				}				
			}
				
			PhysicalFont pf; 
			
//			for (Iterator iterIn = fontInfo.getFontTriplets().iterator() ; iterIn.hasNext();) {
//				FontTriplet triplet = (FontTriplet)iterIn.next();
			
				FontTriplet triplet = (FontTriplet)fontInfo.getFontTriplets().get(0); 
				// There is one triplet for each of the font family names
				// this font has, and we create a PhysicalFont object 
				// for each of them.  For our purposes though, each of
				// these physical font objects contains the same info
		    	
		        String lower = fontInfo.getEmbedFile().toLowerCase();
		        log.debug("Processing physical font: " + lower);
				debug.append(".. triplet " + triplet.getName() 
						+ " (priority " + triplet.getPriority() +"\n" );
		        		        
		        pf = null;
		        // xhtmlrenderer's org.xhtmlrenderer.pdf.ITextFontResolver.addFont
		        // can handle
		        // .otf, .ttf, .ttc, .pfb
		        if (lower.endsWith(".otf") || lower.endsWith(".ttf") || lower.endsWith(".ttc") ) {
		        	pf = new PhysicalFont(triplet.getName(), fontInfo, fontResolver);
		        } else if (lower.endsWith(".pfb") ) {
		        	// See whether we have everything org.xhtmlrenderer.pdf.ITextFontResolver.addFont
		        	// will need - for a .pfb file, it needs a corresponding .afm or .pfm
					String afm = FontUtils.pathFromURL(lower);
					afm = afm.substring(0, afm.length()-4 ) + ".afm";  // drop the 'file:'
					log.debug("Looking for: " + afm);					
					File f = new File(afm);
			        if (f.exists()) {
			        	
			        	log.debug(".. found");

// Uncomment if you want to use the iText stuff in docx4j-extras			        	
//			        	// We're only interested if this font supports UTF-8 encoding
//			        	// since otherwise iText can't use it (at least on a
//			        	// UTF8 encoded XHTML document) 
//			        	try {
//			    		    BaseFont bf = BaseFont.createFont(afm,
//			    		    		BaseFont.IDENTITY_H, 
//									BaseFont.NOT_EMBEDDED);
//						} catch (java.io.UnsupportedEncodingException uee) {
//							log.error(afm + " does not support UTF encoding, so ignoring");
//							continue;
//						} catch (Exception e) {
//							log.error(e.getMessage(), e);
//							continue;
//						}
			        	pf = new PhysicalFont(triplet.getName(),fontInfo, fontResolver);
			        	
			        	
			        } else {
			        	// Should we be doing afm first, or pfm?
						String pfm = FontUtils.pathFromURL(lower);
						pfm = pfm.substring(0, pfm.length()-4 ) + ".pfm";  // drop the 'file:'
						log.debug("Looking for: " + pfm);
						f = new File(pfm);
				        if (f.exists()) {				
				        	log.debug(".. found");

				        	// Uncomment if you want to use the iText stuff in docx4j-extras			        					        	
//				        	// We're only interested if this font supports UTF-8 encoding
//				        	try {
//				    		    BaseFont bf = BaseFont.createFont(pfm,
//				    		    		BaseFont.IDENTITY_H, 
//										BaseFont.NOT_EMBEDDED);
//							} catch (java.io.UnsupportedEncodingException uee) {
//								log.error(pfm + " does not support UTF encoding, so ignoring");
//								continue;
//							} catch (Exception e) {
//								log.error(e.getMessage(), e);
//								continue;
//							}
				        	pf = new PhysicalFont(triplet.getName(), fontInfo, fontResolver);
				        } else {
				    		log.warn("Skipping " + triplet.getName() + "; couldn't find .afm or .pfm for : " + fontInfo.getEmbedFile());                	                    					        	
				        }
			        }
		        } else {                    	
		    		log.warn("Skipping " + triplet.getName() + "; unsupported type: " + fontInfo.getEmbedFile());                	                    	
		        }
		        
		        if (pf!=null) {
					pfList.add(pf);
		        }

			}            	
		
		log.debug(debug.toString() );
		return pfList;
	}
	
	public static PhysicalFont getBoldForm( PhysicalFont pf) {
		
		// look up the font in MicrosoftFontsRegistry
		MicrosoftFonts.Font msFont = MicrosoftFontsRegistry.getMsFonts().get(pf.getName() );
		
		if (msFont==null) {
			log.warn("No entry in MicrosoftFontsRegistry for: " + pf.getName());
			return null;
		}
		
		if (msFont.getBold()==null) {
			log.debug("No bold form for: " + pf.getName());
			return null;
		} else {
			
			// We have to go via the file name, grrr..
			// since MicrosoftFonts.xml doesn't give the associate font name

			String filename;
		    if (osName.startsWith("Mac")) {
		    	if (msFont.getBold().getMac()==null) {
					log.debug("No bold form for mac for: " + pf.getName());
		    		return null;		    	
		    	}
		    	filename = msFont.getBold().getMac().toLowerCase();
		    } else {
		    	filename = msFont.getBold().getFilename().toLowerCase();
		    }
			log.debug("Fetching: " + filename);
			return physicalFontMapByFilenameLowercase.get(filename);
		}		
	}
	
	public static PhysicalFont getBoldItalicForm( PhysicalFont pf) {
		
		// look up the font in MicrosoftFontsRegistry
		MicrosoftFonts.Font msFont = MicrosoftFontsRegistry.getMsFonts().get(pf.getName() );
		
		if (msFont==null) {
			log.warn("No entry in MicrosoftFontsRegistry for: " + pf.getName());
			return null;
		}
		
		if (msFont.getBolditalic()==null) {
			log.debug("No Bolditalic form for: " + pf.getName());
			return null;
		} else {
			
			// We have to go via the file name, grrr..
			// since MicrosoftFonts.xml doesn't give the associate font name
			String filename;
		    if (osName.startsWith("Mac")) {
		    	if (msFont.getBolditalic().getMac()==null) {
					log.debug("No Bolditalic form for mac for: " + pf.getName());
		    		return null;		    	
		    	}
		    	filename = msFont.getBolditalic().getMac().toLowerCase();
		    } else {
		    	filename = msFont.getBolditalic().getFilename().toLowerCase();
		    }	
			log.debug("Fetching: " + filename);
			return physicalFontMapByFilenameLowercase.get(filename);
		}		
	}
	
	public static PhysicalFont getItalicForm( PhysicalFont pf) {
		
		// look up the font in MicrosoftFontsRegistry
		MicrosoftFonts.Font msFont = MicrosoftFontsRegistry.getMsFonts().get(pf.getName() );
		
		if (msFont==null) {
			log.debug("No entry in MicrosoftFontsRegistry for: " + pf.getName());
			return null;
		}
		
		if (msFont.getItalic()==null) {
			log.info("No italic form for: " + pf.getName());
			return null;
		} else {
			
			// We have to go via the file name, grrr..
			// since MicrosoftFonts.xml doesn't give the associate font name
			String filename;
		    if (osName.startsWith("Mac")) {
		    	if (msFont.getItalic().getMac()==null) {
					log.info("No italic form for mac for: " + pf.getName());
		    		return null;		    	
		    	}
		    	filename = msFont.getItalic().getMac().toLowerCase();
		    } else {
		    	filename = msFont.getItalic().getFilename().toLowerCase();
		    }	
			log.debug("Fetching: " + filename);
			return physicalFontMapByFilenameLowercase.get(filename);
		}
		
	}
	
	public static String getPhysicalFont(OpcPackage wmlPackage, String fontName) {
		
		log.debug("looking for: " + fontName);

		if (!(wmlPackage instanceof WordprocessingMLPackage)) {
			log.error("Implement me for pptx4j");
			return null;
		}
		PhysicalFont pf = ((WordprocessingMLPackage)wmlPackage).getFontMapper().get(fontName);
		if (pf!=null) {
			log.debug("Font '" + fontName + "' maps to " + pf.getName() );
			return pf.getName();
		} else {
			log.warn("Font '" + fontName + "' is not mapped to a physical font. " );			
			return null;
		}		
	}	
	

	public static void main(String[] args) throws Exception {

		discoverPhysicalFonts();
		System.out.println("That should have listed your physical fonts (provided you have logging enabled).");
	}
	

}