All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jpedal.fonts.PdfFont Maven / Gradle / Ivy

There is a newer version: 20151002
Show newest version
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2016 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * PdfFont.java
 * ---------------
 */
package org.jpedal.fonts;

//standard java

import java.awt.Font;
import java.awt.Rectangle;
import java.io.BufferedReader;
import java.io.Serializable;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;
import org.jpedal.exception.PdfFontException;
import org.jpedal.fonts.glyph.PdfJavaGlyphs;
import org.jpedal.io.ObjectStore;
import org.jpedal.io.PdfObjectReader;
import org.jpedal.objects.raw.*;
import org.jpedal.parser.DecoderOptions;
import org.jpedal.parser.PdfFontFactory;
import org.jpedal.utils.LogWriter;
import org.jpedal.utils.NumberUtils;

/**
 * contains all generic pdf font data for fonts.

* */ public class PdfFont implements Serializable { PdfObject ToUnicode; String truncatedName; private boolean isArial; private boolean isWidthVertical; private Rectangle BBox; public String CMapName; boolean handleOddSapFontMapping; /** used to track if CID font is double byte or not by looking at first 2 values*/ boolean isFirstScan=true; int isDouble=-1; //-1=unset //value to use if no width set for this private static final int noWidth=-1; //workaroud for type3 fonts which contain both Hex and Denary Differences tables protected boolean containsHexNumbers, allNumbers; protected String embeddedFontName,embeddedFamilyName,copyright; private int objID; private float missingWidth=noWidth; boolean isSingleByte; protected boolean isFontVertical; /**cached value for last width value returned*/ private float lastWidth=noWidth; public PdfJavaGlyphs glyphs=new PdfJavaGlyphs(); /**cache for translate values*/ private String[] cachedValue=new String[256]; //unmapped font name private String rawFontName; //used by HTML to translate non-standard glyfs to correct values final Map nonStandardMappings=new HashMap(256); public boolean hasDoubleBytes; static{ setStandardFontMappings(); } public PdfFont(){} /**get handles onto Reader so we can access the file*/ public PdfFont(final PdfObjectReader current_pdf_file) { init(current_pdf_file); } public String getTruncatedName() { return truncatedName; } private static void setStandardFontMappings(){ final int count=StandardFonts.files_names.length; for(int i=0;i rawDiffKeys=new HashMap(); private int[] diffCharTable; /**lookup for which of each char for embedded fonts which we can flush*/ private float[] widthTable ; /**size to use for space if not defined (-1 is no setting)*/ private float possibleSpaceWidth=noWidth; /**handle onto file access*/ protected PdfObjectReader currentPdfFile; /**loader to load data from jar*/ protected final ClassLoader loader = this.getClass().getClassLoader(); /**FontBBox for font*/ public double[] FontMatrix={0.001d,0d,0d,0.001d,0,0}; /**font bounding box*/ public float[] FontBBox= { 0f, 0f, 1000f, 1000f }; /** * flag to show * Gxxx, Bxxx, Cxxx. */ protected boolean isHex; /**holds lookup to map char values*/ private String[] unicodeMappings; /**encoding pattern used for font. -1 means not set*/ protected int fontEnc = -1; /**flag to show type of font*/ protected boolean isCIDFont; /**lookup CID index mappings*/ private String[] CMAP; private int[] rawCMAP; /** CID font encoding*/ private String CIDfontEncoding; /**default width for font*/ private float defaultWidth=1f; protected boolean isFontSubstituted; protected int italicAngle; private byte[] stream; private int[] CIDToGIDMap; boolean hasCIDToGIDMap; /** * used to show truetype used for type 0 CID */ public boolean isFontSubstituted() { return isFontSubstituted; } /**Method to add the widths of a CID font*/ private void setCIDFontWidths( Object[] values) { widthTable=new float[65536]; //set all values to -1 so I can spot ones with no value for(int ii=0;ii<65536;ii++) { widthTable[ii]=noWidth; } int ptr=0, count=values.length,start,end; float nextWidth; byte[] nextNumber = null,rawNextWidth; Object o; for(int i=0;i=unicodeMappings.length) { return null; } else { return unicodeMappings[char_int]; } } /**store encoding and load required mappings*/ public final void putFontEncoding(int enc) { if(enc==StandardFonts.WIN && getBaseFontName().equals("Symbol")){ putFontEncoding(StandardFonts.SYMBOL); enc=StandardFonts.SYMBOL; } fontEnc=enc; StandardFonts.checkLoaded(enc); } /**return the mapped character*/ public final String getUnicodeValue(final String displayValue, final int rawInt){ String textValue=getUnicodeMapping(rawInt); if(textValue==null) { textValue=displayValue; } //map out lignatures if(!displayValue.isEmpty()){ final int displayChar=displayValue.charAt(0); switch(displayChar){ case 173: if(fontEnc==StandardFonts.WIN || fontEnc==StandardFonts.STD) { textValue="-"; } break; case 64256: textValue="ff"; break; case 64257: textValue="fi"; break; case 64260: textValue="ffl"; break; } } return textValue; } /** * convert value read from TJ operand into correct glyph
Also check to * see if mapped onto unicode value */ public final String getGlyphValue(final int rawInt) { if(cachedValue[rawInt]!=null) { return cachedValue[rawInt]; } String return_value = null; if(isCIDFont){ // test for unicode final String unicodeMappings=getUnicodeMapping(rawInt); if(unicodeMappings!=null) { return_value=unicodeMappings; } if(return_value == null){ //get font encoding final String fontEncoding =CIDfontEncoding; if(diffTable!=null){ return_value =diffTable[rawInt]; }else if(fontEncoding!=null){ if(fontEncoding.startsWith("Identity-")){ return_value= String.valueOf((char) rawInt); }else if(CMAP!=null){ final String newChar=CMAP[rawInt]; if(newChar!=null) { return_value=newChar; } } //probably a bit of a hack to fix /PDFdata/baseline_screens/12dec/de0009797407_factsheets_b2c-voll_de_de_31102012_1360767414.pdf }else if(fontEncoding==null && CMAP!=null && CMapName!=null && !CMapName.endsWith("-V") && !CMapName.endsWith("-H")){ final String newChar=CMAP[rawInt]; if(newChar!=null) { return_value=newChar; } } if(return_value==null) { return_value= String.valueOf(((char) rawInt)); } } }else { return_value=getStandardGlyphValue(rawInt); } //save value for next time cachedValue[rawInt]=return_value; return return_value; } /** * read translation table * @throws PdfFontException */ private String handleCIDEncoding(final PdfObject Encoding, final PdfObject fontObject) { BufferedReader CIDstream=null; final int encodingType=Encoding.getGeneralType(PdfDictionary.Encoding); String encodingName=CIDEncodings.getNameForEncoding(encodingType); //see if any general value (ie /UniCNS-UTF16-H not predefined in spec) if(encodingName==null){ if(encodingType==PdfDictionary.Identity_H) { encodingName="Identity-H"; } else if(encodingType==PdfDictionary.Identity_V){ encodingName="Identity-V"; isFontVertical=true; }else { encodingName=Encoding.getGeneralStringValue(); } } CMapName=Encoding.getName(PdfDictionary.CMapName); if(CMapName!=null){ stream=currentPdfFile.readStream(Encoding,true,true,false, false,false, Encoding.getCacheName(currentPdfFile.getObjectReader())); encodingName=CMapName; /* * tidy up stream so it works nicely with * our simple Parser */ String CMAPstream=new String(stream); CMAPstream=CMAPstream.replaceAll(" begincidchar ", "\nbegincidchar\n"); CMAPstream=CMAPstream.replaceAll(" endcidchar", "\nendcidchar"); CMAPstream=CMAPstream.replaceAll(" begincidrange ", "\begincidrange\n"); CMAPstream=CMAPstream.replaceAll(" endcidrange", "\nendcidrange"); CIDstream=new BufferedReader(new StringReader(CMAPstream)); } boolean isIdentity=(encodingType==PdfDictionary.Identity_H || encodingType==PdfDictionary.Identity_V ); //System.out.println("Name="+encodingName+" "+encodingType); /*allow for odd file created by SAP*/ //baseline_screens/idoq/UAT 9001001438 INV_I58234.pdf if(!isIdentity && fontObject!=null){ final String name=fontObject.getName(PdfDictionary.BaseFont); if(encodingType!=-1 && name!=null && name.contains("Identity") ){ isIdentity=true; handleOddSapFontMapping =true; isSingleByte=true; } } //investigate for daeja/Armando //flag up some single file CIDs (bit of a hack for the moment) //if(encodingType==CIDEncodings.CMAP_90ms_RKSJ_H){ //isSingleByte=true; //} /* put encoding in lookup table*/ if(CIDstream==null) { CIDfontEncoding=encodingName; } /* if not 2 standard encodings * load CMAP */ if(isIdentity){ glyphs.setIsIdentity(true); }else{ //there are a large number of CMAP vtables provided by Adobe which I have put in cid.jar //this code detects if present and reads the required table //I also put a font in cid.jar which I think is probably a mistake now (we do not need it) //test if cid.jar present on first time needed and throw exception if not // if(!isCidJarPresent && CIDstream==null && StandardFonts.isAdobeCMAP(encodingName)){ // isCidJarPresent=true; // // try{ // InputStream in=PdfFont.class.getResourceAsStream("/org/jpedal/res/cid/00_ReadMe.pdf"); // if(in==null){ // throw new PdfFontException("cid.jar not on classpath"); // } // }catch(SecurityException ee){ // if(LogWriter.isOutput()){ // LogWriter.writeLog("Security exception "+ee+" cid.jar"); // } // } // } glyphs.setIsIdentity(false); CMAP=new String[65536]; rawCMAP=new int[65536]; glyphs.CMAP_Translate=new int[65536]; //load standard if not embedded //try{ //20120718 commented out by Mark as cid.jar now works (and breaks existing files like customers3/japanese.pdf) //if(CIDstream==null) //CIDstream =new BufferedReader //(new InputStreamReader(loader.getResourceAsStream("org/jpedal/res/cid/" + encodingName), "Cp1252")); // } catch (Exception e) { // e.printStackTrace(System.out); // if(LogWriter.isOutput()) // LogWriter.writeLog("1.Problem reading encoding for CID font "+fontID+" encoding="+encodingName+" Check CID.jar installed"); // } //read values into lookup table if (CIDstream != null) { readCIDCMap(CIDstream); } } if(CIDstream!=null){ try{ CIDstream.close(); } catch (final Exception e) { LogWriter.writeLog("2.Problem reading encoding for CID font "+fontID+ ' ' +encodingName+" Check CID.jar installed "+e); } } return CMapName; } private void readCIDCMap(final BufferedReader CIDstream) { String line = ""; int begin, end, entry; boolean inDefinition = false,inMap=false; while (true) { try{ line = CIDstream.readLine(); //System.out.println(line); } catch (final Exception e) { LogWriter.writeLog("[PDF] Error reading line from font "+e.getMessage()); } if (line == null) { break; } if (line.contains("endcidrange")) { inDefinition=false; } else if (line.contains("endcidchar")) { inMap=false; } if (inDefinition) { final StringTokenizer CIDentry =new StringTokenizer(line, " <>[]"); //flag if multiple values boolean multiple_values = false; if (line.indexOf('[') != -1) { multiple_values = true; } //first 2 values define start and end begin = Integer.parseInt(CIDentry.nextToken(), 16); end = Integer.parseInt(CIDentry.nextToken(), 16); //can be hex or base10 final String value=CIDentry.nextToken(); final int charCount=value.length(); //assume false and try to disprove - exit on first hex char boolean isHex=false; for(int ptr=0;ptr=48 && c<=57){ }else{ isHex=true; ptr=charCount; } } if(isHex){ entry = Integer.parseInt(value, 16); }else { entry = Integer.parseInt(value, 10); } //put into array for (int i = begin; i < end + 1; i++) { if (multiple_values) { //put either single values or range entry =Integer.parseInt(CIDentry.nextToken(), 16); rawCMAP[i]= entry; CMAP[i]= String.valueOf((char) entry); } else { CMAP[i]= String.valueOf((char) entry); rawCMAP[i]= entry; entry++; } } } else if (inMap) { try{ final StringTokenizer CIDentry =new StringTokenizer(line, " <>[]"); //System.out.println("line="+line+" "+CIDentry.countTokens()); if(CIDentry.countTokens()==2){ //flag if multiple values //boolean multiple_values = false; //if (line.indexOf('[') != -1) // multiple_values = true; //first 2 values define start and end begin = Integer.parseInt(CIDentry.nextToken(), 16); end = Integer.parseInt(CIDentry.nextToken()); //entry = Integer.parseInt(CIDentry.nextToken(), 16); //put into array //for (int i = begin; i < end + 1; i++) { //if (multiple_values == true) { //put either single values or range //entry =Integer.parseInt(CIDentry.nextToken(), 16); //CMAP[i]= String.valueOf((char) entry); //} else { glyphs.CMAP_Translate[begin]= end; //entry++; //} //} } }catch(final Exception ef){ LogWriter.writeLog("Exception "+ef); } } if (line.contains("begincidrange")) { inDefinition = true; } else if (line.contains("begincidchar")) { inMap = true; } } } /** * convert value read from TJ operand into correct glyph
Also check to * see if mapped onto unicode value */ private String getStandardGlyphValue(final int char_int) { //get possible unicode values final String unicode_char = getUnicodeMapping(char_int); //handle if unicode if (unicode_char != null) {// & (mapped_char==null)) return unicode_char; } //not unicode so get mapped char String return_value = "", mapped_char; //get font encoding final int font_encoding = getFontEncoding( true); mapped_char = getMappedChar(char_int,true); // handle if differences first then standard mappings if (mapped_char != null) { //convert name into character // First check if the char has been mapped specifically for this String char_mapping =null; if(this.fontEnc!=-1){ //ignore if no encoding set StandardFonts.getUnicodeName(this.fontEnc +mapped_char); } if (char_mapping != null) { return_value = char_mapping; } else { char_mapping =StandardFonts.getUnicodeName(mapped_char); if (char_mapping != null) { return_value = char_mapping; } else { if(mapped_char.length()==1){ return_value = mapped_char; }else if (mapped_char.length() > 1) { final char c = mapped_char.charAt(0); final char c2 = mapped_char.charAt(1); if (c == 'B' || c == 'C' || c == 'c' || c == 'G' ) { mapped_char = mapped_char.substring(1); try { final int val =(isHex) ? Integer.valueOf(mapped_char, 16) : Integer.parseInt(mapped_char); return_value = String.valueOf((char) val); } catch (final Exception e) { LogWriter.writeLog("Exception in handling char value "+e); return_value = ""; } } else { return_value = ""; } //allow for hex number final boolean isHex=((c>=48 && c<=57)||(c>=97 && c<=102) || (c>=65 && c<=70))&& ((c2>=48 && c2<=57)||(c2>=97 && c2<=102) || (c2>=65 && c2<=70)); if(return_value.isEmpty() && this.fontTypes ==StandardFonts.TYPE3 && mapped_char.length()==2 && isHex){ return_value= String.valueOf((char) Integer.parseInt(mapped_char, 16)); } //handle some odd mappings in Type3 and other cases if(return_value.isEmpty()){ if(fontTypes==StandardFonts.TYPE3) {// && !StandardFonts.isValidGlyphName(char_mapping)) return_value= String.valueOf((char) char_int); } else if(diffTable!=null && diffTable[char_int]!=null && fontEnc==StandardFonts.WIN){ //hack for odd file return_value=diffTable[char_int]; if(return_value.indexOf('_')!=-1) { return_value = return_value.replaceAll("_", ""); } } } } else { return_value = ""; } } } } else if (font_encoding > -1) { //handle encoding return_value=StandardFonts.getEncodedChar(font_encoding,char_int); } return return_value; } /**set the font used for default from Java fonts on system * - check it is a valid font (otherwise it will default to Lucida anyway) */ public final void setDefaultDisplayFont(final String fontName) { glyphs.defaultFont=fontName; } /** * Returns the java font, initializing it first if it hasn't been used before. */ public final Font getJavaFontX(final int size) { //allow user to totally over-ride //passing in this allows user to reset any global variables //set in this method as well. //Helper is a static instance of the inteface JPedalHelper if(DecoderOptions.Helper!=null){ final Font f=DecoderOptions.Helper.getJavaFontX(this,size); //if you want to implement JPedalHelper but not //use this function, just return null if(f!=null) { return f; } } //noinspection MagicConstant return new Font(glyphs.font_family_name, glyphs.style, size); } /** * get font name as a string from ID (ie Tf /F1) and load if one of Adobe 14 */ public final String getFontName() { //check if one of 14 standard fonts and load if needed StandardFonts.loadStandardFontWidth(glyphs.fontName); return glyphs.fontName; } /** * get the copyright information */ public final String getCopyright() { return copyright; } /** * get raw font name which may include +xxxxxx */ public final String getBaseFontName() { return glyphs.getBaseFontName(); } /** * get width of a space */ public final float getCurrentFontSpaceWidth() { float width; //allow for space mapped onto other value final int space_value =spaceChar; if (space_value !=-1) { width = getWidth(space_value); } else { width= possibleSpaceWidth; //use shortest width as a guess } //allow for no value if (width ==noWidth || width == 0) { width = 0.3f; //Kept original in case new value shows issue in the future //width = 0.2f; } return width; } /** * get width of a space for HTML */ public final float getSpaceWidthHTML() { float width = 0; if (spaceChar !=-1) { width = getWidth(spaceChar); } //allow for no value if (width ==noWidth || width == 0) { width = 0.25f;// In HTML we use fontSize/4 - this is the same value that Chrome's reader (pdfium) uses } return width; } public final int getFontEncoding( final boolean notNull) { int result = fontEnc; if (result == -1 && notNull) { result = StandardFonts.STD; } return result; } /** Returns width of the specified character
* Allows for no value set*/ public final float getWidth( final int charInt) { //if -1 return last value fetched if(charInt==-1) { return lastWidth; } //try embedded font first (indexed by number) float width =noWidth; if(widthTable!=null && charInt!=-1 && charInt0){ width = widthTable[ptr]; } } if (width == noWidth) { if(isCIDFont){ width= defaultWidth; }else{ //try standard values which are indexed under NAME of char String charName = getMappedChar( charInt,false); if((charName!=null)&&(charName.equals(".notdef"))) { charName=StandardFonts.getUnicodeChar(getFontEncoding( true) , charInt); } Float value =StandardFonts.getStandardWidth(glyphs.logicalfontName , charName); //allow for remapping of base 14 with no width if(value==null && rawFontName!=null){ //check loaded StandardFonts.loadStandardFontWidth(rawFontName); //try again value =StandardFonts.getStandardWidth(rawFontName , charName); } if (value != null) { width= value; } else{ if(missingWidth!=noWidth) { width=missingWidth*xscale; } else { width=0; } } } } //cache value so we can reread lastWidth=width; return width; } /**generic CID code * @throws PdfFontException */ public void createCIDFont(final PdfObject pdfObject, final PdfObject Descendent) throws PdfFontException{ cachedValue=new String[65536]; String CMapName=null; final PdfObject Encoding=pdfObject.getDictionary(PdfDictionary.Encoding); if(Encoding!=null){ CMapName=handleCIDEncoding(Encoding, pdfObject); } //handle to unicode mapping ToUnicode=pdfObject.getDictionary(PdfDictionary.ToUnicode); if(ToUnicode!=null){ final UnicodeReader uniReader=new UnicodeReader(currentPdfFile.readStream(ToUnicode,true,true,false, false,false, ToUnicode.getCacheName(currentPdfFile.getObjectReader()))); unicodeMappings=uniReader.readUnicode(); hasDoubleBytes=uniReader.hasDoubleByteValues(); } Object[] widths= Descendent.getObjectArray(PdfDictionary.W); //allow for vertical final Object[] verticalWidths=Descendent.getObjectArray(PdfDictionary.W2); if(verticalWidths!=null){ widths=verticalWidths; isWidthVertical=true; } if(widths!=null) { setCIDFontWidths(widths); } final int Width=Descendent.getInt(PdfDictionary.DW); if(Width>=0) { defaultWidth=(Width)/1000f; } final int[] VerticalWidth=Descendent.getIntArray(PdfDictionary.DW2); if(VerticalWidth!=null){ //may need more detailed implementation of vertical spacing (se PDFspec chapter 5) isWidthVertical=true; defaultWidth=(VerticalWidth[1])/1000f; } //it looks like in this case it uses average of values //but not enough data if(handleOddSapFontMapping){ defaultWidth=.5f; } final PdfObject FontDescriptor = Descendent.getDictionary(PdfDictionary.FontDescriptor); final PdfObject CIDToGID=Descendent.getDictionary(PdfDictionary.CIDToGIDMap); if(CIDToGID!=null){ final byte[] stream=currentPdfFile.readStream(CIDToGID,true,true,false, false,false, null); if(stream!=null){ int j=0; final int count=stream.length; CIDToGIDMap=new int[count/2]; for(int i=0;i0) { widthTable = new float[maxCharCount]; //set all values to noWidth so I can spot ones with no value for(int ii=0;ii0){ shortestWidth += widthValue; count++; } widthTable[i]=widthValue; }else { widthTable[i]=0; } j++; } } //save guess for space as half average char if(setSpace && count>0) { possibleSpaceWidth=shortestWidth/(2*count); } } /**read in a font and its details from the pdf file*/ public void createFont(final PdfObject pdfObject, final String fontID, final boolean renderPage, final ObjectStore objectStore, final Map substitutedFonts) throws Exception{ //generic setup init(fontID, renderPage); // get FontDescriptor object - if present contains metrics on glyphs final PdfObject pdfFontDescriptor=pdfObject.getDictionary(PdfDictionary.FontDescriptor); setName(pdfObject); setEncoding(pdfObject, pdfFontDescriptor); } protected void setName(final PdfObject pdfObject) { // Get fontName String baseFontName= pdfObject.getName(PdfDictionary.BaseFont); if(baseFontName==null) { baseFontName= pdfObject.getName(PdfDictionary.FontName); } if (baseFontName == null) { baseFontName = this.fontID; //if(PdfStreamDecoder.runningStoryPad) //remove spaces and unwanted chars } if(baseFontName.contains("#20")) { baseFontName= cleanupFontName(baseFontName); //System.out.println("baseFontName="+baseFontName); } glyphs.setBaseFontName(baseFontName); objID = pdfObject.getObjectRefID(); glyphs.setObjID(objID); // get name less any suffix (needs abcdef+ removed from start) truncatedName= pdfObject.getStringValue(PdfDictionary.BaseFont, PdfDictionary.REMOVEPOSTSCRIPTPREFIX); if(truncatedName==null) { truncatedName= pdfObject.getStringValue(PdfDictionary.FontName, PdfDictionary.REMOVEPOSTSCRIPTPREFIX); } if (truncatedName == null) { truncatedName = this.fontID; } if(truncatedName.contains("#20") || truncatedName.contains("#2D")) { truncatedName= cleanupFontName(truncatedName); //if(PdfStreamDecoder.runningStoryPad) //remove spaces and unwanted chars // truncatedName= cleanupFontName(truncatedName); } glyphs.fontName=truncatedName; if(truncatedName.equals("Arial-BoldMT")){ glyphs.logicalfontName="Arial,Bold"; StandardFonts.loadStandardFontWidth(glyphs.logicalfontName); }else if(truncatedName.equals("ArialMT")){ glyphs.logicalfontName="Arial"; StandardFonts.loadStandardFontWidth(glyphs.logicalfontName); }else { glyphs.logicalfontName=truncatedName; } if(glyphs.logicalfontName.equals("Arial")){ isArial=true; } } /** * used by PDF2HTML to replace unsuitable characters and make sure unique * @param newName */ public void resetNameForHTML(String newName){ glyphs.fontName=newName; glyphs.baseFontName=newName; } public int getObjID() { return objID; } protected void setEncoding(final PdfObject pdfObject, final PdfObject pdfFontDescriptor) { //handle to unicode mapping final PdfObject ToUnicode=pdfObject.getDictionary(PdfDictionary.ToUnicode); if(ToUnicode!=null) { unicodeMappings=new UnicodeReader(currentPdfFile.readStream(ToUnicode,true,true,false, false,false, ToUnicode.getCacheName(currentPdfFile.getObjectReader()))).readUnicode(); } //handle encoding final PdfObject Encoding=pdfObject.getDictionary(PdfDictionary.Encoding); if (Encoding != null) { handleFontEncoding(pdfObject,Encoding); } else { handleNoEncoding(0,pdfObject); } if(pdfFontDescriptor!=null){ //set missingWidth missingWidth=pdfFontDescriptor.getInt(PdfDictionary.MissingWidth); } } private float descent; public float getDescent() { return descent; } protected void setBoundsAndMatrix(final PdfObject pdfFontDescriptor) { if(pdfFontDescriptor!=null){ final double[] newFontmatrix=pdfFontDescriptor.getDoubleArray(PdfDictionary.FontMatrix); if(newFontmatrix!=null) { FontMatrix=newFontmatrix; } final float[] newFontBBox=pdfFontDescriptor.getFloatArray(PdfDictionary.FontBBox); if(newFontBBox!=null) { FontBBox=newFontBBox; //set ascent and descent // float value=pdfFontDescriptor.getFloatNumber(PdfDictionary.Ascent); // if(value!=0) // ascent=value; // descent = pdfFontDescriptor.getFloatNumber(PdfDictionary.Descent); // if(value!=0) // descent=value; } else { descent = 0; } } } protected void init(final String fontID, final boolean renderPage) { this.fontID = fontID; this.renderPage = renderPage; } /** * */ private int handleNoEncoding(int encValue, final PdfObject pdfObject) { final int enc=pdfObject.getGeneralType(PdfDictionary.Encoding); if(enc==StandardFonts.ZAPF || (!PdfFontFactory.isFontEmbedded(pdfObject) && getFontName().equals("Wingdings"))){ putFontEncoding(StandardFonts.ZAPF); glyphs.defaultFont="Zapf Dingbats"; //replace with single default StandardFonts.checkLoaded(StandardFonts.ZAPF); encValue=StandardFonts.ZAPF; }else if(enc==StandardFonts.SYMBOL){ putFontEncoding(StandardFonts.SYMBOL); encValue=StandardFonts.SYMBOL; }else { putFontEncoding(StandardFonts.STD); //default to standard } hasEncoding=false; return encValue; } /////////////////////////////////////////////////////////////////////// /** * handle font encoding and store information */ private void handleFontEncoding(final PdfObject pdfObject, final PdfObject Encoding){ final int subType=pdfObject.getParameterConstant(PdfDictionary.Subtype); int encValue =getFontEncoding( false); if (encValue == -1) { if (subType == StandardFonts.TRUETYPE) { encValue = StandardFonts.MAC; } else { encValue = StandardFonts.STD; } } /* * handle differences from main encoding */ final PdfArrayIterator Diffs=Encoding.getMixedArray(PdfDictionary.Differences); if (Diffs != null && Diffs.getTokenCount()>0) { glyphs.setIsSubsetted(true); //needed to workout if values hex of base10 //as we have examples with both //guess if hex or base10 by looking for numbers //if it has a number it must be base10 byte[][] rawData=null; if(Encoding!=null) { rawData=Encoding.getByteArray(PdfDictionary.Differences); } if(rawData!=null){ containsHexNumbers=true; allNumbers=true; for (final byte[] aRawData : rawData) { if (aRawData != null && aRawData[0] == '/') { final int length = aRawData.length; char c, charCount = 0; if (length == 3 && containsHexNumbers) { for (int jj = 1; jj < 3; jj++) { c = (char) aRawData[jj]; if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F')) { charCount++; } } } if (charCount != 2) { containsHexNumbers = false; //System.out.println("Failed on="+new String(data)+"<"); //ii=rawData.length; } if ((allNumbers) && (length < 4)) { for (int jj = 2; jj < length; jj++) { c = (char) aRawData[jj]; if ((c >= '0' && c <= '9')) { } else { allNumbers = false; jj = length; } } } //exit if poss //if(!containsHexNumbers){ // ii=rawData.length; //} /**/ } } } int pointer = 0,type; while (Diffs.hasMoreTokens()) { type=Diffs.getNextValueType(); if(type==PdfArrayIterator.TYPE_KEY_INTEGER){ pointer=Diffs.getNextValueAsInteger(); }else{ if(type==PdfArrayIterator.TYPE_VALUE_INTEGER){ if(diffCharTable==null) { diffCharTable = new int[maxCharCount]; } //save so we can rempa glyph to get correct value for embedded font diffCharTable[pointer]=Diffs.getNextValueAsInteger(false); } String rawKey=Diffs.getNextValueAsString(false); // System.out.println(rawKey+" "+pointer); rawDiffKeys.put(rawKey.substring(1),pointer); putMappedChar( pointer,Diffs.getNextValueAsFontChar(pointer, containsHexNumbers, allNumbers)); pointer++; } } //get flag isHex=Diffs.hasHexChars(); //pick up space final int spaceChar=Diffs.getSpaceChar(); if(spaceChar!=-1) { this.spaceChar=spaceChar; } } int EncodingType=PdfDictionary.Unknown; if(Encoding!=null){ hasEncoding=true; //see if general value first ie /WinAnsiEncoding int newEncodingType=Encoding.getGeneralType(PdfDictionary.Encoding); //check object for value if(newEncodingType==PdfDictionary.Unknown){ if(getBaseFontName().equals("ZapfDingbats")) { newEncodingType=StandardFonts.ZAPF; } else { newEncodingType=Encoding.getParameterConstant(PdfDictionary.BaseEncoding); } } if(newEncodingType!=PdfDictionary.Unknown) { EncodingType=newEncodingType; } else { EncodingType=handleNoEncoding(encValue,pdfObject); } if(spaceChar==-1 && (newEncodingType==StandardFonts.WIN || newEncodingType==StandardFonts.PDF || newEncodingType==StandardFonts.MAC)){ spaceChar=32; } } putFontEncoding(EncodingType); } /** * used by PDF2HTML5 where encoding different from index in TJ command and we need to remap via glyf name * (ie sample_pdf_htmls/thoughtcorp/Simple Relational Contracts.pdf) * @param glypName * @return */ public final int getDiffChar(final String glypName){ int value=-1; final Integer newVal= nonStandardMappings.get(glypName); if(newVal!=null){ value=newVal; } return value; } /** Insert a new mapped char in the name mapping table */ protected final void putMappedChar(final int charInt, final String mappedChar) { if(diffTable==null){ diffTable = new String[maxCharCount]; } if(charInt>255 && maxCharCount==256){ //hack for odd file //System.out.println(charInt+" mappedChar="+mappedChar+"<"); //if(1==1) //throw new RuntimeException("xxx"); }else if(diffTable[charInt]==null && mappedChar!=null && !mappedChar.startsWith("glyph")){ diffTable[charInt]= mappedChar; } } /** * return char mapped onto value in Differences or null * @param charInt * @return */ public String getDiffMapping(final int charInt){ if(diffTable==null) { return null; } else { return diffTable[charInt]; } } /** Returns the char glyph corresponding to the specified code for the specified font. */ public final String getMappedChar(final int charInt, final boolean remap) { String result =null; //check differences if(diffTable!=null) { result =diffTable[charInt]; } if((remap)&&(result!=null)&&(result.equals(".notdef"))) { result=" "; } //check standard encoding if (result == null && charInt<335) { result =StandardFonts.getUnicodeChar(getFontEncoding( true) , charInt); } //all unused win values over 40 map to bullet if(result==null && charInt>40 && getFontEncoding(true)==StandardFonts.WIN ) { if(charInt==173) { result="hyphen"; } else { result="bullet"; } } //check embedded stream as not in baseFont encoding if(isFontEmbedded && result==null){ //diffs from embedded 1C file if(diffs!=null) { result =diffs[charInt]; } //Embedded encoding (which can be different from the encoding!) if (result == null && charInt<335) { result =StandardFonts.getUnicodeChar(this.embeddedEnc , charInt); } } return result; } public final String getEmbeddedChar(final int charInt) { String embeddedResult=null; //check embedded stream as not in baseFont encoding if(isFontEmbedded){ //diffs from embedded 1C file if(diffs!=null) { embeddedResult =diffs[charInt]; } //Embedded encoding (which can be different from the encoding!) if ((embeddedResult == null) && charInt<256) { embeddedResult =StandardFonts.getUnicodeChar(this.embeddedEnc , charInt); } } return embeddedResult; } /** * gets type of font (ie 3 ) so we can call type * specific code. * @return int of type */ public final int getFontType() { return fontTypes; } /** * name of font used to display */ public String getSubstituteFont() { return this.substituteFontFile; } /**used in generic renderer*/ public float getGlyphWidth(final String charGlyph, final int rawInt, final String displayValue) { float currentWidth=0; if(this.fontTypes==StandardFonts.TRUETYPE && isFontEmbedded){ //use embedded CMAP currentWidth=glyphs.getTTWidth(charGlyph,rawInt, displayValue,false); }else if(!isFontEmbedded){ //this should cascade gracefully and always give a sensible number if(rawInt<255) { currentWidth=getWidth(rawInt); } if(currentWidth==0){ //failsafe final Float value=StandardFonts.getStandardWidth("Arial" , charGlyph); currentWidth = value!=null? value : 0.0f; } } return currentWidth; } public PdfJavaGlyphs getGlyphData() { //glyphs.setHasWidths(this.hasWidths()); glyphs.setHasWidths(true); return glyphs; } public void setFont(final String font, final int textSize) { glyphs.setFont(font,textSize); } public boolean is1C() { return glyphs.is1C(); } public boolean isFontSubsetted() { return glyphs.isSubsetted; } public void setValuesForGlyph(final int rawInt, final String charGlyph, final String displayValue, final String embeddedChar) { glyphs.setValuesForGlyph(rawInt, charGlyph, displayValue, embeddedChar); } /** * remove unwanted chars from string name */ private static String cleanupFontName(final String baseFontName) { // baseFontName=baseFontName.toLowerCase(); final int length=baseFontName.length(); final StringBuilder cleanedName=new StringBuilder(length); char c; for(int aa=0;aa"+cleanedName.toString()); return cleanedName.toString(); } /** * get bounding box to highlight * @return */ public Rectangle getBoundingBox() { // if(BBox==null){ //if one of standard fonts, use value from afm file final float[] standardBB=StandardFonts.getFontBounds(getFontName()); if(standardBB==null){ if(!isFontEmbedded) { //use default as we are displaying in Lucida BBox=new Rectangle(0,0,1000,1000); } else { BBox=new Rectangle((int)(FontBBox[0]),(int)(FontBBox[1]),(int)(FontBBox[2]-FontBBox[0]),(int)(FontBBox[3]-FontBBox[1])); } }else { BBox=new Rectangle((int)(standardBB[0]),(int)(standardBB[1]),(int)(standardBB[2]-standardBB[0]),(int)(standardBB[3]-standardBB[1])); } // } return BBox; } public void setRawFontName(final String baseFont) { rawFontName=baseFont; } /** * workout spaces (if any) to add into content for a gap * from user settings, space info in pdf */ public static String getSpaces( float currentGap, final float spaceWidth, final float currentThreshold) { String space = ""; if (spaceWidth > 0) { //avoid silly huge gaps as well in data if ((currentGap > spaceWidth) && (currentGap/spaceWidth<300)&&(currentThreshold<1 || currentGap > spaceWidth*currentThreshold)) { while (currentGap >= spaceWidth) { space = ' ' + space; currentGap -= spaceWidth; } } else if (currentGap > spaceWidth * currentThreshold) { //ensure a gap of at least space_thresh_hold space += ' '; } } return space; } public int getDiffChar(final int index){ if(diffCharTable==null) { return 0; } else { return diffCharTable[index]; } } public float[] getFontBounds() { return this.FontBBox; } public boolean isFontVertical() { return isFontVertical; } /** * @param lastWidth */ public void setLastWidth(final float lastWidth) { this.lastWidth=lastWidth; } public String getFontID() { return fontID; } public boolean isSingleByte() { return isSingleByte; } /** * workaround to handle issue with some odd SAP files * Please do not use. * @return */ public boolean isBrokenFont() { return this.handleOddSapFontMapping; } /** * Used by HTML code to tell whether to use CID range remapping or not * @return Whether a 'ToUnicode' dictionary is provided. */ public boolean hasToUnicode() { return unicodeMappings != null; } /** * returns width or value in widthTable (-1 if no width set) * idx=-1 returns default */ public float getDefaultWidth(final int idx) { if(idx==-1){ return defaultWidth; }else{ if(widthTable==null){ return -1; }else{ return widthTable[idx]; } } } /** * work out if 1 or 2 bytes for each char * @param firstVal * @param secondByte * @param secondByteIsEscaped * @return * * 0=false * 1=true * -1=unset */ public int isDoubleBytes(final int firstVal, final int secondByte, final boolean secondByteIsEscaped) { if(hasDoubleBytes){ return 1; }else if(isFirstScan ){ //don't bother with test if we have figured it out already if((firstVal==secondByte && firstVal>0) || (!isHex && secondByte==41 && !secondByteIsEscaped) || (getMappedChar(firstVal, true)!=null && getMappedChar(secondByte,true)!=null)){ isDouble=0; }else{ isDouble=1; } isFirstScan=false; } return isDouble; } /** * flag to show if double (1), single (0) or unset (-1) * @return */ public int isDoubleBytes() { return isDouble; } public PdfObject getToUnicode() { return ToUnicode; } public int[] getCIDToGIDMap() { return CIDToGIDMap; } public String[] getCMAP() { return CMAP; } public boolean isWidthVertical() { return isWidthVertical; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy