org.apache.fop.afp.fonts.CharacterSetBuilder Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id: CharacterSetBuilder.java 1761020 2016-09-16 11:17:35Z ssteiner $ */
package org.apache.fop.afp.fonts;
import java.awt.Rectangle;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xmlgraphics.image.loader.util.SoftMapCache;
import org.apache.fop.afp.AFPConstants;
import org.apache.fop.afp.AFPEventProducer;
import org.apache.fop.afp.util.AFPResourceAccessor;
import org.apache.fop.afp.util.StructuredFieldReader;
import org.apache.fop.apps.io.InternalResourceResolver;
import org.apache.fop.fonts.Typeface;
/**
* The CharacterSetBuilder is responsible building the a CharacterSet instance that holds
* the font metric data. The data is either read from disk and passed to a CharacterSet (*)
* or a FopCharacterSet is instantiated that is composed of a Typeface instance configured
* with this data.
* -*- For referenced fonts CharacterSetBuilder is responsible for reading the font attributes
* from binary code page files and the character set metric files. In IBM font structure, a
* code page maps each character of text to the characters in a character set.
* Each character is translated into a code point. When the character is
* printed, each code point is matched to a character ID on the code page
* specified. The character ID is then matched to the image (raster pattern or
* outline pattern) of the character in the character set specified. The image
* in the character set is the image that is printed in the document. To be a
* valid code page for a particular character set, all character IDs in the code
* page must be included in that character set.
* This class will read the font information from the binary code page files and character
* set metric files in order to determine the correct metrics to use when rendering the
* formatted object.
*/
public abstract class CharacterSetBuilder {
/**
* Static logging instance
*/
protected static final Log LOG = LogFactory.getLog(CharacterSetBuilder.class);
/**
* Template used to convert lists to arrays.
*/
private static final CharacterSetOrientation[] EMPTY_CSO_ARRAY = new CharacterSetOrientation[0];
/** Codepage MO:DCA structured field. */
private static final byte[] CODEPAGE_SF = new byte[] {
(byte) 0xD3, (byte) 0xA8, (byte) 0x87};
/** Character table MO:DCA structured field. */
private static final byte[] CHARACTER_TABLE_SF = new byte[] {
(byte) 0xD3, (byte) 0x8C, (byte) 0x87};
/** Font descriptor MO:DCA structured field. */
private static final byte[] FONT_DESCRIPTOR_SF = new byte[] {
(byte) 0xD3, (byte) 0xA6, (byte) 0x89 };
/** Font control MO:DCA structured field. */
private static final byte[] FONT_CONTROL_SF = new byte[] {
(byte) 0xD3, (byte) 0xA7, (byte) 0x89 };
/** Font orientation MO:DCA structured field. */
private static final byte[] FONT_ORIENTATION_SF = new byte[] {
(byte) 0xD3, (byte) 0xAE, (byte) 0x89 };
/** Font position MO:DCA structured field. */
private static final byte[] FONT_POSITION_SF = new byte[] {
(byte) 0xD3, (byte) 0xAC, (byte) 0x89 };
/** Font index MO:DCA structured field. */
private static final byte[] FONT_INDEX_SF = new byte[] {
(byte) 0xD3, (byte) 0x8C, (byte) 0x89 };
/**
* The collection of code pages
*/
private final Map> codePagesCache
= Collections.synchronizedMap(new WeakHashMap>());
/**
* Cache of charactersets
*/
private final SoftMapCache characterSetsCache = new SoftMapCache(true);
/** Default constructor. */
private CharacterSetBuilder() {
}
/**
* Factory method for the single-byte implementation of AFPFontReader.
* @return AFPFontReader
*/
public static CharacterSetBuilder getSingleByteInstance() {
return SingleByteLoader.getInstance();
}
/**
* Factory method for the double-byte (CID Keyed font (Type 0)) implementation of AFPFontReader.
* @return AFPFontReader
*/
public static CharacterSetBuilder getDoubleByteInstance() {
return DoubleByteLoader.getInstance();
}
/**
* Returns an InputStream to a given file path and filename
*
* @param accessor the resource accessor
* @param uriStr the URI
* @param eventProducer for handling AFP related events
* @return an inputStream
* @throws IOException in the event that an I/O exception of some sort has occurred
*/
private InputStream openInputStream(AFPResourceAccessor accessor, String uriStr,
AFPEventProducer eventProducer)
throws IOException {
URI uri;
try {
uri = InternalResourceResolver.cleanURI(uriStr.trim());
} catch (URISyntaxException e) {
throw new MalformedURLException("Invalid uri: " + uriStr + " (" + e.getMessage() + ")");
}
if (LOG.isDebugEnabled()) {
LOG.debug("Opening " + uri);
}
return accessor.createInputStream(uri);
}
/**
* Closes the inputstream
*
* @param inputStream the inputstream to close
*/
private void closeInputStream(InputStream inputStream) {
try {
if (inputStream != null) {
inputStream.close();
}
} catch (Exception ex) {
// Lets log at least!
LOG.error(ex.getMessage());
}
}
/**
* Load the font details and metrics into the CharacterSetMetric object, this will use the
* actual afp code page and character set files to load the object with the necessary metrics.
*
* @param characterSetName name of the characterset
* @param codePageName name of the code page file
* @param encoding encoding name
* @param accessor used to load codepage and characterset
* @param eventProducer for handling AFP related events
* @return CharacterSet object
* @throws IOException if an I/O error occurs
*/
public CharacterSet buildSBCS(String characterSetName, String codePageName, String encoding,
AFPResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException {
return processFont(characterSetName, codePageName, encoding, CharacterSetType.SINGLE_BYTE,
accessor, eventProducer);
}
/**
* Load the font details and metrics into the CharacterSetMetric object, this will use the
* actual afp code page and character set files to load the object with the necessary metrics.
* This method is to be used for double byte character sets (DBCS).
*
* @param characterSetName name of the characterset
* @param codePageName name of the code page file
* @param encoding encoding name
* @param charsetType the characterset type
* @param accessor used to load codepage and characterset
* @param eventProducer for handling AFP related events
* @return CharacterSet object
* @throws IOException if an I/O error occurs
*/
public CharacterSet buildDBCS(String characterSetName, String codePageName, String encoding,
CharacterSetType charsetType, AFPResourceAccessor accessor, AFPEventProducer eventProducer)
throws IOException {
return processFont(characterSetName, codePageName, encoding, charsetType, accessor,
eventProducer);
}
/**
* Load the font details and metrics into the CharacterSetMetric object, this will use the
* actual afp code page and character set files to load the object with the necessary metrics.
*
* @param characterSetName the CharacterSetMetric object to populate
* @param codePageName the name of the code page to use
* @param encoding name of the encoding in use
* @param typeface base14 font name
* @param eventProducer for handling AFP related events
* @return CharacterSet object
* @throws IOException if an I/O error occurs
*/
public CharacterSet build(String characterSetName, String codePageName, String encoding,
Typeface typeface, AFPEventProducer eventProducer) throws IOException {
return new FopCharacterSet(codePageName, encoding, characterSetName, typeface,
eventProducer);
}
public CharacterSet build(String characterSetName, String codePageName, String encoding,
Typeface typeface, AFPResourceAccessor accessor, AFPEventProducer eventProducer)
throws IOException {
return new FopCharacterSet(codePageName, encoding, characterSetName, typeface, accessor, eventProducer);
}
private CharacterSet processFont(String characterSetName, String codePageName, String encoding,
CharacterSetType charsetType, AFPResourceAccessor accessor, AFPEventProducer eventProducer)
throws IOException {
// check for cached version of the characterset
URI charSetURI = accessor.resolveURI(characterSetName);
String cacheKey = charSetURI.toASCIIString() + "_" + characterSetName + "_" + codePageName;
CharacterSet characterSet = (CharacterSet) characterSetsCache.get(cacheKey);
if (characterSet != null) {
return characterSet;
}
// characterset not in the cache, so recreating
characterSet = new CharacterSet(codePageName, encoding, charsetType, characterSetName,
accessor, eventProducer);
InputStream inputStream = null;
try {
/**
* Get the code page which contains the character mapping
* information to map the unicode character id to the graphic
* chracter global identifier.
*/
Map codePage;
// TODO: This could have performance implications if several threads want to use the
// codePagesCache to retrieve different codepages.
synchronized (codePagesCache) {
codePage = codePagesCache.get(codePageName);
if (codePage == null) {
codePage = loadCodePage(codePageName, encoding, accessor, eventProducer);
codePagesCache.put(codePageName, codePage);
}
}
inputStream = openInputStream(accessor, characterSetName, eventProducer);
StructuredFieldReader structuredFieldReader = new StructuredFieldReader(inputStream);
// Process D3A689 Font Descriptor
FontDescriptor fontDescriptor = processFontDescriptor(structuredFieldReader);
characterSet.setNominalVerticalSize(fontDescriptor.getNominalFontSizeInMillipoints());
// Process D3A789 Font Control
FontControl fontControl = processFontControl(structuredFieldReader);
if (fontControl != null) {
//process D3AE89 Font Orientation
CharacterSetOrientation[] characterSetOrientations
= processFontOrientation(structuredFieldReader);
double metricNormalizationFactor;
if (fontControl.isRelative()) {
metricNormalizationFactor = 1;
} else {
int dpi = fontControl.getDpi();
metricNormalizationFactor = 1000.0d * 72000.0d
/ fontDescriptor.getNominalFontSizeInMillipoints() / dpi;
}
ValueNormalizer normalizer = new ValueNormalizer(metricNormalizationFactor);
//process D3AC89 Font Position
processFontPosition(structuredFieldReader, characterSetOrientations, normalizer);
//process D38C89 Font Index (per orientation)
for (CharacterSetOrientation characterSetOrientation : characterSetOrientations) {
processFontIndex(structuredFieldReader, characterSetOrientation, codePage, normalizer);
characterSet.addCharacterSetOrientation(characterSetOrientation);
}
} else {
throw new IOException("Missing D3AE89 Font Control structured field.");
}
} finally {
closeInputStream(inputStream);
}
characterSetsCache.put(cacheKey, characterSet);
return characterSet;
}
private static class ValueNormalizer {
private final double factor;
public ValueNormalizer(double factor) {
this.factor = factor;
}
public int normalize(int value) {
return (int) Math.round(value * factor);
}
}
/**
* Load the code page information from the appropriate file. The file name
* to load is determined by the code page name and the file extension 'CDP'.
*
* @param codePage
* the code page identifier
* @param encoding
* the encoding to use for the character decoding
* @param accessor the resource accessor
* @param eventProducer for handling AFP related events
* @return a code page mapping (key: GCGID, value: Unicode character)
* @throws IOException if an I/O exception of some sort has occurred.
*/
protected Map loadCodePage(String codePage, String encoding,
AFPResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException {
// Create the HashMap to store code page information
Map codePages = new HashMap();
InputStream inputStream = null;
try {
inputStream = openInputStream(accessor, codePage.trim(), eventProducer);
} catch (IOException e) {
eventProducer.codePageNotFound(this, e);
throw e;
}
try {
StructuredFieldReader structuredFieldReader = new StructuredFieldReader(inputStream);
byte[] data = structuredFieldReader.getNext(CHARACTER_TABLE_SF);
int position = 0;
byte[] gcgiBytes = new byte[8];
byte[] charBytes = new byte[1];
// Read data, ignoring bytes 0 - 2
for (int index = 3; index < data.length; index++) {
if (position < 8) {
// Build the graphic character global identifier key
gcgiBytes[position] = data[index];
position++;
} else if (position == 9) {
position = 0;
// Set the character
charBytes[0] = data[index];
String gcgiString = new String(gcgiBytes,
AFPConstants.EBCIDIC_ENCODING);
//Use the 8-bit char index to find the Unicode character using the Java encoding
//given in the configuration. If the code page and the Java encoding don't
//match, a wrong Unicode character will be associated with the AFP GCGID.
//Idea: we could use IBM's GCGID to Unicode map and build code pages ourselves.
String charString = new String(charBytes, encoding);
codePages.put(gcgiString, charString);
} else {
position++;
}
}
} finally {
closeInputStream(inputStream);
}
return codePages;
}
/**
* Process the font descriptor details using the structured field reader.
*
* @param structuredFieldReader the structured field reader
* @return a class representing the font descriptor
* @throws IOException if an I/O exception of some sort has occurred.
*/
private static FontDescriptor processFontDescriptor(
StructuredFieldReader structuredFieldReader) throws IOException {
byte[] fndData = structuredFieldReader.getNext(FONT_DESCRIPTOR_SF);
return new FontDescriptor(fndData);
}
/**
* Process the font control details using the structured field reader.
*
* @param structuredFieldReader
* the structured field reader
* @return the FontControl
* @throws IOException if an I/O exception of some sort has occurred.
*/
private FontControl processFontControl(StructuredFieldReader structuredFieldReader)
throws IOException {
byte[] fncData = structuredFieldReader.getNext(FONT_CONTROL_SF);
FontControl fontControl = null;
if (fncData != null) {
fontControl = new FontControl();
if (fncData[7] == (byte) 0x02) {
fontControl.setRelative(true);
}
int metricResolution = getUBIN(fncData, 9);
if (metricResolution == 1000) {
//Special case: 1000 units per em (rather than dpi)
fontControl.setUnitsPerEm(1000);
} else {
fontControl.setDpi(metricResolution / 10);
}
}
return fontControl;
}
/**
* Process the font orientation details from using the structured field
* reader.
*
* @param structuredFieldReader
* the structured field reader
* @return CharacterSetOrientation array
* @throws IOException if an I/O exception of some sort has occurred.
*/
private CharacterSetOrientation[] processFontOrientation(
StructuredFieldReader structuredFieldReader) throws IOException {
byte[] data = structuredFieldReader.getNext(FONT_ORIENTATION_SF);
int position = 0;
byte[] fnoData = new byte[26];
List orientations = new ArrayList();
// Read data, ignoring bytes 0 - 2
for (int index = 3; index < data.length; index++) {
// Build the font orientation record
fnoData[position] = data[index];
position++;
if (position == 26) {
position = 0;
int orientation = determineOrientation(fnoData[2]);
int spaceIncrement = getUBIN(fnoData, 8);
int emIncrement = getUBIN(fnoData, 14);
int nominalCharacterIncrement = getUBIN(fnoData, 20);
orientations.add(new CharacterSetOrientation(orientation, spaceIncrement,
emIncrement, nominalCharacterIncrement));
}
}
return orientations.toArray(EMPTY_CSO_ARRAY);
}
/**
* Populate the CharacterSetOrientation object in the suplied array with the
* font position details using the supplied structured field reader.
*
* @param structuredFieldReader
* the structured field reader
* @param characterSetOrientations
* the array of CharacterSetOrientation objects
* @param metricNormalizationFactor factor to apply to the metrics to get normalized
* font metric values
* @throws IOException if an I/O exception of some sort has occurred.
*/
private void processFontPosition(StructuredFieldReader structuredFieldReader,
CharacterSetOrientation[] characterSetOrientations, ValueNormalizer normalizer)
throws IOException {
byte[] data = structuredFieldReader.getNext(FONT_POSITION_SF);
int position = 0;
byte[] fpData = new byte[26];
int characterSetOrientationIndex = 0;
// Read data, ignoring bytes 0 - 2
for (int index = 3; index < data.length; index++) {
if (position < 22) {
// Build the font orientation record
fpData[position] = data[index];
if (position == 9) {
CharacterSetOrientation characterSetOrientation
= characterSetOrientations[characterSetOrientationIndex];
int xHeight = getSBIN(fpData, 2);
int capHeight = getSBIN(fpData, 4);
int ascHeight = getSBIN(fpData, 6);
int dscHeight = getSBIN(fpData, 8);
dscHeight = dscHeight * -1;
int underscoreWidth = getUBIN(fpData, 17);
int underscorePosition = getSBIN(fpData, 20);
characterSetOrientation.setXHeight(normalizer.normalize(xHeight));
characterSetOrientation.setCapHeight(normalizer.normalize(capHeight));
characterSetOrientation.setAscender(normalizer.normalize(ascHeight));
characterSetOrientation.setDescender(normalizer.normalize(dscHeight));
characterSetOrientation.setUnderscoreWidth(normalizer.normalize(underscoreWidth));
characterSetOrientation.setUnderscorePosition(normalizer.normalize(underscorePosition));
}
} else if (position == 22) {
position = 0;
characterSetOrientationIndex++;
fpData[position] = data[index];
}
position++;
}
}
private void processFontIndex(StructuredFieldReader structuredFieldReader, CharacterSetOrientation cso,
Map codepage, ValueNormalizer normalizer)
throws IOException {
byte[] data = structuredFieldReader.getNext(FONT_INDEX_SF);
int position = 0;
byte[] gcgid = new byte[8];
byte[] fiData = new byte[20];
String firstABCMismatch = null;
// Read data, ignoring bytes 0 - 2
for (int index = 3; index < data.length; index++) {
if (position < 8) {
gcgid[position] = data[index];
position++;
} else if (position < 27) {
fiData[position - 8] = data[index];
position++;
} else if (position == 27) {
fiData[position - 8] = data[index];
position = 0;
String gcgiString = new String(gcgid, AFPConstants.EBCIDIC_ENCODING);
String idx = codepage.get(gcgiString);
if (idx != null) {
char cidx = idx.charAt(0);
int width = getUBIN(fiData, 0);
int ascendHt = getSBIN(fiData, 2);
int descendDp = getSBIN(fiData, 4);
int a = getSBIN(fiData, 10);
int b = getUBIN(fiData, 12);
int c = getSBIN(fiData, 14);
int abc = a + b + c;
int diff = Math.abs(abc - width);
if (diff != 0 && width != 0) {
double diffPercent = 100 * diff / (double) width;
if (diffPercent > 2) {
if (LOG.isTraceEnabled()) {
LOG.trace(gcgiString + ": "
+ a + " + " + b + " + " + c + " = " + (a + b + c)
+ " but found: " + width);
}
if (firstABCMismatch == null) {
firstABCMismatch = gcgiString;
}
}
}
int normalizedWidth = normalizer.normalize(width);
int x0 = normalizer.normalize(a);
int y0 = normalizer.normalize(-descendDp);
int dx = normalizer.normalize(b);
int dy = normalizer.normalize(ascendHt + descendDp);
cso.setCharacterMetrics(cidx, normalizedWidth, new Rectangle(x0, y0, dx, dy));
}
}
}
if (LOG.isDebugEnabled() && firstABCMismatch != null) {
//Debug level because it usually is no problem.
LOG.debug("Font has metrics inconsitencies where A+B+C doesn't equal the"
+ " character increment. The first such character found: "
+ firstABCMismatch);
}
}
private static int getUBIN(byte[] data, int start) {
return ((data[start] & 0xFF) << 8) + (data[start + 1] & 0xFF);
}
private static int getSBIN(byte[] data, int start) {
int ubin = ((data[start] & 0xFF) << 8) + (data[start + 1] & 0xFF);
if ((ubin & 0x8000) != 0) {
//extend sign
return ubin | 0xFFFF0000;
} else {
return ubin;
}
}
private class FontControl {
private int dpi;
private int unitsPerEm;
private boolean isRelative;
public int getDpi() {
return dpi;
}
public void setDpi(int i) {
dpi = i;
}
public int getUnitsPerEm() {
return this.unitsPerEm;
}
public void setUnitsPerEm(int value) {
this.unitsPerEm = value;
}
public boolean isRelative() {
return isRelative;
}
public void setRelative(boolean b) {
isRelative = b;
}
}
private static class FontDescriptor {
private byte[] data;
public FontDescriptor(byte[] data) {
this.data = data;
}
public int getNominalFontSizeInMillipoints() {
int nominalFontSize = 100 * getUBIN(data, 39);
return nominalFontSize;
}
}
private static final class SingleByteLoader extends CharacterSetBuilder {
private static final SingleByteLoader INSTANCE = new SingleByteLoader();
private SingleByteLoader() {
super();
}
private static SingleByteLoader getInstance() {
return INSTANCE;
}
}
/**
* Double-byte (CID Keyed font (Type 0)) implementation of AFPFontReader.
*/
private static final class DoubleByteLoader extends CharacterSetBuilder {
private static final DoubleByteLoader INSTANCE = new DoubleByteLoader();
private DoubleByteLoader() {
}
static DoubleByteLoader getInstance() {
return INSTANCE;
}
@Override
protected Map loadCodePage(String codePage, String encoding,
AFPResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException {
// Create the HashMap to store code page information
Map codePages = new HashMap();
InputStream inputStream = null;
try {
inputStream = super.openInputStream(accessor, codePage.trim(), eventProducer);
} catch (IOException e) {
eventProducer.codePageNotFound(this, e);
throw e;
}
try {
StructuredFieldReader structuredFieldReader = new StructuredFieldReader(inputStream);
byte[] data;
while ((data = structuredFieldReader.getNext(CHARACTER_TABLE_SF)) != null) {
int position = 0;
byte[] gcgiBytes = new byte[8];
byte[] charBytes = new byte[2];
// Read data, ignoring bytes 0 - 2
for (int index = 3; index < data.length; index++) {
if (position < 8) {
// Build the graphic character global identifier key
gcgiBytes[position] = data[index];
position++;
} else if (position == 9) {
// Set the character
charBytes[0] = data[index];
position++;
} else if (position == 10) {
position = 0;
// Set the character
charBytes[1] = data[index];
String gcgiString = new String(gcgiBytes,
AFPConstants.EBCIDIC_ENCODING);
String charString = new String(charBytes, encoding);
codePages.put(gcgiString, charString);
} else {
position++;
}
}
}
} finally {
super.closeInputStream(inputStream);
}
return codePages;
}
}
private static int determineOrientation(byte orientation) {
int degrees = 0;
switch (orientation) {
case 0x00:
degrees = 0;
break;
case 0x2D:
degrees = 90;
break;
case 0x5A:
degrees = 180;
break;
case (byte) 0x87:
degrees = 270;
break;
default:
throw new IllegalStateException("Invalid orientation: " + orientation);
}
return degrees;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy