All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.fop.pdf.PDFText Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id: PDFText.java 1827168 2018-03-19 08:49:57Z ssteiner $ */

package org.apache.fop.pdf;

import java.io.ByteArrayOutputStream;

import java.util.Locale;

import org.apache.fop.util.CharUtilities;

/**
 * This class represents a simple number object. It also contains contains some
 * utility methods for outputting numbers to PDF.
 */
public class PDFText extends PDFObject {

    private static final char[] DIGITS
                               = {'0', '1', '2', '3', '4', '5', '6', '7',
                                  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    private String text;

    /**
     * Returns the text.
     * @return the text
     */
    public String getText() {
        return this.text;
    }

    /**
     * Sets the text.
     * @param text the text
     */
    public void setText(String text) {
        this.text = text;
    }

    /**
     * {@inheritDoc}
     */
    protected String toPDFString() {
        if (getText() == null) {
            throw new IllegalArgumentException(
                "The text of this PDFText must not be empty");
        }
        StringBuffer sb = new StringBuffer(64);
        sb.append("(");
        sb.append(escapeText(getText()));
        sb.append(")");
        return sb.toString();
    }

    /**
     * Escape text (see 4.4.1 in PDF 1.3 specs)
     * @param text the text to encode
     * @return encoded text
     */
    public static final String escapeText(final String text) {
        return escapeText(text, false);
    }
    /**
     * Escape text (see 4.4.1 in PDF 1.3 specs)
     * @param text the text to encode
     * @param forceHexMode true if the output should follow the hex encoding rules
     * @return encoded text
     */
    public static final String escapeText(final String text, boolean forceHexMode) {
        if (text != null && text.length() > 0) {
            boolean unicode = false;
            boolean hexMode = false;
            if (forceHexMode) {
                hexMode = true;
            } else {
                for (int i = 0, c = text.length(); i < c; i++) {
                    if (text.charAt(i) >= 128) {
                        unicode = true;
                        hexMode = true;
                        break;
                    }
                }
            }

            if (hexMode) {
                final byte[] uniBytes;
                try {
                    uniBytes = text.getBytes("UTF-16");
                } catch (java.io.UnsupportedEncodingException uee) {
                    throw new RuntimeException("Incompatible VM", uee);
                }
                return toHex(uniBytes);
            } else {
                final StringBuffer result = new StringBuffer(text.length() * 2);
                result.append("(");
                final int l = text.length();

                if (unicode) {
                    // byte order marker (0xfeff)
                    result.append("\\376\\377");

                    for (int i = 0; i < l; i++) {
                        final char ch = text.charAt(i);
                        final int high = (ch & 0xff00) >>> 8;
                        final int low = ch & 0xff;
                        result.append("\\");
                        result.append(Integer.toOctalString(high));
                        result.append("\\");
                        result.append(Integer.toOctalString(low));
                    }
                } else {
                    for (int i = 0; i < l; i++) {
                        final char ch = text.charAt(i);
                        if (ch < 256) {
                            escapeStringChar(ch, result);
                        } else {
                            throw new IllegalStateException(
                            "Can only treat text in 8-bit ASCII/PDFEncoding");
                        }
                    }
                }
                result.append(")");
                return result.toString();
            }
        }
        return "()";
    }

    /**
     * Converts a byte array to a Hexadecimal String (3.2.3 in PDF 1.4 specs)
     * @param data the data to encode
     * @param brackets true if enclosing brackets should be included
     * @return String the resulting string
     */
    public static final String toHex(byte[] data, boolean brackets) {
        final StringBuffer sb = new StringBuffer(data.length * 2);
        if (brackets) {
            sb.append("<");
        }
        for (byte aData : data) {
            sb.append(DIGITS[(aData >>> 4) & 0x0F]);
            sb.append(DIGITS[aData & 0x0F]);
        }
        if (brackets) {
            sb.append(">");
        }
        return sb.toString();
    }

    /**
     * Converts a byte array to a Hexadecimal String (3.2.3 in PDF 1.4 specs)
     * @param data the data to encode
     * @return String the resulting string
     */
    public static final String toHex(byte[] data) {
        return toHex(data, true);
    }

    /**
     * Converts a String to UTF-16 (big endian).
     * @param text text to convert
     * @return byte[] UTF-16 stream
     */
    public static final byte[] toUTF16(String text) {
        try {
            return text.getBytes("UnicodeBig");
        } catch (java.io.UnsupportedEncodingException uee) {
            throw new RuntimeException("Incompatible VM", uee);
        }
    }

    /**
     * Convert a char to a multibyte hex representation
     * @param c character to encode
     * @return the encoded character
     */
    public static final String toUnicodeHex(char c) {
        final StringBuffer buf = new StringBuffer(4);
        final byte[] uniBytes;
        try {
            final char[] a = {c};
            uniBytes = new String(a).getBytes("UTF-16BE");
        } catch (java.io.UnsupportedEncodingException uee) {
            throw new RuntimeException("Incompatible VM", uee);
        }

        for (byte uniByte : uniBytes) {
            buf.append(DIGITS[(uniByte >>> 4) & 0x0F]);
            buf.append(DIGITS[uniByte & 0x0F]);
        }
        return buf.toString();
    }

    /**
     * Convert a char to a multibyte hex representation appending to string buffer.
     * The created string will be:
     * 
    *
  • 4-character string in case of non-BMP character
  • *
  • 6-character string in case of BMP character
  • *
* @param c character to encode * @param sb the string buffer to append output */ public static final void toUnicodeHex(int c, StringBuffer sb) { if (CharUtilities.isBmpCodePoint(c)) { sb.append(Integer.toHexString(c + 0x10000).substring(1).toUpperCase(Locale.US)); } else { sb.append(Integer.toHexString(c + 0x1000000).substring(1).toUpperCase(Locale.US)); } } /** * Escaped a String as described in section 4.4 in the PDF 1.3 specs. * @param s String to escape * @return String the escaped String */ public static final String escapeString(final String s) { if (s == null || s.length() == 0) { return "()"; } else { final StringBuffer sb = new StringBuffer(64); sb.append("("); for (int i = 0; i < s.length(); i++) { final char c = s.charAt(i); escapeStringChar(c, sb); } sb.append(")"); return sb.toString(); } } /** * Escapes a character conforming to the rules established in the PostScript * Language Reference (Search for "Literal Text Strings"). * @param c character to escape * @param target target StringBuffer to write the escaped character to */ public static final void escapeStringChar(final char c, final StringBuffer target) { if (c > 127) { target.append("\\"); target.append(Integer.toOctalString(c)); } else { switch (c) { case '\n': target.append("\\n"); break; case '\r': target.append("\\r"); break; case '\t': target.append("\\t"); break; case '\b': target.append("\\b"); break; case '\f': target.append("\\f"); break; case '\\': target.append("\\\\"); break; case '(': target.append("\\("); break; case ')': target.append("\\)"); break; default: target.append(c); } } } /** * Escape a byte array for output to PDF (Used for encrypted strings) * @param data data to encode * @return byte[] encoded data */ public static final byte[] escapeByteArray(byte[] data) { ByteArrayOutputStream bout = new ByteArrayOutputStream(data.length); bout.write((int)'('); for (final byte b : data) { switch (b) { case '\n': bout.write('\\'); bout.write('n'); break; case '\r': bout.write('\\'); bout.write('r'); break; case '\t': bout.write('\\'); bout.write('t'); break; case '\b': bout.write('\\'); bout.write('b'); break; case '\f': bout.write('\\'); bout.write('f'); break; case '\\': bout.write('\\'); bout.write('\\'); break; case '(': bout.write('\\'); bout.write('('); break; case ')': bout.write('\\'); bout.write(')'); break; default: bout.write(b); } } bout.write((int)')'); return bout.toByteArray(); } /** * Converts a text to PDF's "string" data type. Unsupported characters get converted to '?' * characters (similar to what the Java "US-ASCII" encoding does). * @see #toPDFString(CharSequence, char) * @param text the text to convert * @return the converted string */ public static String toPDFString(CharSequence text) { return toPDFString(text, '?'); } /** * Converts a text to PDF's "string" data type. Unsupported characters get converted to the * given replacement character. *

* The PDF library currently doesn't properly distinguish between the PDF * data types "string" and "text string", so we currently restrict "string" to US-ASCII, also * because "string" seems somewhat under-specified concerning the upper 128 bytes. * @param text the text to convert * @param replacement the replacement character used when substituting a character * @return the converted string */ public static String toPDFString(CharSequence text, char replacement) { StringBuffer sb = new StringBuffer(); for (int i = 0, c = text.length(); i < c; i++) { char ch = text.charAt(i); if (ch > 127) { //TODO Revisit the restriction to US-ASCII once "string" and "text string" are //"disentangled". sb.append(replacement); } else { sb.append(ch); } } return sb.toString(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy