All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.pdfbox.cos.COSString Maven / Gradle / Ivy

Go to download

The Apache PDFBox library is an open source Java tool for working with PDF documents.

There is a newer version: 3.0.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.cos;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.pdfbox.encoding.PDFDocEncodingCharset;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.persistence.util.COSHEXTable;

/**
 * This represents a string object in a PDF document.
 * <p>
 * The string is held as a growable buffer of raw bytes. {@link #getString()} decodes those bytes to text
 * (UTF-16 when a byte order mark is present, PDFDocEncoding otherwise) and caches the result until the
 * bytes are modified again through {@link #append(byte[])}, {@link #append(int)} or {@link #reset()}.
 *
 * @author Ben Litchfield
 * @version $Revision: 1.30 $
 */
public class COSString extends COSBase
{
    /**
     * Log instance.
     */
    private static final Log LOG = LogFactory.getLog(COSString.class);

    /**
     * One of the open string tokens.
     */
    public static final byte[] STRING_OPEN = new byte[] { 40 }; // "(".getBytes();
    /**
     * One of the close string tokens.
     */
    public static final byte[] STRING_CLOSE = new byte[] { 41 }; // ")".getBytes( "ISO-8859-1" );
    /**
     * One of the open string tokens.
     */
    public static final byte[] HEX_STRING_OPEN = new byte[] { 60 }; // "<".getBytes( "ISO-8859-1" );
    /**
     * One of the close string tokens.
     */
    public static final byte[] HEX_STRING_CLOSE = new byte[] { 62 }; // ">".getBytes( "ISO-8859-1" );
    /**
     * the escape character in strings.
     */
    public static final byte[] ESCAPE = new byte[] { 92 }; // "\\".getBytes( "ISO-8859-1" );

    /**
     * CR escape characters.
     */
    public static final byte[] CR_ESCAPE = new byte[] { 92, 114 }; // "\\r".getBytes( "ISO-8859-1" );
    /**
     * LF escape characters.
     */
    public static final byte[] LF_ESCAPE = new byte[] { 92, 110 }; // "\\n".getBytes( "ISO-8859-1" );
    /**
     * HT escape characters.
     */
    public static final byte[] HT_ESCAPE = new byte[] { 92, 116 }; // "\\t".getBytes( "ISO-8859-1" );
    /**
     * BS escape characters.
     */
    public static final byte[] BS_ESCAPE = new byte[] { 92, 98 }; // "\\b".getBytes( "ISO-8859-1" );
    /**
     * FF escape characters.
     */
    public static final byte[] FF_ESCAPE = new byte[] { 92, 102 }; // "\\f".getBytes( "ISO-8859-1" );

    /**
     * The raw bytes of this string. Assigned exactly once by every constructor, so it is never null.
     */
    private final ByteArrayOutputStream out;

    /**
     * Cached result of {@link #getString()}; set back to null whenever the bytes change.
     */
    private String str = null;

    /**
     * Forces the string to be serialized in hex form but not literal form, the default is to stream in literal form.
     */
    private boolean forceHexForm = false;

    /**
     * Creates an empty string.
     */
    public COSString()
    {
        out = new ByteArrayOutputStream();
    }

    /**
     * Creates an empty string. The argument is ignored.
     *
     * @param isDictionaryValue determines if this string represents a dictionary
     * @deprecated Not needed anymore. Use {@link #COSString()} instead. PDFBOX-1437
     */
    @Deprecated
    public COSString(boolean isDictionaryValue)
    {
        this();
    }

    /**
     * Explicit constructor for ease of manual PDF construction.
     * <p>
     * If every character of the value is in the range 0..255 the value is stored as ISO-8859-1 bytes,
     * otherwise it is stored as UTF-16BE preceded by the byte order mark FE FF.
     *
     * @param value
     *            The string value of the object.
     */
    public COSString(String value)
    {
        byte[] data = encode(value);
        out = new ByteArrayOutputStream(data.length);
        // write(byte[], int, int) of ByteArrayOutputStream does not declare IOException
        out.write(data, 0, data.length);
    }

    /**
     * Explicit constructor for ease of manual PDF construction.
     *
     * @param value
     *            The raw bytes of the string; they are copied into the internal buffer.
     */
    public COSString(byte[] value)
    {
        out = new ByteArrayOutputStream(value.length);
        // write(byte[], int, int) of ByteArrayOutputStream does not declare IOException
        out.write(value, 0, value.length);
    }

    /**
     * Converts a Java string to the bytes stored by {@link #COSString(String)}.
     *
     * @param value
     *            The text to encode.
     * @return ISO-8859-1 bytes if all characters are below 256, otherwise FE FF followed by the UTF-16BE bytes.
     */
    private static byte[] encode(String value)
    {
        boolean unicode16 = false;
        int length = value.length();
        for (int i = 0; i < length; i++)
        {
            if (value.charAt(i) > 255)
            {
                unicode16 = true;
                break;
            }
        }
        try
        {
            if (!unicode16)
            {
                return value.getBytes("ISO-8859-1");
            }
            byte[] text = value.getBytes("UTF-16BE");
            byte[] data = new byte[text.length + 2];
            data[0] = (byte) 0xFE;
            data[1] = (byte) 0xFF;
            System.arraycopy(text, 0, data, 2, text.length);
            return data;
        }
        catch (IOException e)
        {
            // should never happen, both charsets must be supported by every Java platform
            LOG.error(e, e);
            return new byte[0];
        }
    }

    /**
     * Forces the string to be written in literal form instead of hexadecimal form.
     *
     * @param v
     *            if v is true the string will be written in literal form, otherwise it will be written in
     *            hexadecimal form.
     */
    public void setForceLiteralForm(boolean v)
    {
        forceHexForm = !v;
    }

    /**
     * Forces the string to be written in hexadecimal form instead of literal form.
     *
     * @param v
     *            if v is true the string will be written in hexadecimal form otherwise it will be written in literal if
     *            necessary.
     */
    public void setForceHexForm(boolean v)
    {
        forceHexForm = v;
    }

    /**
     * This will create a COS string from a string of hex characters.
     *
     * @param hex
     *            A hex string.
     * @return A cos string with the hex characters converted to their actual bytes.
     * @throws IOException
     *             If there is an error with the hex string.
     */
    public static COSString createFromHexString(String hex) throws IOException
    {
        return createFromHexString(hex, false);
    }

    /**
     * Creates a COS string from a string of hex characters, optionally ignoring malformed input.
     * <p>
     * Leading and trailing whitespace is removed first. Every pair of characters must consist of two
     * hexadecimal digits; sign characters are not accepted. A pair that is not valid is either replaced
     * by a single '?' byte (force) or reported with an exception.
     *
     * @param hex
     *            A hex string.
     * @param force
     *            flag to ignore malformed input
     * @return A cos string with the hex characters converted to their actual bytes.
     * @throws IOException
     *             If there is an error with the hex string.
     */
    public static COSString createFromHexString(String hex, boolean force) throws IOException
    {
        COSString retval = new COSString();
        StringBuilder hexBuffer = new StringBuilder(hex.trim());
        // if odd number then the last hex digit is assumed to be 0
        if (hexBuffer.length() % 2 != 0)
        {
            hexBuffer.append('0');
        }
        int length = hexBuffer.length();
        for (int i = 0; i < length; i += 2)
        {
            // Character.digit returns -1 for anything that is not a hex digit, including '+' and '-',
            // which Integer.parseInt would treat as a sign and silently turn into a wrong byte
            int high = Character.digit(hexBuffer.charAt(i), 16);
            int low = Character.digit(hexBuffer.charAt(i + 1), 16);
            if (high >= 0 && low >= 0)
            {
                retval.append((high << 4) | low);
            }
            else if (force)
            {
                retval.append('?');
            }
            else
            {
                throw new IOException("Invalid hex string: " + hex);
            }
        }
        return retval;
    }

    /**
     * This will take this string and create a hex representation of the bytes that make the string.
     *
     * @return A hex string representing the bytes in this string.
     */
    public String getHexString()
    {
        byte[] data = getBytes();
        int length = data.length;
        StringBuilder retval = new StringBuilder(length * 2);
        for (int i = 0; i < length; i++)
        {
            // mask to get the unsigned value of the byte as table index
            retval.append(COSHEXTable.HEX_TABLE[data[i] & 0xFF]);
        }

        return retval.toString();
    }

    /**
     * This will get the string that this object wraps.
     * <p>
     * Bytes starting with FF FE are decoded as UTF-16LE and bytes starting with FE FF as UTF-16BE, in both
     * cases without the two marker bytes. Everything else is decoded with PDFDocEncoding. The decoded
     * value is cached until the bytes are changed.
     *
     * @return The wrapped string.
     */
    public String getString()
    {
        if (str != null)
        {
            return str;
        }
        String retval;
        Charset charset = PDFDocEncodingCharset.INSTANCE;
        byte[] data = getBytes();
        int start = 0;
        // ">= 2" so that a string consisting of nothing but the byte order mark yields the empty string
        if (data.length >= 2)
        {
            if (data[0] == (byte) 0xFF && data[1] == (byte) 0xFE)
            {
                charset = Charset.forName("UTF-16LE");
                start = 2;
            }
            else if (data[0] == (byte) 0xFE && data[1] == (byte) 0xFF)
            {
                charset = Charset.forName("UTF-16BE");
                start = 2;
            }
        }

        retval = toString(data, start, data.length - start, charset);
        str = retval;
        return retval;
    }

    /**
     * Decodes a range of bytes with the given charset.
     *
     * @param data
     *            The bytes to decode.
     * @param offset
     *            Index of the first byte to decode.
     * @param length
     *            Number of bytes to decode.
     * @param charset
     *            The charset to decode with.
     * @return The decoded text.
     */
    private static String toString(byte[] data, int offset, int length, Charset charset)
    {
        //This is only needed until PDFBox switches to JavaSE-1.6
        //This could be just: return new String(data, offset, length, charset);
        CharBuffer charBuffer = charset.decode(ByteBuffer.wrap(data, offset, length));
        return charBuffer.toString();
    }

    /**
     * This will append a byte[] to the string.
     *
     * @param data
     *            The byte[] to add to this string.
     *
     * @throws IOException
     *             If an IO error occurs while writing the byte.
     */
    public void append(byte[] data) throws IOException
    {
        out.write(data);
        // the cached text no longer matches the bytes
        str = null;
    }

    /**
     * This will append a byte to the string.
     *
     * @param in
     *            The byte to add to this string.
     *
     * @throws IOException
     *             If an IO error occurs while writing the byte.
     */
    public void append(int in) throws IOException
    {
        out.write(in);
        // the cached text no longer matches the bytes
        str = null;
    }

    /**
     * This will reset the internal buffer.
     */
    public void reset()
    {
        out.reset();
        str = null;
    }

    /**
     * This will get the bytes of the string.
     *
     * @return A byte array that represents the string; a new copy on every call.
     */
    public byte[] getBytes()
    {
        return out.toByteArray();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String toString()
    {
        return "COSString{" + this.getString() + "}";
    }

    /**
     * This will output this string as a PDF object.
     * <p>
     * The literal form "(...)" is used if all bytes are in the ASCII range and the hex form has not been
     * forced, otherwise the hex form "&lt;...&gt;" is used.
     *
     * @param output
     *            The stream to write to.
     * @throws IOException
     *             If there is an error writing to the stream.
     */
    public void writePDF(OutputStream output) throws IOException
    {
        boolean outsideASCII = false;
        // Lets first check if we need to escape this string.
        byte[] bytes = getBytes();
        int length = bytes.length;
        for (int i = 0; i < length && !outsideASCII; i++)
        {
            // if the byte is negative then it is an eight bit byte and is
            // outside the ASCII range.
            outsideASCII = bytes[i] < 0;
        }
        if (!outsideASCII && !forceHexForm)
        {
            output.write(STRING_OPEN);
            for (int i = 0; i < length; i++)
            {
                int b = bytes[i] & 0xFF;
                switch (b)
                {
                case '(':
                case ')':
                case '\\':
                {
                    // delimiters and the escape character itself are prefixed with a backslash
                    output.write(ESCAPE);
                    output.write(b);
                    break;
                }
                case 10: // LF
                {
                    output.write(LF_ESCAPE);
                    break;
                }
                case 13: // CR
                {
                    output.write(CR_ESCAPE);
                    break;
                }
                case '\t':
                {
                    output.write(HT_ESCAPE);
                    break;
                }
                case '\b':
                {
                    output.write(BS_ESCAPE);
                    break;
                }
                case '\f':
                {
                    output.write(FF_ESCAPE);
                    break;
                }
                default:
                {
                    output.write(b);
                }
                }
            }
            output.write(STRING_CLOSE);
        }
        else
        {
            output.write(HEX_STRING_OPEN);
            for (int i = 0; i < length; i++)
            {
                // mask to get the unsigned value of the byte as table index
                output.write(COSHEXTable.TABLE[bytes[i] & 0xFF]);
            }
            output.write(HEX_STRING_CLOSE);
        }
    }

    /**
     * visitor pattern double dispatch method.
     *
     * @param visitor
     *            The object to notify when visiting this object.
     * @return any object, depending on the visitor implementation, or null
     * @throws COSVisitorException
     *             If an error occurs while visiting this object.
     */
    @Override
    public Object accept(ICOSVisitor visitor) throws COSVisitorException
    {
        return visitor.visitFromString(this);
    }

    /**
     * Two strings are equal if their decoded text ({@link #getString()}) is equal and both have the same
     * hex form setting. Note that the comparison is done on the decoded text and not on the raw bytes.
     *
     * @param obj
     *            The object to compare with.
     * @return true if obj is a COSString with the same text and the same hex form setting.
     */
    @Override
    public boolean equals(Object obj)
    {
        if (obj instanceof COSString)
        {
            COSString strObj = (COSString) obj;
            return this.getString().equals(strObj.getString()) && forceHexForm == strObj.forceHexForm;
        }
        return false;
    }

    /**
     * Hash code based on the same values as {@link #equals(Object)}: the decoded text and the hex form
     * setting.
     *
     * @return The hash code of this string.
     */
    @Override
    public int hashCode()
    {
        int result = getString().hashCode();
        return forceHexForm ? result + 17 : result;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy