org.apache.pdfbox.cos.COSString Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pdfbox Show documentation
Show all versions of pdfbox Show documentation
The Apache PDFBox library is an open source Java tool for working with PDF documents.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.cos;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.encoding.PDFDocEncodingCharset;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.persistence.util.COSHEXTable;
/**
* This represents a string object in a PDF document.
*
* @author Ben Litchfield
* @version $Revision: 1.30 $
*/
public class COSString extends COSBase
{
    /**
     * Log instance.
     */
    private static final Log LOG = LogFactory.getLog(COSString.class);

    /**
     * Opening delimiter of a literal string: "(".
     */
    public static final byte[] STRING_OPEN = new byte[] { 40 };

    /**
     * Closing delimiter of a literal string: ")".
     */
    public static final byte[] STRING_CLOSE = new byte[] { 41 };

    /**
     * Opening delimiter of a hexadecimal string: "<".
     */
    public static final byte[] HEX_STRING_OPEN = new byte[] { 60 };

    /**
     * Closing delimiter of a hexadecimal string: ">".
     */
    public static final byte[] HEX_STRING_CLOSE = new byte[] { 62 };

    /**
     * The escape character used inside literal strings: "\".
     */
    public static final byte[] ESCAPE = new byte[] { 92 };

    /**
     * Escape sequence written for a carriage return: "\r" (backslash, 'r').
     */
    public static final byte[] CR_ESCAPE = new byte[] { 92, 114 };

    /**
     * Escape sequence written for a line feed: "\n" (backslash, 'n').
     */
    public static final byte[] LF_ESCAPE = new byte[] { 92, 110 };

    /**
     * Escape sequence written for a horizontal tab: "\t" (backslash, 't').
     */
    public static final byte[] HT_ESCAPE = new byte[] { 92, 116 };

    /**
     * Escape sequence written for a backspace: "\b" (backslash, 'b').
     */
    public static final byte[] BS_ESCAPE = new byte[] { 92, 98 };

    /**
     * Escape sequence written for a form feed: "\f" (backslash, 'f').
     */
    public static final byte[] FF_ESCAPE = new byte[] { 92, 102 };

    // Both charsets are guaranteed to exist on every Java platform; looking them up once
    // avoids a Charset.forName() call on every getString() invocation.
    private static final Charset UTF_16BE = Charset.forName("UTF-16BE");
    private static final Charset UTF_16LE = Charset.forName("UTF-16LE");

    /**
     * The raw bytes of the string.
     */
    private ByteArrayOutputStream out = null;

    /**
     * Cached result of {@link #getString()}; set back to null whenever the bytes are modified.
     */
    private String str = null;

    /**
     * Forces the string to be serialized in hex form but not literal form, the default is to stream in literal form.
     */
    private boolean forceHexForm = false;

    /**
     * Creates an empty string.
     */
    public COSString()
    {
        out = new ByteArrayOutputStream();
    }

    /**
     * Creates an empty string; the argument is ignored.
     *
     * @param isDictionaryValue determines if this string represents a dictionary
     * @deprecated Not needed anymore. Use {@link #COSString()} instead. PDFBOX-1437
     */
    @Deprecated
    public COSString(boolean isDictionaryValue)
    {
        this();
    }

    /**
     * Explicit constructor for ease of manual PDF construction.
     * <p>
     * If every character of the value is in the range 0-255 the value is stored as ISO-8859-1 bytes,
     * otherwise it is stored as UTF-16BE preceded by the byte order mark FE FF.
     *
     * @param value
     *            The string value of the object.
     */
    public COSString(String value)
    {
        try
        {
            // a single character above 0xFF is enough to require the two-byte encoding
            boolean unicode16 = false;
            char[] chars = value.toCharArray();
            for (char c : chars)
            {
                if (c > 255)
                {
                    unicode16 = true;
                    break;
                }
            }
            if (unicode16)
            {
                byte[] data = value.getBytes("UTF-16BE");
                out = new ByteArrayOutputStream(data.length + 2);
                // byte order mark identifying the content as UTF-16BE
                out.write(0xFE);
                out.write(0xFF);
                out.write(data);
            }
            else
            {
                byte[] data = value.getBytes("ISO-8859-1");
                out = new ByteArrayOutputStream(data.length);
                out.write(data);
            }
        }
        catch (IOException ignore)
        {
            // should never happen
            LOG.error(ignore,ignore);
        }
    }

    /**
     * Explicit constructor for ease of manual PDF construction.
     *
     * @param value
     *            The raw bytes of the string; they are copied into this object.
     */
    public COSString(byte[] value)
    {
        try
        {
            out = new ByteArrayOutputStream(value.length);
            out.write(value);
        }
        catch (IOException ignore)
        {
            // should never happen
            LOG.error(ignore,ignore);
        }
    }

    /**
     * Enables or disables the literal form as the preferred output form.
     * <p>
     * This is the inverse of {@link #setForceHexForm(boolean)}. Passing true only removes a previously
     * forced hex form: {@link #writePDF(OutputStream)} still writes hexadecimal form when the string
     * contains a byte outside the ASCII range.
     *
     * @param v
     *            if v is true the string will be written in literal form, otherwise it will be written in hexa if
     *            necessary.
     */
    public void setForceLiteralForm(boolean v)
    {
        forceHexForm = !v;
    }

    /**
     * Forces the string to be written in hexadecimal form instead of literal form.
     *
     * @param v
     *            if v is true the string will be written in hexadecimal form otherwise it will be written in literal if
     *            necessary.
     */
    public void setForceHexForm(boolean v)
    {
        forceHexForm = v;
    }

    /**
     * This will create a COS string from a string of hex characters. Malformed input is not tolerated.
     *
     * @param hex
     *            A hex string.
     * @return A cos string with the hex characters converted to their actual bytes.
     * @throws IOException
     *             If there is an error with the hex string.
     */
    public static COSString createFromHexString(String hex) throws IOException
    {
        return createFromHexString(hex, false);
    }

    /**
     * Creates a COS string from a string of hex characters, optionally ignoring malformed input.
     * <p>
     * Leading and trailing whitespace is removed first. Every pair of characters must consist of two
     * hexadecimal digits; sign characters are not accepted. With force set, a malformed pair is replaced
     * by a single '?' byte.
     *
     * @param hex
     *            A hex string.
     * @param force
     *            flag to ignore malformed input
     * @return A cos string with the hex characters converted to their actual bytes.
     * @throws IOException
     *             If force is false and the string contains a character that is not a hex digit.
     */
    public static COSString createFromHexString(String hex, boolean force) throws IOException
    {
        COSString retval = new COSString();
        StringBuilder hexBuffer = new StringBuilder(hex.trim());
        // if odd number then the last hex digit is assumed to be 0
        if (hexBuffer.length() % 2 != 0)
        {
            hexBuffer.append('0');
        }
        int length = hexBuffer.length();
        for (int i = 0; i < length; i += 2)
        {
            // Validate each digit on its own: Integer.parseInt() would accept a leading sign,
            // so a pair such as "-1" would silently become the byte 0xFF.
            int high = Character.digit(hexBuffer.charAt(i), 16);
            int low = Character.digit(hexBuffer.charAt(i + 1), 16);
            if (high >= 0 && low >= 0)
            {
                retval.append((high << 4) | low);
            }
            else if (force)
            {
                retval.append('?');
            }
            else
            {
                throw new IOException("Invalid hex string: " + hex);
            }
        }
        return retval;
    }

    /**
     * This will take this string and create a hex representation of the bytes that make the string.
     *
     * @return A hex string representing the bytes in this string.
     */
    public String getHexString()
    {
        byte[] data = getBytes();
        StringBuilder retval = new StringBuilder(data.length * 2);
        for (byte b : data)
        {
            // mask to get the unsigned value of the byte as table index
            retval.append(COSHEXTable.HEX_TABLE[b & 0xFF]);
        }
        return retval.toString();
    }

    /**
     * This will get the string that this object wraps.
     * <p>
     * Bytes starting with the byte order mark FE FF are decoded as UTF-16BE and bytes starting with
     * FF FE as UTF-16LE; the mark itself is not part of the result. All other content is decoded with
     * {@link PDFDocEncodingCharset}. The result is cached until the bytes are modified.
     *
     * @return The wrapped string.
     */
    public String getString()
    {
        if (str != null)
        {
            return str;
        }
        Charset charset = PDFDocEncodingCharset.INSTANCE;
        byte[] data = getBytes();
        int start = 0;
        // ">= 2" so that a string consisting of nothing but the byte order mark decodes
        // to the empty string instead of two PDFDocEncoding characters
        if (data.length >= 2)
        {
            if (data[0] == (byte) 0xFF && data[1] == (byte) 0xFE)
            {
                charset = UTF_16LE;
                start = 2;
            }
            else if (data[0] == (byte) 0xFE && data[1] == (byte) 0xFF)
            {
                charset = UTF_16BE;
                start = 2;
            }
        }
        String retval = toString(data, start, data.length - start, charset);
        str = retval;
        return retval;
    }

    /**
     * Decodes a range of a byte array with the given charset.
     */
    private static String toString(byte[] data, int offset, int length, Charset charset)
    {
        //This is only needed until PDFBox switches to JavaSE-1.6
        //This could be just: return new String(data, offset, length, charset);
        CharBuffer charBuffer = charset.decode(ByteBuffer.wrap(data, offset, length));
        return charBuffer.toString();
    }

    /**
     * This will append a byte[] to the string.
     *
     * @param data
     *            The byte[] to add to this string.
     *
     * @throws IOException
     *             If an IO error occurs while writing the byte.
     */
    public void append(byte[] data) throws IOException
    {
        out.write(data);
        // the cached decoded value no longer matches the bytes
        str = null;
    }

    /**
     * This will append a byte to the string.
     *
     * @param in
     *            The byte to add to this string.
     *
     * @throws IOException
     *             If an IO error occurs while writing the byte.
     */
    public void append(int in) throws IOException
    {
        out.write(in);
        // the cached decoded value no longer matches the bytes
        str = null;
    }

    /**
     * This will reset the internal buffer, leaving the string empty.
     */
    public void reset()
    {
        out.reset();
        str = null;
    }

    /**
     * This will get the bytes of the string.
     *
     * @return A byte array that represents the string.
     */
    public byte[] getBytes()
    {
        return out.toByteArray();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String toString()
    {
        return "COSString{" + this.getString() + "}";
    }

    /**
     * This will output this string as a PDF object.
     * <p>
     * The literal form "(...)" is used when no byte is outside the ASCII range and the hex form has
     * not been forced; otherwise the hexadecimal form "&lt;...&gt;" is written.
     *
     * @param output
     *            The stream to write to.
     * @throws IOException
     *             If there is an error writing to the stream.
     */
    public void writePDF(OutputStream output) throws IOException
    {
        byte[] bytes = getBytes();
        int length = bytes.length;
        // a negative byte is an eight bit value and therefore outside the ASCII range
        boolean outsideASCII = false;
        for (int i = 0; i < length && !outsideASCII; i++)
        {
            outsideASCII = bytes[i] < 0;
        }
        if (!outsideASCII && !forceHexForm)
        {
            output.write(STRING_OPEN);
            for (int i = 0; i < length; i++)
            {
                int b = bytes[i] & 0xFF;
                switch (b)
                {
                    case '(':
                    case ')':
                    case '\\':
                    {
                        // delimiters and the escape character itself must be escaped
                        output.write(ESCAPE);
                        output.write(b);
                        break;
                    }
                    case 10: // LF
                    {
                        output.write(LF_ESCAPE);
                        break;
                    }
                    case 13: // CR
                    {
                        output.write(CR_ESCAPE);
                        break;
                    }
                    case '\t':
                    {
                        output.write(HT_ESCAPE);
                        break;
                    }
                    case '\b':
                    {
                        output.write(BS_ESCAPE);
                        break;
                    }
                    case '\f':
                    {
                        output.write(FF_ESCAPE);
                        break;
                    }
                    default:
                    {
                        output.write(b);
                    }
                }
            }
            output.write(STRING_CLOSE);
        }
        else
        {
            output.write(HEX_STRING_OPEN);
            for (int i = 0; i < length; i++)
            {
                output.write(COSHEXTable.TABLE[bytes[i] & 0xFF]);
            }
            output.write(HEX_STRING_CLOSE);
        }
    }

    /**
     * visitor pattern double dispatch method.
     *
     * @param visitor
     *            The object to notify when visiting this object.
     * @return any object, depending on the visitor implementation, or null
     * @throws COSVisitorException
     *             If an error occurs while visiting this object.
     */
    @Override
    public Object accept(ICOSVisitor visitor) throws COSVisitorException
    {
        return visitor.visitFromString(this);
    }

    /**
     * Two strings are equal when their decoded values ({@link #getString()}) are equal and both have
     * the same forced hex form setting.
     *
     * @param obj
     *            The object to compare with.
     * @return true if obj is an equal COSString.
     */
    @Override
    public boolean equals(Object obj)
    {
        if (obj instanceof COSString)
        {
            COSString strObj = (COSString) obj;
            return this.getString().equals(strObj.getString()) && forceHexForm == strObj.forceHexForm;
        }
        return false;
    }

    /**
     * Hash code built from the same two properties that {@link #equals(Object)} compares.
     *
     * @return the hash code of this string.
     */
    @Override
    public int hashCode()
    {
        return getString().hashCode() + (forceHexForm ? 17 : 0);
    }
}