All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.intarsys.pdf.cos.COSString Maven / Gradle / Ivy

/*
 * Copyright (c) 2007, intarsys consulting GmbH
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * - Neither the name of intarsys nor the names of its contributors may be used
 *   to endorse or promote products derived from this software without specific
 *   prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package de.intarsys.pdf.cos;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Arrays;

import de.intarsys.pdf.encoding.PDFDocEncoding;
import de.intarsys.tools.hex.HexTools;

/**
 * The string representation for a pdf document
 * 
 */
public class COSString extends COSPrimitiveObject implements Comparable {
	private static String LINE_SEPARATOR = System.getProperty("line.separator"); //$NON-NLS-1$

	/**
	 * Create a {@link COSString} from bytes.
	 * 
	 * @param bytes
	 * @return The new {@link COSString}
	 */
	public static COSString create(byte[] bytes) {
		if (bytes == null) {
			throw new NullPointerException("COSString value can't be null"); //$NON-NLS-1$
		}
		return new COSString(bytes, null);
	}

	/**
	 * Create a {@link COSString} from bytes.
	 * 
	 * @param bytes
	 * @return The new {@link COSString}
	 */
	public static COSString create(byte[] bytes, String encoding) {
		if (bytes == null) {
			throw new NullPointerException("COSString value can't be null"); //$NON-NLS-1$
		}
		return new COSString(bytes, encoding);
	}

	/**
	 * Create a {@link COSString} from string.
	 * 
	 * @param string
	 * @return The new {@link COSString}
	 */
	public static COSString create(String string) {
		if (string == null) {
			throw new NullPointerException("COSString value can't be null"); //$NON-NLS-1$
		}
		if (PDFDocEncoding.UNIQUE.isEncodable(string)) {
			return new COSString(string, null);
		} else {
			return new COSString(string, "UTF-16BE");
		}
	}

	/**
	 * Create a {@link COSString} from string.
	 * 
	 * @param string
	 * @return The new {@link COSString}
	 */
	public static COSString create(String string, String encoding) {
		if (string == null) {
			throw new NullPointerException("COSString value can't be null"); //$NON-NLS-1$
		}
		return new COSString(string, encoding);
	}

	/**
	 * Create a {@link COSString} from bytes in hex representation.
	 * 
	 * @param bytes
	 * @return The new {@link COSString}
	 */
	public static COSString createHex(byte[] bytes) {
		if (bytes == null) {
			throw new NullPointerException("COSString value can't be null"); //$NON-NLS-1$
		}
		COSString result = new COSString(bytes, null);
		result.setHexMode(true);
		return result;
	}

	/**
	 * Create a {@link COSString} from bytes in hex representation.
	 * 
	 * @param bytes
	 * @return The new {@link COSString}
	 */
	public static COSString createHex(byte[] bytes, String encoding) {
		if (bytes == null) {
			throw new NullPointerException("COSString value can't be null"); //$NON-NLS-1$
		}
		COSString result = new COSString(bytes, encoding);
		result.setHexMode(true);
		return result;
	}

	/**
	 * Create a {@link COSString} from string in hex
	 * representation.
	 * 
	 * @param string
	 * @return The new {@link COSString}
	 */
	public static COSString createHex(String string) {
		if (string == null) {
			throw new NullPointerException("COSString value can't be null"); //$NON-NLS-1$
		}
		if (PDFDocEncoding.UNIQUE.isEncodable(string)) {
			COSString result = new COSString(string, null);
			result.setHexMode(true);
			return result;
		} else {
			COSString result = new COSString(string, "UTF-16BE");
			result.setHexMode(true);
			return result;
		}
	}

	/**
	 * Create a {@link COSString} from string in hex
	 * representation.
	 * 
	 * @param string
	 * @return The new {@link COSString}
	 */
	public static COSString createHex(String string, String encoding) {
		if (string == null) {
			throw new NullPointerException("COSString value can't be null"); //$NON-NLS-1$
		}
		COSString result = new COSString(string, encoding);
		result.setHexMode(true);
		return result;
	}

	/**
	 * Create a {@link COSString} from string, escaping all
	 * newlines.
	 * 
	 * @param string
	 * @return The new {@link COSString}
	 */
	public static COSString createMultiLine(String string) {
		return create(toPDFString(string));
	}

	public static String toJavaString(String value) {
		StringBuilder result = new StringBuilder();
		int length = value.length();
		boolean crFound = false;
		for (int i = 0; i < length; i++) {
			char c = value.charAt(i);
			if (c == '\r') {
				if (crFound) {
					result.append(LINE_SEPARATOR);
				}
				crFound = true;
			} else if (c == '\n') {
				result.append(LINE_SEPARATOR);
				crFound = false;
			} else {
				if (crFound) {
					result.append(LINE_SEPARATOR);
				}
				result.append(c);
				crFound = false;
			}
		}
		if (crFound) {
			result.append(LINE_SEPARATOR);
		}
		return result.toString();
	}

	public static String toPDFString(String value) {
		// transform line delimiter (CR, CR/LN, LN) to CR
		StringBuilder result = new StringBuilder();
		int length = value.length();
		boolean ignoreLn = false;
		for (int i = 0; i < length; i++) {
			char c = value.charAt(i);
			if (c == '\n') {
				if (!ignoreLn) {
					result.append('\r');
				}
			} else {
				result.append(c);
			}
			ignoreLn = c == '\r';
		}
		return result.toString();
	}

	private String encoding;

	// the bytes that make up the string
	private byte[] bytes;

	// cached hash code
	private int hash = 0;

	// flag if this string has to be written in hex representation
	private boolean hexMode = false;

	private byte offset = 0;

	// the decoded string representation of the bytes (lazy)
	private String string;

	protected COSString() {
		super();
	}

	protected COSString(byte[] bytes, String encoding) {
		super();
		this.bytes = bytes;
		this.encoding = encoding;
	}

	protected COSString(String string, String encoding) {
		super();
		this.string = string;
		this.encoding = encoding;
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see
	 * de.intarsys.pdf.cos.COSObject#accept(de.intarsys.pdf.cos.ICOSObjectVisitor
	 * )
	 */
	@Override
	public java.lang.Object accept(ICOSObjectVisitor visitor)
			throws COSVisitorException {
		return visitor.visitFromString(this);
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see de.intarsys.pdf.cos.COSObject#asString()
	 */
	@Override
	public COSString asString() {
		return this;
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see de.intarsys.pdf.cos.COSObject#basicToString()
	 */
	@Override
	protected String basicToString() {
		if (isHexMode()) {
			return "<" + hexStringValue() + ">"; //$NON-NLS-1$ //$NON-NLS-2$
		} else {
			return "(" + stringValue() + ")"; //$NON-NLS-1$ //$NON-NLS-2$
		}
	}

	/**
	 * The bytes that make up this string.
	 * 
	 * @return The bytes that make up this string.
	 */
	public byte[] byteValue() {
		if (bytes == null) {
			bytes = encode();
		}
		return bytes;
	}

	/**
	 * Reset the bytes for this {@link COSString}. You may need this if the
	 * encoding is changed during the {@link COSString} lifetime.
	 */
	public void clearBytes() {
		if (bytes == null) {
			return;
		}
		decode();
		bytes = null;
	}

	/**
	 * Reset the string value for this {@link COSString}. You may need this if
	 * the encoding is changed during the {@link COSString} lifetime.
	 */
	public void clearString() {
		if (string == null) {
			return;
		}
		encode();
		string = null;
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see java.lang.Comparable#compareTo(java.lang.Object)
	 */
	public int compareTo(Object o) {
		if (!(o instanceof COSString)) {
			throw new ClassCastException("must compare with a COSString"); //$NON-NLS-1$
		}
		byte[] thisBytes = byteValue();
		byte[] otherBytes = ((COSString) o).byteValue();
		for (int i = 0; (i < thisBytes.length) && (i < otherBytes.length); i++) {
			if (thisBytes[i] < otherBytes[i]) {
				return -1;
			}
			if (thisBytes[i] > otherBytes[i]) {
				return 1;
			}
		}
		if (thisBytes.length < otherBytes.length) {
			return -1;
		}
		if (thisBytes.length > otherBytes.length) {
			return 1;
		}
		return 0;
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see de.intarsys.pdf.cos.COSObject#copyBasic()
	 */
	@Override
	protected COSObject copyBasic() {
		COSString result = new COSString();
		result.bytes = this.bytes;
		result.hash = this.hash;
		result.hexMode = this.hexMode;
		result.string = this.string;
		result.encoding = this.encoding;
		result.offset = this.offset;
		return result;
	}

	/**
	 * decode the string from the byte value in the given encoding
	 * 
	 * @return the decoded string
	 */
	protected String decode() {
		// test for well known internal encodings
		testBuiltinEncoding();
		if (encoding != null) {
			try {
				return new String(bytes, offset, bytes.length - offset,
						encoding);
			} catch (UnsupportedEncodingException e) {
				return PDFDocEncoding.UNIQUE.decode(bytes);
			}
		}
		return PDFDocEncoding.UNIQUE.decode(bytes);
	}

	/**
	 * encode the string in byte representation using the correct encoding
	 * 
	 * @return the encoded string
	 */
	protected byte[] encode() {
		if (encoding != null) {
			if ("UTF-16BE".equals(encoding)) {
				byte[] tempBytes;
				try {
					tempBytes = string.getBytes("UTF-16BE"); //$NON-NLS-1$
				} catch (UnsupportedEncodingException e) {
					// strange... :-)
					return PDFDocEncoding.UNIQUE.encode(string);
				}
				byte[] resultBytes = new byte[tempBytes.length + 2];
				resultBytes[0] = (byte) 0xfe;
				resultBytes[1] = (byte) 0xff;
				System.arraycopy(tempBytes, 0, resultBytes, 2, tempBytes.length);
				return resultBytes;
			} else if ("UTF-16LE".equals(encoding)) {
				byte[] tempBytes;
				try {
					tempBytes = string.getBytes("UTF-16LE"); //$NON-NLS-1$
				} catch (UnsupportedEncodingException e) {
					// strange... :-)
					return PDFDocEncoding.UNIQUE.encode(string);
				}
				byte[] resultBytes = new byte[tempBytes.length + 2];
				resultBytes[0] = (byte) 0xff;
				resultBytes[1] = (byte) 0xfe;
				System.arraycopy(tempBytes, 0, resultBytes, 2, tempBytes.length);
				return resultBytes;
			} else {
				try {
					return string.getBytes(encoding);
				} catch (UnsupportedEncodingException e) {
					return PDFDocEncoding.UNIQUE.encode(string);
				}
			}
		}
		return PDFDocEncoding.UNIQUE.encode(string);
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see java.lang.Object#equals(java.lang.Object)
	 */
	@Override
	public boolean equals(Object o) {
		if (!(o instanceof COSString)) {
			return false;
		}
		return Arrays.equals(byteValue(), ((COSString) o).byteValue());
	}

	/**
	 * Lookup the custom encoding used by this string. This method returns null
	 * if the default PDF conventions are used.
	 * 

* Be warned: If you use a custom encoding, there is no way for standard PDF * tools to recover this information! * * @return Lookup the custom encoding used by this string */ public String getEncoding() { return encoding; } /* * (non-Javadoc) * * @see java.lang.Object#hashCode() */ @Override public int hashCode() { if (bytes == null) { return super.hashCode(); } int h = hash; if (h == 0) { for (int i = 0; i < bytes.length; i++) { h = (31 * h) + bytes[i]; } hash = h; } return h; } /** * Show a hex encoded representation of the strings content. * * @return Show a hex encoded representation of the strings content. */ public String hexStringValue() { return HexTools.bytesToHexString(byteValue()); } /** * true if this string has to be saved as hex representation * * @return true if this string has to be saved as hex * representation */ public boolean isHexMode() { return hexMode; } /** * A Java {@link String} with correctly expanded newlines. * * @return A Java {@link String} with correctly expanded newlines. */ public String multiLineStringValue() { return toJavaString(stringValue()); } /* * (non-Javadoc) * * @see de.intarsys.pdf.cos.COSObject#restoreState(java.lang.Object) */ @Override public void restoreState(Object object) { super.restoreState(object); COSString cosString = (COSString) object; this.bytes = cosString.bytes; this.hash = cosString.hash; this.hexMode = cosString.hexMode; this.string = cosString.string; } /* * (non-Javadoc) * * @see de.intarsys.tools.objectsession.ISaveStateSupport#saveState() */ public Object saveState() { COSString result = new COSString(); result.bytes = this.bytes; result.hash = this.hash; result.hexMode = this.hexMode; result.string = this.string; result.encoding = this.encoding; result.offset = this.offset; result.container = this.container.saveStateContainer(); return result; } /** * Set the {@link Charset} name to be used when decoding the * {@link COSString} bytes. If no encoding is present, the default PDF * conventions are used. *

* Be warned: If you use a custom encoding, there is no way for standard PDF * tools to recover this information! * * @param encoding */ public void setEncoding(String encoding) { this.encoding = encoding; } /** * Set the flag if this is written in hex representation * * @param newHexMode * true if this is written in hex representation */ public void setHexMode(boolean newHexMode) { hexMode = newHexMode; } /** * The Java {@link String} representation of the receiver * * @return The Java {@link String} representation of the receiver */ @Override public String stringValue() { if (string == null) { string = decode(); } return string; } protected void testBuiltinEncoding() { if (bytes.length < 2) { return; } if ((bytes[0] == (byte) 0xfe) && (bytes[1] == (byte) 0xff)) { offset = 2; encoding = "UTF-16BE"; //$NON-NLS-1$ } else if ((bytes[0] == (byte) 0xff) && (bytes[1] == (byte) 0xfe)) { // extend test to undocumented format used by some tools offset = 2; encoding = "UTF-16LE"; //$NON-NLS-1$ } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy