All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jpedal.fonts.UnicodeReader Maven / Gradle / Ivy

The newest version!
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/java-pdf-library-support/
 *
 * (C) Copyright 1997-2013, IDRsolutions and Contributors.
 *
 * 	This file is part of JPedal
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * UnicodeReader.java
 * ---------------
 */
package org.jpedal.fonts;

import org.jpedal.utils.LogWriter;

/**
 * Reads a character-code to Unicode translation table out of a raw byte stream.
 *
 * <p>
 * The reader scans the stream for sections opened by the keywords {@code beginbfchar} and
 * {@code beginbfrange} and closed by any keyword starting with {@code endbf}. Inside a section every
 * line is a list of hexadecimal tokens written as {@code <hhhh>}; range sections may also carry a
 * bracketed list of destinations ({@code [<hhhh> <hhhh> ...]}). This looks like the body of a PDF
 * ToUnicode CMap - NOTE(review): confirm against callers.
 * </p>
 *
 * <p>
 * The read position ({@link #ptr}) is instance state and is never reset, so one instance is meant
 * for a single call to {@link #readUnicode()}.
 * </p>
 */
public class UnicodeReader {

	/** Place values of the (at most four) hex digits of one value, least significant digit first. */
	private final static int[] powers = { 1, 16, 256, 256 * 16 };

	/** Initial size of the per-line value buffer; the buffer is grown when a line holds more values. */
	private static final int INITIAL_VALUE_CAPACITY = 2000;

	/** Prefix shared by the keywords which close a section. */
	private static final String END_SECTION = "endbf";

	/** Keyword opening a section of single code mappings. */
	private static final String BEGIN_CHAR_SECTION = "beginbfchar";

	/** Keyword opening a section of code-range mappings. */
	private static final String BEGIN_RANGE_SECTION = "beginbfrange";

	/** Compile-time switch for verbose tracing on {@code System.out}. */
	static final boolean debugUnicode = false;

	/** Current read position in {@link #data}. */
	int ptr;

	/** Raw bytes of the table being parsed (may be null). */
	byte[] data;

	/** Set once any mapping is stored for a source code above 255. */
	boolean hasDoubleBytes = false;

	/**
	 * @param data
	 *            raw bytes of the table; the array is referenced, not copied, and may be null
	 */
	public UnicodeReader(byte[] data) {

		this.data = data;
	}

	/**
	 * read unicode translation table
	 *
	 * <p>
	 * Parsing is best effort: any exception raised while decoding is logged through
	 * {@link LogWriter} and the mappings collected up to that point are returned.
	 * </p>
	 *
	 * @return a 65536 element table indexed by source code, holding the mapped text or null where
	 *         no mapping was read; null if this reader was created with null data
	 */
	public String[] readUnicode() {

		if (this.data == null) return null;

		// 0 = outside any section, 1 = bfchar section, 2 = bfrange section
		int defType = 0;

		if (debugUnicode) System.out.println(" Raw data============\n" + new String(this.data) + "\n=========================");

		// initialise unicode holder
		String[] unicodeMappings = new String[65536];

		int length = this.data.length;

		boolean inDef = false;

		try {

			// read values into lookup table
			while (true) {

				// skip tabs
				while (this.ptr < length && this.data[this.ptr] == 9)
					this.ptr++;

				if (this.ptr >= length) break;

				if (matchesAt(END_SECTION)) {
					defType = 0;
					inDef = false;
				}
				else if (inDef) {

					if (debugUnicode) System.out.println("Read line");

					// consumes one complete line and leaves ptr on its last character
					readLineValue(unicodeMappings, defType);
				}

				if (this.ptr >= length) break;

				if (matchesAt(BEGIN_CHAR_SECTION)) {

					defType = 1;
					// land on the keyword's last character; the increment below steps past it
					this.ptr = this.ptr + BEGIN_CHAR_SECTION.length() - 1;
					inDef = true;
				}
				else if (matchesAt(BEGIN_RANGE_SECTION)) {

					defType = 2;
					this.ptr = this.ptr + BEGIN_RANGE_SECTION.length() - 1;
					inDef = true;
				}

				this.ptr++;
			}

		}
		catch (Exception e) {
			// deliberate best effort: keep whatever was decoded before the malformed entry
			if (LogWriter.isOutput()) LogWriter.writeLog("Exception setting up text object " + e);

		}

		return unicodeMappings;
	}

	/**
	 * Tests whether the bytes at the current read position spell out the given keyword, without
	 * ever reading outside the data array.
	 *
	 * @param keyword
	 *            ASCII keyword to look for
	 * @return true if the whole keyword is present starting at {@link #ptr}
	 */
	private boolean matchesAt(String keyword) {

		final int keywordLength = keyword.length();

		if (this.ptr < 0 || this.ptr + keywordLength > this.data.length) return false;

		for (int i = 0; i < keywordLength; i++) {
			if (this.data[this.ptr + i] != keyword.charAt(i)) return false;
		}

		return true;
	}

	/**
	 * Converts one ASCII hex digit to its numeric value.
	 *
	 * @param raw
	 *            byte value of the digit
	 * @return value in the range 0-15
	 * @throws RuntimeException
	 *             if the byte is not a hex digit
	 */
	private static int hexDigit(int raw) {

		if (raw >= 'A' && raw <= 'F') return raw - 'A' + 10;

		if (raw >= 'a' && raw <= 'f') return raw - 'a' + 10;

		if (raw >= '0' && raw <= '9') return raw - '0';

		throw new RuntimeException("Unexpected number " + (char) raw);
	}

	/**
	 * Decodes the hex tokens of one line, starting at {@link #ptr}, and stores the resulting
	 * mapping(s). On return {@link #ptr} sits on the last character belonging to the line.
	 *
	 * <p>
	 * A token longer than four hex digits is split into consecutive four digit values. A
	 * {@code [...]} list met while looking for the third value switches the line to type 4.
	 * </p>
	 *
	 * @param unicodeMappings
	 *            table receiving the decoded mappings
	 * @param type
	 *            1 for a single code line (source, destination), 2 for a range line (first, last,
	 *            destination)
	 * @throws RuntimeException
	 *             if a token contains a character which is neither a hex digit nor CR, LF or space
	 */
	private void readLineValue(String[] unicodeMappings, int type) {

		// number of values expected on the line; grows as long tokens and bracket lists are found
		int entryCount = type + 1;

		final int dataLen = this.data.length;

		if (debugUnicode) System.out.println("in definition  " + type + " entryCount=" + entryCount);

		int[] value = new int[INITIAL_VALUE_CAPACITY];

		// true once a long token has been split: the next value continues inside the same <...>
		boolean isMultipleValues = false;

		for (int vals = 0; vals < entryCount; vals++) {

			// make room rather than overflow on an unusually long line
			if (vals == value.length) {
				int[] grown = new int[value.length * 2];
				System.arraycopy(value, 0, grown, 0, value.length);
				value = grown;
			}

			if (!isMultipleValues) {
				while (this.ptr < dataLen && this.data[this.ptr] != '<') { // read up to

					if (vals == 2 && entryCount == 3 && this.data[this.ptr] == '[') { // multiple values inside []

						type = 4;

						// one extra value per token in the list (stop at end of data if ']' is missing)
						int ii = this.ptr;
						while (ii < dataLen && this.data[ii] != ']') {
							if (this.data[ii] == '<') entryCount++;

							ii++;
						}

						// the third value was already counted in the initial entryCount
						entryCount--;
					}

					this.ptr++;
				}

				this.ptr++; // skip past
			}

			// find end of the token (or of its first four digits)
			int count = 0, charsFound = 0;

			while (this.ptr < dataLen && this.data[this.ptr] != '>') {

				if (this.data[this.ptr] != 10 && this.data[this.ptr] != 13 && this.data[this.ptr] != 32) charsFound++;

				this.ptr++;
				count++;

				// allow for multiple values
				if (charsFound == 5) {

					// hand the fifth digit back to the next value; everything consumed before it
					// (digits and any embedded whitespace) belongs to this one
					count--;
					this.ptr--;

					entryCount++;
					isMultipleValues = true;
					break;
				}
			}

			// convert to number, walking backwards from the least significant digit
			int pos = 0;

			for (int jj = 0; jj < count; jj++) {

				int raw = this.data[this.ptr - 1 - jj];

				// whitespace inside the token is ignored; the loop bound keeps us inside the token
				if (raw == 10 || raw == 13 || raw == 32) continue;

				value[vals] = value[vals] + (hexDigit(raw) * powers[pos]);

				if (pos == 3 && debugUnicode) System.out.println("read value (" + vals + ")=" + value[vals] + " (" + vals + ')' + " Hex="
						+ Integer.toHexString(value[vals]) + " char=" + (char) value[vals]);

				pos++;
			}
		}

		// roll past the closing characters of the line
		while (this.ptr < dataLen
				&& (this.data[this.ptr] == 62 || this.data[this.ptr] == 32 || this.data[this.ptr] == 10 || this.data[this.ptr] == 13 || this.data[this.ptr] == ']'))
			this.ptr++;

		// step back so that the caller's own increment lands on the first character of the next line
		this.ptr--;

		if (debugUnicode) System.out.println("fill entryCount=" + entryCount + " defType=" + type + " ");

		// put into array
		fillValues(unicodeMappings, entryCount, value, type);
	}

	/**
	 * Stores the values decoded from one line into the mapping table and updates
	 * {@link #hasDoubleBytes}.
	 *
	 * @param unicodeMappings
	 *            table receiving the mappings, indexed by source code
	 * @param entryCount
	 *            number of values decoded from the line
	 * @param value
	 *            decoded values: source code(s) first, destination value(s) after
	 * @param type
	 *            1 = single code, 4 = range with bracketed destination list, anything else = range
	 *            with one starting destination found at {@code value[type]}
	 */
	private void fillValues(String[] unicodeMappings, int entryCount, int[] value, int type) {

		int intValue;

		switch (type) {

			case 1:

				if (entryCount == 2) {
					// one source code, one destination character
					if (value[type] > 0) {
						unicodeMappings[value[0]] = String.valueOf((char) value[type]);
						if (value[0] > 255) this.hasDoubleBytes = true;
					}
					if (debugUnicode) System.out.println("2=" + unicodeMappings[value[0]]);

				}
				else {

					// one source code, several destination characters
					char[] str = new char[entryCount - 1];

					for (int aa = 0; aa < entryCount - 1; aa++)
						str[aa] = (char) value[type + aa];

					unicodeMappings[value[0]] = new String(str);
					if (value[0] > 255) this.hasDoubleBytes = true;

					if (debugUnicode) System.out.println("3=" + unicodeMappings[value[0]]);

				}

				break;

			case 4:

				if (entryCount > 1 && value[0] == value[1]) { // allow for <02> <02> [<0066006C>]

					// every destination value belongs to the one source code; the destinations are
					// value[2] .. value[entryCount - 1], so nothing past them is appended
					final int code = value[0];

					StringBuilder text = new StringBuilder();
					text.append((char) value[2]);

					for (int jj = 1; jj < entryCount - 2; jj++)
						text.append((char) value[2 + jj]);

					unicodeMappings[code] = text.toString();
					if (code > 255) this.hasDoubleBytes = true;

				}
				else {

					// consecutive source codes take consecutive destination values
					int j = 2;

					for (int i = value[0]; i < value[1] + 1; i++) {

						// range wider than the list: nothing left to assign
						if (j >= value.length) break;

						intValue = value[j];
						j++;

						if (intValue > 0) { // ignore 0 to fix issue in Dalim files
							unicodeMappings[i] = String.valueOf((char) (intValue));
							if (i > 255) this.hasDoubleBytes = true;

							if (debugUnicode) System.out.println(i + "=" + unicodeMappings[i] + " (4)");

						}
					}
				}

				break;

			default:

				// range: destination grows in step with the source code
				for (int i = value[0]; i < value[1] + 1; i++) {
					intValue = value[type] + i - value[0];
					if (intValue > 0) { // ignore 0 to fix issue in Dalim files
						unicodeMappings[i] = String.valueOf((char) (intValue));
						if (i > 255) this.hasDoubleBytes = true;
					}
				}

				break;
		}
	}

	/**
	 * @return true if a mapping was stored for at least one source code above 255
	 */
	public boolean hasDoubleByteValues() {
		return this.hasDoubleBytes;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy