
org.jpedal.fonts.UnicodeReader Maven / Gradle / Ivy
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
@LICENSE@
*
* ---------------
* UnicodeReader.java
* ---------------
*/
package org.jpedal.fonts;
import org.jpedal.utils.LogWriter;
/**
 * Reads the {@code beginbfchar} / {@code beginbfrange} sections (and a leading
 * {@code usecmap} reference to one of the Adobe UCS2 CMaps) from a raw byte
 * stream and turns them into a lookup table from character code to Unicode text.
 *
 * <p>Parsing is best-effort: any exception raised while scanning is logged and
 * whatever has been decoded up to that point is returned.
 */
class UnicodeReader {

    /** Place value of each hex digit of a 16-bit unit, least significant digit first. */
    private static final int[] powers = {1, 16, 256, 256 * 16};

    /** Maximum number of {@code <...>} values that can be held for one definition line. */
    private static final int MAX_VALUES_PER_LINE = 2000;

    /** Names (without the leading '/') that are recognised in front of {@code usecmap}. */
    private static final String[] UCS2_CMAP_NAMES = {
        "Adobe-Korea1-UCS2", "Adobe-Japan1-UCS2", "Adobe-CNS1-UCS2", "Adobe-GB1-UCS2"
    };

    /** Number of bytes in {@link #data}; stays 0 when no data was supplied. */
    int dataLen;

    /** Raw bytes being parsed; may be {@code null}. */
    final byte[] data;

    /** Set once any source character code above 255 has been mapped. */
    boolean hasDoubleBytes;

    /**
     * @param data raw bytes of the mapping stream, or {@code null} if there is none
     */
    UnicodeReader(final byte[] data) {
        this.data = data;
        if (data != null) {
            dataLen = data.length;
        }
    }

    /**
     * read unicode translation table
     *
     * @return {@code null} if no data was supplied; otherwise a 65536-element array
     *         indexed by character code, holding the mapped text or {@code null}
     *         where no mapping was found
     */
    public String[] readUnicode() {

        if (data == null) {
            return null;
        }

        int defType = 0; //0 = outside a section, 1 = bfchar, 2 = bfrange
        int ptr = 0;
        boolean inDef = false;

        //initialise unicode holder
        final String[] unicodeMappings = new String[65536];

        try {
            //read values into lookup table
            while (true) {

                //skip tabs
                while (ptr < dataLen && data[ptr] == 9) {
                    ptr++;
                }

                if (ptr >= dataLen) {
                    break;
                }

                if (matchesAt(ptr, "endbf")) { //endbfchar or endbfrange closes the section
                    defType = 0;
                    inDef = false;
                } else if (inDef) {
                    ptr = readLineValue(unicodeMappings, defType, ptr);
                }

                if (ptr >= dataLen) {
                    break;
                }

                if (matchesAt(ptr, "beginbf")) {
                    if (matchesAt(ptr + 7, "char")) {
                        defType = 1;
                        ptr += 10; //leave ptr on last letter of keyword, incremented below
                        inDef = true;
                    } else if (matchesAt(ptr + 7, "range")) {
                        defType = 2;
                        ptr += 11;
                        inDef = true;
                    }
                } else if (matchesAt(ptr, "usecmap")) {
                    final String cmapName = findUsedCMapName(ptr);
                    if (cmapName != null) {
                        final EncodingUCS2 ucs2 = new EncodingUCS2(cmapName);
                        //NOTE(review): index 65535 is deliberately left untouched to keep the
                        //original loop bound - confirm whether EncodingUCS2 covers that code
                        for (int i = 0; i < 65535; i++) {
                            unicodeMappings[i] = String.valueOf((char) ucs2.getUnicodeValue(i));
                        }
                    }
                }

                ptr++;
            }
        } catch (final Exception e) {
            LogWriter.writeLog("Exception setting up text object " + e);
        }

        return unicodeMappings;
    }

    /**
     * Bounds-checked test for an ASCII keyword at a given offset in {@link #data}.
     *
     * @param pos     offset of the first byte to compare
     * @param keyword ASCII text to look for
     * @return true only if the whole keyword lies inside the data and matches byte for byte
     */
    private boolean matchesAt(final int pos, final String keyword) {
        final int len = keyword.length();
        if (pos < 0 || pos + len > dataLen) {
            return false;
        }
        for (int i = 0; i < len; i++) {
            if (data[pos + i] != keyword.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Identifies which recognised UCS2 CMap name is the token directly in front of
     * a {@code usecmap} keyword, ignoring any whitespace between the two.
     *
     * @param usecmapPos offset of the 'u' of {@code usecmap}
     * @return the name without its leading '/', or {@code null} if the preceding
     *         token is not one of {@link #UCS2_CMAP_NAMES}
     */
    private String findUsedCMapName(final int usecmapPos) {

        //step back over the separator(s) to the last byte of the name token
        int last = usecmapPos - 1;
        while (last >= 0 && isSeparator(data[last])) {
            last--;
        }

        for (final String name : UCS2_CMAP_NAMES) {
            final int slash = last - name.length();
            if (slash >= 0 && data[slash] == '/' && matchesAt(slash + 1, name)) {
                return name;
            }
        }
        return null;
    }

    /** @return true for tab, line feed, form feed, carriage return and space */
    private static boolean isSeparator(final int b) {
        return b == 9 || b == 10 || b == 12 || b == 13 || b == 32;
    }

    /** @return true for the blanks (LF, CR, space) that are ignored inside {@code <...>} */
    private static boolean isHexPadding(final int b) {
        return b == 10 || b == 13 || b == 32;
    }

    /**
     * Reads one definition line (a bfchar pair or a bfrange triple, the latter
     * optionally with a bracketed list of destinations) starting at {@code ptr}
     * and stores the resulting mappings.
     *
     * @param unicodeMappings table to update
     * @param type            1 for bfchar, 2 for bfrange (switched to 4 internally
     *                        when the destination is a bracketed list)
     * @param ptr             offset at which to start scanning
     * @return offset of the last byte consumed, so the caller can increment past it
     */
    private int readLineValue(final String[] unicodeMappings, int type, int ptr) {

        int entryCount = type + 1;

        //one row per <...> value, up to four 16-bit units in each row
        final int[][] value = new int[MAX_VALUES_PER_LINE][4];

        boolean isMultipleValues = false;

        for (int vals = 0; vals < entryCount; vals++) {

            if (!isMultipleValues) {
                while (ptr < dataLen && data[ptr] != '<') { //read up to
                    if (vals == 2 && entryCount == 3 && data[ptr] == '[') { //multiple values inside []
                        type = 4;

                        //count the values in the list (bounded so an unterminated list cannot overrun)
                        int ii = ptr;
                        while (ii < dataLen && data[ii] != ']') {
                            if (data[ii] == '<') {
                                entryCount++;
                            }
                            ii++;
                        }

                        //the single destination was already counted
                        entryCount--;
                    }
                    ptr++;
                }
                ptr++; //skip past
            }

            //find end, counting hex digits only (blanks inside <> are ignored)
            int digits = 0;
            while (ptr < dataLen && data[ptr] != '>') {

                if (!isHexPadding(data[ptr])) {
                    digits++;
                }
                ptr++;

                //outside [] a value longer than 4 digits is split into extra 4-digit entries
                if (digits == 5 && type != 4) {
                    digits = 4;
                    ptr--; //leave ptr on the 5th digit so the next entry starts there
                    entryCount++;
                    isMultipleValues = true;
                    break;
                }
            }

            //decode leading group first, then the following ones
            int unit = 0;
            while (digits > 0) {
                value[vals][unit] = getNextVal(ptr, digits);
                unit++;
                digits -= 4;
            }
        }

        //roll to end end so works
        while (ptr < dataLen && (data[ptr] == 62 || data[ptr] == 32 || data[ptr] == 10 || data[ptr] == 13 || data[ptr] == ']')) {
            ptr++;
        }
        ptr--;

        //put into array
        fillValues(unicodeMappings, entryCount, value, type);

        return ptr;
    }

    /**
     * Decodes one group of up to four hex digits from the digits that end just
     * before {@code ptr}.
     *
     * <p>When more than four digits remain, the group returned is the leftmost
     * one still undecoded: the {@code count - 4} digits nearest to {@code ptr}
     * are stepped over first. Line feed, carriage return and space are ignored.
     *
     * @param ptr   offset just past the last hex digit
     * @param count number of hex digits still to be decoded before {@code ptr}
     * @return numeric value of the selected group
     * @throws RuntimeException if a byte that is neither a hex digit nor an ignored blank is met
     */
    private int getNextVal(final int ptr, int count) {

        int digitsToSkip = 0;
        if (count > 4) {
            digitsToSkip = count - 4;
            count = 4;
        }

        //step over digits belonging to later groups
        int pos = ptr - 1;
        while (digitsToSkip > 0) {
            if (!isHexPadding(data[pos])) {
                digitsToSkip--;
            }
            pos--;
        }

        int nextVal = 0;
        for (int digit = 0; digit < count; digit++) {

            int raw = data[pos--];
            while (isHexPadding(raw)) {
                raw = data[pos--];
            }

            //convert to number
            if (raw >= 'A' && raw <= 'F') {
                raw -= 55;
            } else if (raw >= 'a' && raw <= 'f') {
                raw -= 87;
            } else if (raw >= '0' && raw <= '9') {
                raw -= 48;
            } else {
                throw new RuntimeException("Unexpected number " + (char) raw);
            }

            //digits are read right to left so the first has the lowest place value
            nextVal += (raw * powers[digit]);
        }

        return nextVal;
    }

    /**
     * Transfers the values decoded from one definition line into the lookup table.
     *
     * @param unicodeMappings table to update
     * @param entryCount      number of rows of {@code value} that were read
     * @param value           decoded values, one row per {@code <...>} entry
     * @param type            1 = bfchar, 2 = bfrange, 4 = bfrange with bracketed list
     */
    private void fillValues(final String[] unicodeMappings, final int entryCount, final int[][] value, final int type) {

        switch (type) {

            case 1: { //single value mapping onto 1 or more values
                if (value[0][0] > 255) {
                    hasDoubleBytes = true;
                }

                final char[] str = new char[entryCount - 1];
                for (int aa = 0; aa < str.length; aa++) {
                    str[aa] = (char) value[1 + aa][0];
                }
                unicodeMappings[value[0][0]] = new String(str);
                break;
            }

            case 2: { //range of values mapping onto 1 or more values
                final int first = value[0][0];
                final int last = value[1][0];

                for (int i = first; i < last + 1; i++) {
                    if (i > 255) {
                        hasDoubleBytes = true;
                    }

                    final int val = value[2][0] + (i - first);
                    if (val > 0) { //ignore 0 to fix issue in Dalim files
                        appendUnit(unicodeMappings, i, val);
                    }
                }
                break;
            }

            case 4: { //corner case
                final boolean singleCode = value[0][0] == value[1][0];
                int j = 2;

                for (int i = value[0][0]; i < value[1][0] + 1; i++) {
                    if (i > 255) {
                        hasDoubleBytes = true;
                    }

                    //allow for <02> <02> [<0066006C>], otherwise read next value
                    setValue(i, singleCode ? 2 : j, value, unicodeMappings, 0);
                    j++;
                }
                break;
            }

            default:
                break;
        }
    }

    /**
     * Appends every non-zero unit of one decoded row (after adding {@code offset})
     * to the text held for a character code.
     *
     * @param i               character code whose mapping is extended
     * @param j               row of {@code value} to read
     * @param value           decoded values
     * @param unicodeMappings table to update
     * @param offset          amount added to each unit before it is stored
     */
    static void setValue(final int i, final int j, final int[][] value, final String[] unicodeMappings, final int offset) {
        for (int jj = 0; jj < 4; jj++) {
            final int val = value[j][jj] + offset;
            if (val > 0) {
                appendUnit(unicodeMappings, i, val);
            }
        }
    }

    /** Starts or extends the text stored at {@code index} with {@code val} cast to a char. */
    private static void appendUnit(final String[] unicodeMappings, final int index, final int val) {
        if (unicodeMappings[index] == null) {
            unicodeMappings[index] = String.valueOf((char) val);
        } else {
            unicodeMappings[index] += String.valueOf((char) val);
        }
    }

    /**
     * @return true if any character code above 255 has been mapped from a
     *         bfchar/bfrange definition
     */
    public boolean hasDoubleByteValues() {
        return hasDoubleBytes;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy