All downloads are free. Search and download functionality uses the official Maven repository.

org.jpedal.fonts.UnicodeReader Maven / Gradle / Ivy

The newest version!
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 @LICENSE@
 *
 * ---------------
 * UnicodeReader.java
 * ---------------
 */
package org.jpedal.fonts;

import org.jpedal.utils.LogWriter;

/**
 * Builds a code-to-text lookup table from a byte stream that is scanned for the
 * keywords {@code beginbfchar}, {@code beginbfrange}, {@code endbf...} and
 * {@code usecmap} (presumably a PDF ToUnicode CMap - confirm against callers).
 *
 * <p>Values are written as hexadecimal digits between {@code <} and {@code >};
 * every group of four digits is turned into one {@code char}.
 */
class UnicodeReader {

    /** Place value of each hex digit within a 4-digit group, least significant digit first. */
    private static final int[] powers = {1, 16, 256, 256 * 16};

    /** Number of slots in the table returned by {@link #readUnicode()}. */
    private static final int TABLE_SIZE = 65536;

    /** Upper limit on the number of values a single table line can hold. */
    private static final int MAX_VALUES_PER_LINE = 2000;

    /** Number of 4-digit groups stored for one value. */
    private static final int MAX_GROUPS_PER_VALUE = 4;

    /** Length of the byte window, ending on the 'u' of usecmap, searched for a predefined CMap name. */
    private static final int CMAP_NAME_WINDOW = 20;

    /** Predefined CMap names that can be preloaded when followed by usecmap. */
    private static final String[] PREDEFINED_UCS2_CMAPS = {
        "Adobe-Korea1-UCS2", "Adobe-Japan1-UCS2", "Adobe-CNS1-UCS2", "Adobe-GB1-UCS2"
    };

    /** Number of bytes of {@link #data} to scan; 0 when data is null. */
    int dataLen;

    /** Raw stream to parse; may be null. */
    final byte[] data;

    /** Set once any mapped source code is greater than 255. */
    boolean hasDoubleBytes;

    /**
     * @param data raw bytes of the mapping stream, or null
     */
    UnicodeReader(final byte[] data) {

        this.data = data;

        if (data != null) {
            dataLen = data.length;
        }
    }

    /**
     * Read unicode translation table.
     *
     * <p>Any exception raised while parsing is logged and parsing stops; the entries
     * collected up to that point are still returned.
     *
     * @return table of 65536 entries indexed by source code (unmapped entries are null),
     * or null when no data was supplied
     */
    public String[] readUnicode() {

        if (data == null) {
            return null;
        }

        //initialise unicode holder
        final String[] unicodeMappings = new String[TABLE_SIZE];

        int defType = 0;
        int ptr = 0;
        boolean inDef = false;

        try {

            //read values into lookup table
            while (true) {

                //skip tabs
                while (ptr < dataLen && data[ptr] == 9) {
                    ptr++;
                }

                if (ptr >= dataLen) {
                    break;
                } else if (startsWithAt(ptr, "endbf")) {
                    defType = 0;
                    inDef = false;
                } else if (inDef) {
                    ptr = readLineValue(unicodeMappings, defType, ptr);
                }

                if (ptr >= dataLen) {
                    break;
                } else if (startsWithAt(ptr, "beginbfchar")) {
                    defType = 1;
                    ptr += 10; //leave ptr on the last letter, the increment below steps past it
                    inDef = true;
                } else if (startsWithAt(ptr, "beginbfrange")) {
                    defType = 2;
                    ptr += 11;
                    inDef = true;
                } else if (startsWithAt(ptr, "usecmap")) {
                    final String cmapName = findPredefinedCMapName(ptr);
                    if (cmapName != null) {
                        final EncodingUCS2 ucs2 = new EncodingUCS2(cmapName);
                        //NOTE(review): stops at 65534, so the last table slot is never preloaded - confirm intentional
                        for (int i = 0; i < 65535; i++) {
                            unicodeMappings[i] = String.valueOf((char) ucs2.getUnicodeValue(i));
                        }
                    }
                }

                ptr++;
            }

        } catch (final Exception e) {
            LogWriter.writeLog("Exception setting up text object " + e);
        }

        return unicodeMappings;
    }

    /**
     * Bounds-checked test for an ASCII keyword at a position in {@link #data}.
     *
     * @param pos index of the first byte to compare
     * @param keyword ASCII text to look for
     * @return true when the whole keyword lies inside the data and matches byte for byte
     */
    private boolean startsWithAt(final int pos, final String keyword) {

        final int len = keyword.length();
        if (pos < 0 || pos + len > dataLen) {
            return false;
        }

        for (int k = 0; k < len; k++) {
            if (data[pos + k] != keyword.charAt(k)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Look for one of the predefined CMap names (written with a leading '/') in the
     * bytes just before a usecmap keyword. The name may start anywhere inside the
     * window, so names of different lengths are all recognised. Bytes are compared
     * directly, so the result does not depend on the platform charset.
     *
     * @param usecmapPos index of the 'u' of usecmap
     * @return the matching name without the leading '/', or null if none is present
     */
    private String findPredefinedCMapName(final int usecmapPos) {

        final int windowStart = Math.max(0, usecmapPos - (CMAP_NAME_WINDOW - 1));

        for (final String name : PREDEFINED_UCS2_CMAPS) {
            final String token = "/" + name;
            for (int start = windowStart; start + token.length() <= usecmapPos; start++) {
                if (startsWithAt(start, token)) {
                    return name;
                }
            }
        }

        return null;
    }

    /**
     * Read the values of one table line starting at ptr and store the resulting
     * mapping(s) via {@link #fillValues}.
     *
     * @param unicodeMappings table to update
     * @param type 1 for a single-code line (2 values expected), 2 for a range line (3 values expected)
     * @param ptr position in data to start reading from
     * @return position of the last byte consumed (the caller steps past it)
     */
    private int readLineValue(final String[] unicodeMappings, int type, int ptr) {

        int entryCount = type + 1;

        //one row per value, one column per 4-digit group; unused cells stay 0
        final int[][] value = new int[MAX_VALUES_PER_LINE][MAX_GROUPS_PER_VALUE];
        boolean isMultipleValues = false;

        for (int vals = 0; vals < entryCount; vals++) {

            //after a long value has been split we are already inside <...>, so do not search for '<'
            if (!isMultipleValues) {
                while (ptr < dataLen && data[ptr] != '<') { //read up to

                    if (vals == 2 && entryCount == 3 && data[ptr] == '[') { //multiple values inside []

                        type = 4;

                        //count the values inside the brackets; a missing ']' runs off the end of
                        //data and the resulting exception is handled by readUnicode
                        int ii = ptr;
                        while (data[ii] != ']') {
                            if (data[ii] == '<') {
                                entryCount++;
                            }

                            ii++;
                        }

                        //needs to be 1 less to make it work
                        entryCount--;

                    }

                    ptr++;
                }

                ptr++; //skip past
            }

            //find end
            int count = 0;
            int charsFound = 0;

            while (ptr < dataLen && data[ptr] != '>') {

                if (data[ptr] != 10 && data[ptr] != 13 && data[ptr] != 32) {
                    charsFound++;
                }

                ptr++;
                count++;

                //allow for multiple values: outside [] a fifth digit means the value is longer
                //than one group, so step back and treat the first four digits as a value of their own
                if (charsFound == 5 && type != 4) {

                    count = 4;
                    ptr--;

                    entryCount++;
                    isMultipleValues = true;
                    break;
                }
            }

            //decode the span just read, four characters at a time from left to right
            int byteAccessed = 0;
            while (count > 0) {

                value[vals][byteAccessed] = getNextVal(ptr, count);

                byteAccessed++;

                count -= 4;
            }
        }

        //roll to end so works ('>', space, LF, CR and ']' are skipped)
        while (ptr < dataLen && (data[ptr] == 62 || data[ptr] == 32 || data[ptr] == 10 || data[ptr] == 13 || data[ptr] == ']')) {
            ptr++;
        }

        ptr--;

        //put into array
        fillValues(unicodeMappings, entryCount, value, type);

        return ptr;
    }

    /**
     * Decode up to four hex digits from the characters that end just before ptr.
     *
     * <p>When more than four characters remain, the leading four of the remaining
     * span are decoded, so repeated calls with count reduced by 4 walk the span
     * from left to right whatever its length.
     *
     * @param ptr index just past the last character of the span
     * @param count number of characters of the span still to be decoded
     * @return numeric value of the decoded digits
     * @throws RuntimeException if a character that is neither a hex digit nor LF/CR/space is met
     */
    private int getNextVal(final int ptr, int count) {

        int disp = 0;
        if (count > 4) {
            //skip everything after the leading group of the remaining span
            disp = count - 4;
            count = 4;
        }

        int raw;
        int pos = 0;
        int nextVal = 0;
        for (int jj = 0; jj < count; jj++) {
            //convert to number, reading right to left and stepping over LF, CR and space
            while (true) {
                raw = data[ptr - 1 - jj - disp];

                if (raw != 10 && raw != 13 && raw != 32) {
                    break;
                }

                jj++;
            }

            if (raw >= 'A' && raw <= 'F') {
                raw -= 55;
            } else if (raw >= 'a' && raw <= 'f') {
                raw -= 87;
            } else if (raw >= '0' && raw <= '9') {
                raw -= 48;
            } else {
                throw new RuntimeException("Unexpected number " + (char) raw);
            }

            nextVal += (raw * powers[pos]);

            pos++;
        }
        return nextVal;
    }

    /**
     * Store the values read from one line into the lookup table.
     *
     * @param unicodeMappings table to update
     * @param entryCount number of values read for the line
     * @param value decoded values, one row per value
     * @param type 1 = single code, 2 = range with a start value, 4 = range with a bracketed list
     */
    private void fillValues(final String[] unicodeMappings, final int entryCount, final int[][] value, final int type) {

        int val;

        switch (type) {

            case 1: //single value mapping onto 1 or more values

                if (value[0][0] > 255) {
                    hasDoubleBytes = true;
                }

                final char[] str = new char[entryCount - 1];

                for (int aa = 0; aa < entryCount - 1; aa++) {
                    str[aa] = (char) value[1 + aa][0];
                }

                unicodeMappings[value[0][0]] = new String(str);

                break;

            case 2: //range of values mapping onto 1 or more values

                for (int i = value[0][0]; i < value[1][0] + 1; i++) {
                    if (i > 255) {
                        hasDoubleBytes = true;
                    }

                    final int disp = i - value[0][0];
                    val = value[2][0] + disp;
                    if (val > 0) { //ignore  0 to fix issue in Dalim files
                        if (unicodeMappings[i] == null) {
                            unicodeMappings[i] = String.valueOf((char) val);
                        } else {
                            unicodeMappings[i] += String.valueOf((char) val);
                        }
                    }
                }

                break;

            case 4: //corner case

                int j = 2;
                for (int i = value[0][0]; i < value[1][0] + 1; i++) {
                    if (i > 255) {
                        hasDoubleBytes = true;
                    }

                    //a single-code range such as <02> <02> [<0066006C>] runs once with j == 2,
                    //otherwise each code takes the next value from the list
                    setValue(i, j, value, unicodeMappings, 0);
                    j++;
                }

                break;
        }
    }

    /**
     * Append every non-zero group of value[j] (plus offset) to the mapping for code i.
     *
     * @param i index into unicodeMappings
     * @param j row of value to read
     * @param value decoded values, four groups per row
     * @param unicodeMappings table to update
     * @param offset amount added to each group before it is converted to a char
     */
    static void setValue(final int i, final int j, final int[][] value, final String[] unicodeMappings, final int offset) {

        int val;

        for (int jj = 0; jj < MAX_GROUPS_PER_VALUE; jj++) {
            val = value[j][jj] + offset;
            if (val > 0) {
                if (unicodeMappings[i] == null) {
                    unicodeMappings[i] = String.valueOf((char) val);
                } else {
                    unicodeMappings[i] += String.valueOf((char) val);
                }
            }
        }
    }

    /**
     * @return true if any mapped source code seen so far is greater than 255
     */
    public boolean hasDoubleByteValues() {
        return hasDoubleBytes;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy