All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jpedal.fonts.CodeSpaceRange Maven / Gradle / Ivy

/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 @LICENSE@
 *
 * ---------------
 * CodeSpaceRange.java
 * ---------------
 */
package org.jpedal.fonts;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.StringTokenizer;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import org.jpedal.utils.LogWriter;

/**
 * Utility class holding the code space ranges of a predefined CMap, so it is
 * easy to find the byte width of each character.
 */
public class CodeSpaceRange {

	/**
	 * Flat lookup table of the predefined CMaps, four consecutive strings per CMap:
	 * <ol>
	 * <li>the CMap name that is compared against the constructor argument,</li>
	 * <li>comma-separated decimal bounds, read as consecutive low,high pairs
	 * (each value is parsed with Long.parseLong),</li>
	 * <li>comma-separated byte lengths for which the CMap has code space ranges,</li>
	 * <li>the collection key (CNS, GB, JAPAN or KOREA) that selects the
	 * *_UCS2.cfg resource loaded by updateUnicode.</li>
	 * </ol>
	 * The constructor walks this array in steps of four, so every row must keep
	 * exactly four entries.
	 *
	 * NOTE(review): "GB-EUC-V" and "UniJIS2004-UTF8-V" carry different bounds and
	 * byte lengths from their "-H" counterparts - confirm against the CMap
	 * resources that this is intended.
	 */
	private static final String[] ALL_CMAPS = {
		// CNS
		"Adobe-CNS1-0", "0,14335", "2", "CNS",
		"Adobe-CNS1-1", "0,17407", "2", "CNS",
		"Adobe-CNS1-2", "0,17663", "2", "CNS",
		"Adobe-CNS1-3", "0,18943", "2", "CNS",
		"Adobe-CNS1-4", "0,19199", "2", "CNS",
		"Adobe-CNS1-5", "0,19199", "2", "CNS",
		"Adobe-CNS1-6", "0,19199", "2", "CNS",
		"Adobe-CNS1-UCS2", "0,65535", "2", "CNS",
		"B5-H", "0,128,41280,65278", "1,2", "CNS",
		"B5-V", "0,128,41280,65278", "1,2", "CNS",
		"B5pc-H", "0,128,41280,64766,253,255", "1,2", "CNS",
		"B5pc-V", "0,128,41280,64766,253,255", "1,2", "CNS",
		"CNS-EUC-H", "0,128,2392957345,2392981246,2393022881,2393046782,2393088417,2393112318,41377,65278", "1,2,4", "CNS",
		"CNS-EUC-V", "0,128,2392957345,2392981246,2393022881,2393046782,2393088417,2393112318,41377,65278", "1,2,4", "CNS",
		"CNS1-H", "8481,32382", "2", "CNS",
		"CNS1-V", "8481,32382", "2", "CNS",
		"CNS2-H", "8481,32382", "2", "CNS",
		"CNS2-V", "8481,32382", "2", "CNS",
		"ETen-B5-H", "0,128,41280,65278", "1,2", "CNS",
		"ETen-B5-V", "0,128,41280,65278", "1,2", "CNS",
		"ETenms-B5-H", "0,65535", "2", "CNS",
		"ETenms-B5-V", "0,65535", "2", "CNS",
		"ETHK-B5-H", "0,128,34624,65278", "1,2", "CNS",
		"ETHK-B5-V", "0,128,34624,65278", "1,2", "CNS",
		"HKdla-B5-H", "0,128,41280,65278", "1,2", "CNS",
		"HKdla-B5-V", "0,128,41280,65278", "1,2", "CNS",
		"HKdlb-B5-H", "0,128,36416,65278", "1,2", "CNS",
		"HKdlb-B5-V", "0,128,36416,65278", "1,2", "CNS",
		"HKgccs-B5-H", "0,128,35392,65278", "1,2", "CNS",
		"HKgccs-B5-V", "0,128,35392,65278", "1,2", "CNS",
		"HKm314-B5-H", "0,128,41280,65278", "1,2", "CNS",
		"HKm314-B5-V", "0,128,41280,65278", "1,2", "CNS",
		"HKm471-B5-H", "0,128,41280,65278", "1,2", "CNS",
		"HKm471-B5-V", "0,128,41280,65278", "1,2", "CNS",
		"HKscs-B5-H", "0,128,34624,65278", "1,2", "CNS",
		"HKscs-B5-V", "0,128,34624,65278", "1,2", "CNS",
		"UniCNS-UCS2-H", "0,55295,57344,65535", "2", "CNS",
		"UniCNS-UCS2-V", "0,55295,57344,65535", "2", "CNS",
		"UniCNS-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "CNS",
		"UniCNS-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "CNS",
		"UniCNS-UTF32-H", "0,1114111", "4", "CNS",
		"UniCNS-UTF32-V", "0,1114111", "4", "CNS",
		"UniCNS-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "CNS",
		"UniCNS-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "CNS",
		// GB
		"Adobe-GB1-0", "0,7935", "2", "GB",
		"Adobe-GB1-1", "0,9983", "2", "GB",
		"Adobe-GB1-2", "0,22271", "2", "GB",
		"Adobe-GB1-3", "0,22527", "2", "GB",
		"Adobe-GB1-4", "0,29183", "2", "GB",
		"Adobe-GB1-5", "0,30463", "2", "GB",
		"Adobe-GB1-UCS2", "0,65535", "2", "GB",
		"GB-EUC-H", "0,128,41377,65278", "1,2", "GB",
		"GB-EUC-V", "0,65535", "2", "GB",
		"GB-H", "8481,32382", "2", "GB",
		"GB-V", "8481,32382", "2", "GB",
		"GBK-EUC-H", "0,128,33088,65278", "1,2", "GB",
		"GBK-EUC-V", "0,128,33088,65278", "1,2", "GB",
		"GBK2K-H", "0,127,2167439664,4265213497,33088,65278", "1,2,4", "GB",
		"GBK2K-V", "0,127,2167439664,4265213497,33088,65278", "1,2,4", "GB",
		"GBKp-EUC-H", "0,128,33088,65278", "1,2", "GB",
		"GBKp-EUC-V", "0,128,33088,65278", "1,2", "GB",
		"GBpc-EUC-H", "0,128,41377,64766,253,255", "1,2", "GB",
		"GBpc-EUC-V", "0,128,41377,64766,253,255", "1,2", "GB",
		"GBT-EUC-H", "0,128,41377,65278", "1,2", "GB",
		"GBT-EUC-V", "0,128,41377,65278", "1,2", "GB",
		"GBT-H", "8481,32382", "2", "GB",
		"GBT-V", "8481,32382", "2", "GB",
		"GBTpc-EUC-H", "0,128,41377,64766,253,255", "1,2", "GB",
		"GBTpc-EUC-V", "0,128,41377,64766,253,255", "1,2", "GB",
		"UniGB-UCS2-H", "0,55295,57344,65535", "2", "GB",
		"UniGB-UCS2-V", "0,55295,57344,65535", "2", "GB",
		"UniGB-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "GB",
		"UniGB-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "GB",
		"UniGB-UTF32-H", "0,1114111", "4", "GB",
		"UniGB-UTF32-V", "0,1114111", "4", "GB",
		"UniGB-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "GB",
		"UniGB-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "GB",
		// JAPAN
		"78-EUC-H", "0,128,36512,36575,41377,65278", "1,2", "JAPAN",
		"78-EUC-V", "0,128,36512,36575,41377,65278", "1,2", "JAPAN",
		"78-H", "8481,32382", "2", "JAPAN",
		"78-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"78-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"78-V", "8481,32382", "2", "JAPAN",
		"78ms-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"78ms-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"83pv-RKSJ-H", "0,128,33088,40956,160,223,57408,64764,253,255", "1,2", "JAPAN",
		"90ms-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"90ms-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"90msp-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"90msp-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"90pv-RKSJ-H", "0,128,33088,40956,160,223,57408,64764,253,255", "1,2", "JAPAN",
		"90pv-RKSJ-V", "0,128,33088,40956,160,223,57408,64764,253,255", "1,2", "JAPAN",
		"Add-H", "8481,32382", "2", "JAPAN",
		"Add-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"Add-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"Add-V", "8481,32382", "2", "JAPAN",
		"Adobe-Japan1-0", "0,8447", "2", "JAPAN",
		"Adobe-Japan1-1", "0,8447", "2", "JAPAN",
		"Adobe-Japan1-2", "0,8959", "2", "JAPAN",
		"Adobe-Japan1-3", "0,9471", "2", "JAPAN",
		"Adobe-Japan1-4", "0,15615", "2", "JAPAN",
		"Adobe-Japan1-5", "0,20479", "2", "JAPAN",
		"Adobe-Japan1-6", "0,23295", "2", "JAPAN",
		"Adobe-Japan1-UCS2", "0,65535", "2", "JAPAN",
		"EUC-H", "0,128,36512,36575,41377,65278", "1,2", "JAPAN",
		"EUC-V", "0,128,36512,36575,41377,65278", "1,2", "JAPAN",
		"Ext-H", "8481,32382", "2", "JAPAN",
		"Ext-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"Ext-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"Ext-V", "8481,32382", "2", "JAPAN",
		"H", "8481,32382", "2", "JAPAN",
		"Hankaku", "0,255", "1", "JAPAN",
		"Hiragana", "0,255", "1", "JAPAN",
		"Katakana", "0,255", "1", "JAPAN",
		"NWP-H", "8481,32382", "2", "JAPAN",
		"NWP-V", "8481,32382", "2", "JAPAN",
		"RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "JAPAN",
		"Roman", "0,255", "1", "JAPAN",
		"UniJIS-UCS2-H", "0,55295,57344,65535", "2", "JAPAN",
		"UniJIS-UCS2-HW-H", "0,65535", "2", "JAPAN",
		"UniJIS-UCS2-HW-V", "0,65535", "2", "JAPAN",
		"UniJIS-UCS2-V", "0,55295,57344,65535", "2", "JAPAN",
		"UniJIS-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "JAPAN",
		"UniJIS-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "JAPAN",
		"UniJIS-UTF32-H", "0,1114111", "4", "JAPAN",
		"UniJIS-UTF32-V", "0,1114111", "4", "JAPAN",
		"UniJIS-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "JAPAN",
		"UniJIS-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "JAPAN",
		"UniJIS2004-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "JAPAN",
		"UniJIS2004-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "JAPAN",
		"UniJIS2004-UTF32-H", "0,1114111", "4", "JAPAN",
		"UniJIS2004-UTF32-V", "0,1114111", "4", "JAPAN",
		"UniJIS2004-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "JAPAN",
		"UniJIS2004-UTF8-V", "0,65535", "2", "JAPAN",
		"UniJISPro-UCS2-HW-V", "0,65535", "2", "JAPAN",
		"UniJISPro-UCS2-V", "0,65535", "2", "JAPAN",
		"UniJISPro-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "JAPAN",
		"UniJISX0213-UTF32-H", "0,1114111", "4", "JAPAN",
		"UniJISX0213-UTF32-V", "0,1114111", "4", "JAPAN",
		"UniJISX02132004-UTF32-H", "0,1114111", "4", "JAPAN",
		"UniJISX02132004-UTF32-V", "0,1114111", "4", "JAPAN",
		"V", "8481,32382", "2", "JAPAN",
		"WP-Symbol", "0,255", "1", "JAPAN",
		// KOREA
		"Adobe-Korea1-0", "0,9471", "2", "KOREA",
		"Adobe-Korea1-1", "0,18175", "2", "KOREA",
		"Adobe-Korea1-2", "0,18431", "2", "KOREA",
		"Adobe-Korea1-UCS2", "0,65535", "2", "KOREA",
		"KSC-EUC-H", "0,128,41377,65278", "1,2", "KOREA",
		"KSC-EUC-V", "0,128,41377,65278", "1,2", "KOREA",
		"KSC-H", "8481,32382", "2", "KOREA",
		"KSC-Johab-H", "0,128,33857,54270,55345,57086,57393,63998", "1,2", "KOREA",
		"KSC-Johab-V", "0,128,33857,54270,55345,57086,57393,63998", "1,2", "KOREA",
		"KSC-V", "8481,32382", "2", "KOREA",
		"KSCms-UHC-H", "0,128,33089,65278", "1,2", "KOREA",
		"KSCms-UHC-HW-H", "0,128,33089,65278", "1,2", "KOREA",
		"KSCms-UHC-HW-V", "0,128,33089,65278", "1,2", "KOREA",
		"KSCms-UHC-V", "0,128,33089,65278", "1,2", "KOREA",
		"KSCpc-EUC-H", "0,132,41281,65022,254,255", "1,2", "KOREA",
		"KSCpc-EUC-V", "0,132,41281,65022,254,255", "1,2", "KOREA",
		"UniKS-UCS2-H", "0,55295,57344,65535", "2", "KOREA",
		"UniKS-UCS2-V", "0,55295,57344,65535", "2", "KOREA",
		"UniKS-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "KOREA",
		"UniKS-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "KOREA",
		"UniKS-UTF32-H", "0,1114111", "4", "KOREA",
		"UniKS-UTF32-V", "0,1114111", "4", "KOREA",
		"UniKS-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "KOREA",
		"UniKS-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "KOREA"
	};

	/**
	 * Flags indexed by byte length: entry n is set by the constructor when the
	 * matched table row lists n among its byte lengths.
	 */
	private final boolean[] bps = new boolean[8];
	/**
	 * Bounds of the matched CMap stored as consecutive low,high pairs; stays
	 * null when the constructor finds no table row for the requested name.
	 */
	private long[] range;

	/**
	 * Table filled by updateUnicode from the collection's *_UCS2.cfg resource:
	 * indices come from the range given by the first two hex columns of a line,
	 * values count up from the third column. Presumably maps CID to a UCS-2
	 * value (going by the resource names) - confirm against the cfg files.
	 */
	public final int[] uniMap = new int[65536];
	/**
	 * Table filled by updateCMAP: indexed by character code (0-65535), holding
	 * the CID assigned to that code by the loaded CMap ranges.
	 */
	public final int[] cidMap = new int[65536];
	/** True once the constructor has matched the name and loaded its tables. */
	public boolean hasEncoding;
	/** The CMap name exactly as it was passed to the constructor. */
	public String encname;

	/**
	 * Looks the given CMap name up in ALL_CMAPS and, when it is found, loads the
	 * code space bounds, the supported byte lengths, the unicode table of its
	 * character collection and the code to CID table.
	 * <p>
	 * When the name is not in the table nothing is loaded: {@link #hasEncoding}
	 * stays false and {@link #isInCodeSpaceRange(int, int)} always answers false.
	 *
	 * @param name CMap name as written in the PDF (for example "UniJIS-UCS2-H");
	 *             stored unchanged in {@link #encname}
	 */
	public CodeSpaceRange(final String name) {
		encname = name;
		// each table row is four consecutive strings: name, bounds, byte lengths, collection
		for (int row = 0; row + 3 < ALL_CMAPS.length; row += 4) {
			if (!ALL_CMAPS[row].equals(name)) {
				continue;
			}

			final String[] bounds = ALL_CMAPS[row + 1].split(",");
			range = new long[bounds.length];
			for (int j = 0; j < bounds.length; j++) {
				// long because 4-byte bounds exceed Integer.MAX_VALUE
				range[j] = Long.parseLong(bounds[j]);
			}

			for (final String byteLength : ALL_CMAPS[row + 2].split(",")) {
				bps[Integer.parseInt(byteLength)] = true;
			}

			updateUnicode(ALL_CMAPS[row + 3]);

			// a name ending in "V" first loads the matching "...H" mapping,
			// then its own mapping is applied on top of it
			if (name.endsWith("V")) {
				updateCMAP(name.substring(0, name.length() - 1) + "H");
			}
			updateCMAP(name);

			hasEncoding = true;
			// names in the table are unique, so there is nothing left to scan
			return;
		}
	}

	/**
	 * Fills {@link #cidMap} with the code to CID mapping of the named CMap.
	 * <p>
	 * The four Adobe-*-UCS2 names are treated as identity mappings. Every other
	 * name is read from the class path resource
	 * {@code org/jpedal/res/pdf/<name>.flate}, inflated and parsed by CmapSpec.
	 * Each key of {@code spec.cidRange} is decoded as start code (upper 32 bits)
	 * and end code (lower 32 bits); the mapped value is the CID of the start
	 * code and following codes get consecutive CIDs.
	 * <p>
	 * Only codes 0-65535 fit into the table. Both bounds are handled as unsigned
	 * 32-bit values, so ranges of 4-byte codes are skipped without iterating
	 * over them.
	 * <p>
	 * A missing resource and any failure while reading or parsing are logged;
	 * entries written before a failure are kept.
	 *
	 * @param name CMap name used to build the resource path
	 */
	private void updateCMAP(final String name) {
		if (name.equals("Adobe-CNS1-UCS2") || name.equals("Adobe-Japan1-UCS2")
				|| name.equals("Adobe-GB1-UCS2") || name.equals("Adobe-Korea1-UCS2")) {
			for (int i = 0; i < cidMap.length; i++) {
				cidMap[i] = i;
			}
			return;
		}

		try {
			final InputStream input_stream = this.getClass().getClassLoader()
					.getResourceAsStream("org/jpedal/res/pdf/" + name + ".flate");
			if (input_stream == null) {
				// report through the log like every other problem in this class
				LogWriter.writeLog("input stream not found " + name);
				return;
			}

			final ByteArrayOutputStream bos = new ByteArrayOutputStream();
			try {
				final byte[] data = new byte[4096];
				int nRead;
				while ((nRead = input_stream.read(data, 0, data.length)) != -1) {
					bos.write(data, 0, nRead);
				}
			} finally {
				// release the resource even when reading fails
				input_stream.close();
			}

			final CmapSpec spec = new CmapSpec(decompress(bos.toByteArray()));
			for (final Long key : spec.cidRange.keySet()) {
				// keep both bounds as unsigned 32-bit values held in a long
				final long start = key >>> 32;
				final long end = key & 0xffffffffL;
				final int cid = spec.cidRange.get(key);

				// visit only the part of the range that fits into the table
				final long last = Math.min(end, cidMap.length - 1);
				for (long code = start; code <= last; code++) {
					cidMap[(int) code] = cid + (int) (code - start);
				}
			}
		} catch (final Exception e) {
			e.printStackTrace();
			LogWriter.writeLog("Exception: " + e.getMessage());
		}

	}

	/**
	 * Fills {@link #uniMap} from the *_UCS2.cfg resource of a character collection.
	 * <p>
	 * Lines starting with '#' are ignored. Every other line is expected to hold
	 * three whitespace-separated hexadecimal values: first index, last index and
	 * the value stored at the first index; following indices get consecutive
	 * values. Values outside 0-65535 and indices outside the table are not stored.
	 * Lines with fewer than three tokens (blank lines, for example) are skipped.
	 * <p>
	 * An unknown collection name, a missing resource and any failure while
	 * reading or parsing are logged; entries written before a failure are kept.
	 * <p>
	 * NOTE(review): the resource is decoded with the platform default charset,
	 * as before - presumably the cfg files are plain ASCII; confirm before
	 * pinning a charset.
	 *
	 * @param ucsName collection key from the CMap table: "JAPAN", "GB", "KOREA" or "CNS"
	 */
	private void updateUnicode(final String ucsName) {

		final String resource;
		if ("JAPAN".equals(ucsName)) {
			resource = "org/jpedal/res/pdf/ADOBE_JAPAN_UCS2.cfg";
		} else if ("GB".equals(ucsName)) {
			resource = "org/jpedal/res/pdf/ADOBE_GB_UCS2.cfg";
		} else if ("KOREA".equals(ucsName)) {
			resource = "org/jpedal/res/pdf/ADOBE_KOREA_UCS2.cfg";
		} else if ("CNS".equals(ucsName)) {
			resource = "org/jpedal/res/pdf/ADOBE_CNS_UCS2.cfg";
		} else {
			// previously this ended in a NullPointerException on the reader
			LogWriter.writeLog("Unknown character collection " + ucsName);
			return;
		}

		try {
			final InputStream raw = this.getClass().getClassLoader().getResourceAsStream(resource);
			if (raw == null) {
				LogWriter.writeLog("input stream not found " + resource);
				return;
			}

			final BufferedReader input_stream = new BufferedReader(new InputStreamReader(raw));
			try {
				String line;
				while ((line = input_stream.readLine()) != null) {
					if (line.startsWith("#")) {
						continue;
					}
					final StringTokenizer values = new StringTokenizer(line);
					if (values.countTokens() < 3) {
						// blank or incomplete line: nothing to map
						continue;
					}

					final int si = Integer.parseInt(values.nextToken(), 16);
					final int ei = Integer.parseInt(values.nextToken(), 16);
					final int ui = Integer.parseInt(values.nextToken(), 16);

					// stay inside the table even when the file names a larger range
					final int first = Math.max(si, 0);
					final int last = Math.min(ei, uniMap.length - 1);
					for (int i = first; i <= last; i++) {
						// long arithmetic so an overflowing value is rejected, not wrapped
						final long vv = (long) ui + ((long) i - si);
						if (vv >= 0 && vv < 65536) {
							uniMap[i] = (int) vv;
						}
					}
				}
			} finally {
				// release the resource even when a line fails to parse
				input_stream.close();
			}
		} catch (final Exception e) {
			e.printStackTrace();
			LogWriter.writeLog("Exception: " + e.getMessage());
		}
	}

	/**
	 * Tests whether a character code lies inside one of the code space ranges
	 * of this CMap.
	 * <p>
	 * The ranges are only consulted when the CMap lists {@code byteLen} among
	 * its byte lengths. All ranges of the CMap are checked, whatever byte length
	 * they belong to.
	 * <p>
	 * A 4-byte code is read as an unsigned 32-bit value: several ranges in the
	 * table lie above Integer.MAX_VALUE and such codes arrive here as negative
	 * ints, which could never match otherwise.
	 *
	 * @param v       character code assembled from {@code byteLen} bytes
	 * @param byteLen number of bytes the code was built from
	 * @return true when the byte length is supported and {@code v} lies within a
	 *         range (bounds inclusive); false otherwise, including for an
	 *         unknown CMap or a byte length outside the supported table
	 */
	public boolean isInCodeSpaceRange(final int v, final int byteLen) {
		if (byteLen < 0 || byteLen >= bps.length || !bps[byteLen] || range == null) {
			return false;
		}

		// only a 4-byte code can use the sign bit, so only it is widened unsigned
		final long code = byteLen == 4 ? (v & 0xffffffffL) : v;

		// range holds consecutive low,high pairs
		for (int i = 0; i + 1 < range.length; i += 2) {
			if (code >= range[i] && code <= range[i + 1]) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Inflates a complete zlib/deflate stream held in memory.
	 *
	 * @param data the compressed bytes
	 * @return the uncompressed bytes
	 * @throws Exception a DataFormatException when the data is corrupt, ends
	 *                   before the stream is complete (this includes empty
	 *                   input) or needs a preset dictionary
	 */
	private static byte[] decompress(final byte[] data) throws Exception {
		final Inflater inflater = new Inflater();
		try {
			inflater.setInput(data);
			final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(data.length);
			final byte[] buffer = new byte[1024];
			while (!inflater.finished()) {
				final int count = inflater.inflate(buffer);
				// inflate() returns 0 when it cannot continue; without this check a
				// truncated stream would spin here forever
				if (count == 0 && (inflater.needsInput() || inflater.needsDictionary())) {
					throw new DataFormatException("Incomplete or unsupported deflate stream");
				}
				outputStream.write(buffer, 0, count);
			}
			return outputStream.toByteArray();
		} finally {
			// free the native zlib state right away instead of waiting for GC
			inflater.end();
		}
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy