All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jpedal.fonts.CodeSpaceRange Maven / Gradle / Ivy

There is a newer version: 7.15.25
Show newest version
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 @LICENSE@
 *
 * ---------------
 * CodeSpaceRange.java
 * ---------------
 */
package org.jpedal.fonts;

/**
 * Utility class holding the code space ranges of the predefined CMaps, so it
 * is easy to test whether a character code of a given byte width belongs to
 * a named CMap and to obtain the decoder selected for that CMap.
 *
 * <p>An instance is only populated when the CMap name is present in the
 * built-in table and its encoding key is not {@code "xxx"}; otherwise
 * {@link #getRange()} and {@link #getEncoding()} return {@code null} and
 * {@link #isInCodeSpaceRange(int, int)} always returns {@code false}.
 */
public class CodeSpaceRange {
	
	/**
	 * Flat lookup table, four consecutive entries per CMap:
	 * <ol>
	 * <li>CMap name,</li>
	 * <li>comma separated list of inclusive {@code low,high} bounds,</li>
	 * <li>comma separated list of byte lengths accepted for the CMap,</li>
	 * <li>encoding key used to pick the {@link CmapEncoding}
	 * ({@code "xxx"} rows are skipped by the constructor).</li>
	 * </ol>
	 * NOTE(review): "GB-EUC-V" carries different bounds/byte lengths than
	 * "GB-EUC-H" while the other -H/-V pairs with a decoder agree - confirm
	 * that this is intended.
	 */
	private static final String[] ALL_CMAPS = {
		// CNS
		"Adobe-CNS1-0", "0,14335", "2", "xxx",
		"Adobe-CNS1-1", "0,17407", "2", "xxx",
		"Adobe-CNS1-2", "0,17663", "2", "xxx",
		"Adobe-CNS1-3", "0,18943", "2", "xxx",
		"Adobe-CNS1-4", "0,19199", "2", "xxx",
		"Adobe-CNS1-5", "0,19199", "2", "xxx",
		"Adobe-CNS1-6", "0,19199", "2", "xxx",
		"Adobe-CNS1-UCS2", "0,65535", "2", "CNSUCS2",
		"B5-H", "0,128,41280,65278", "1,2", "B5",
		"B5-V", "0,128,41280,65278", "1,2", "B5",
		"B5pc-H", "0,128,41280,64766,253,255", "1,2", "B5",
		"B5pc-V", "0,128,41280,64766,253,255", "1,2", "B5",
		"CNS-EUC-H", "0,128,2392957345,2392981246,2393022881,2393046782,2393088417,2393112318,41377,65278", "1,2,4", "xxx",
		"CNS-EUC-V", "0,128,2392957345,2392981246,2393022881,2393046782,2393088417,2393112318,41377,65278", "1,2,4", "xxx",
		"CNS1-H", "8481,32382", "2", "ISO2022CN",
		"CNS1-V", "8481,32382", "2", "ISO2022CN",
		"CNS2-H", "8481,32382", "2", "ISO2022CN",
		"CNS2-V", "8481,32382", "2", "ISO2022CN",
		"ETen-B5-H", "0,128,41280,65278", "1,2", "B5",
		"ETen-B5-V", "0,128,41280,65278", "1,2", "B5",
		"ETenms-B5-H", "0,65535", "2", "B5",
		"ETenms-B5-V", "0,65535", "2", "B5",
		"ETHK-B5-H", "0,128,34624,65278", "1,2", "B5",
		"ETHK-B5-V", "0,128,34624,65278", "1,2", "B5",
		"HKdla-B5-H", "0,128,41280,65278", "1,2", "B5",
		"HKdla-B5-V", "0,128,41280,65278", "1,2", "B5",
		"HKdlb-B5-H", "0,128,36416,65278", "1,2", "B5",
		"HKdlb-B5-V", "0,128,36416,65278", "1,2", "B5",
		"HKgccs-B5-H", "0,128,35392,65278", "1,2", "B5",
		"HKgccs-B5-V", "0,128,35392,65278", "1,2", "B5",
		"HKm314-B5-H", "0,128,41280,65278", "1,2", "B5",
		"HKm314-B5-V", "0,128,41280,65278", "1,2", "B5",
		"HKm471-B5-H", "0,128,41280,65278", "1,2", "B5",
		"HKm471-B5-V", "0,128,41280,65278", "1,2", "B5",
		"HKscs-B5-H", "0,128,34624,65278", "1,2", "B5",
		"HKscs-B5-V", "0,128,34624,65278", "1,2", "B5",
		"UniCNS-UCS2-H", "0,55295,57344,65535", "2", "xxx",
		"UniCNS-UCS2-V", "0,55295,57344,65535", "2", "xxx",
		"UniCNS-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "xxx",
		"UniCNS-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "xxx",
		"UniCNS-UTF32-H", "0,1114111", "4", "xxx",
		"UniCNS-UTF32-V", "0,1114111", "4", "xxx",
		"UniCNS-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx",
		"UniCNS-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx",
		// GB
		"Adobe-GB1-0", "0,7935", "2", "xxx",
		"Adobe-GB1-1", "0,9983", "2", "xxx",
		"Adobe-GB1-2", "0,22271", "2", "xxx",
		"Adobe-GB1-3", "0,22527", "2", "xxx",
		"Adobe-GB1-4", "0,29183", "2", "xxx",
		"Adobe-GB1-5", "0,30463", "2", "xxx",
		"Adobe-GB1-UCS2", "0,65535", "2", "GBUCS2",
		"GB-EUC-H", "0,128,41377,65278", "1,2", "GB2312",
		"GB-EUC-V", "0,65535", "2", "GB2312",
		"GB-H", "8481,32382", "2", "xxx",
		"GB-V", "8481,32382", "2", "xxx",
		"GBK-EUC-H", "0,128,33088,65278", "1,2", "GBK",
		"GBK-EUC-V", "0,128,33088,65278", "1,2", "GBK",
		"GBK2K-H", "0,127,2167439664,4265213497,33088,65278", "1,2,4", "GBK",
		"GBK2K-V", "0,127,2167439664,4265213497,33088,65278", "1,2,4", "GBK",
		"GBKp-EUC-H", "0,128,33088,65278", "1,2", "GBK",
		"GBKp-EUC-V", "0,128,33088,65278", "1,2", "GBK",
		"GBpc-EUC-H", "0,128,41377,64766,253,255", "1,2", "GB2312",
		"GBpc-EUC-V", "0,128,41377,64766,253,255", "1,2", "GB2312",
		"GBT-EUC-H", "0,128,41377,65278", "1,2", "xxx",
		"GBT-EUC-V", "0,128,41377,65278", "1,2", "xxx",
		"GBT-H", "8481,32382", "2", "xxx",
		"GBT-V", "8481,32382", "2", "xxx",
		"GBTpc-EUC-H", "0,128,41377,64766,253,255", "1,2", "GB2312",
		"GBTpc-EUC-V", "0,128,41377,64766,253,255", "1,2", "GB2312",
		"UniGB-UCS2-H", "0,55295,57344,65535", "2", "xxx",
		"UniGB-UCS2-V", "0,55295,57344,65535", "2", "xxx",
		"UniGB-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "xxx",
		"UniGB-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "xxx",
		"UniGB-UTF32-H", "0,1114111", "4", "xxx",
		"UniGB-UTF32-V", "0,1114111", "4", "xxx",
		"UniGB-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx",
		"UniGB-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx",
		// JAPAN
		"78-EUC-H", "0,128,36512,36575,41377,65278", "1,2", "EUCJP",
		"78-EUC-V", "0,128,36512,36575,41377,65278", "1,2", "EUCJP",
		"78-H", "8481,32382", "2", "ISO2022JP",
		"78-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"78-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"78-V", "8481,32382", "2", "ISO2022JP",
		"78ms-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"78ms-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"83pv-RKSJ-H", "0,128,33088,40956,160,223,57408,64764,253,255", "1,2", "SJIS",
		"90ms-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"90ms-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"90msp-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"90msp-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"90pv-RKSJ-H", "0,128,33088,40956,160,223,57408,64764,253,255", "1,2", "SJIS",
		"90pv-RKSJ-V", "0,128,33088,40956,160,223,57408,64764,253,255", "1,2", "SJIS",
		"Add-H", "8481,32382", "2", "ISO2022JP",
		"Add-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"Add-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"Add-V", "8481,32382", "2", "ISO2022JP",
		"Adobe-Japan1-0", "0,8447", "2", "xxx",
		"Adobe-Japan1-1", "0,8447", "2", "xxx",
		"Adobe-Japan1-2", "0,8959", "2", "xxx",
		"Adobe-Japan1-3", "0,9471", "2", "xxx",
		"Adobe-Japan1-4", "0,15615", "2", "xxx",
		"Adobe-Japan1-5", "0,20479", "2", "xxx",
		"Adobe-Japan1-6", "0,23295", "2", "xxx",
		"Adobe-Japan1-UCS2", "0,65535", "2", "JAPANUCS2",
		"EUC-H", "0,128,36512,36575,41377,65278", "1,2", "EUCJP",
		"EUC-V", "0,128,36512,36575,41377,65278", "1,2", "EUCJP",
		"Ext-H", "8481,32382", "2", "ISO2022JP",
		"Ext-RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "xxx",
		"Ext-RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "xxx",
		"Ext-V", "8481,32382", "2", "ISO2022JP",
		"H", "8481,32382", "2", "ISO2022JP",
		"Hankaku", "0,255", "1", "xxx",
		"Hiragana", "0,255", "1", "xxx",
		"Katakana", "0,255", "1", "xxx",
		"NWP-H", "8481,32382", "2", "ISO2022JP",
		"NWP-V", "8481,32382", "2", "ISO2022JP",
		"RKSJ-H", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"RKSJ-V", "0,128,33088,40956,160,223,57408,64764", "1,2", "SJIS",
		"Roman", "0,255", "1", "xxx",
		"UniJIS-UCS2-H", "0,55295,57344,65535", "2", "xxx",
		"UniJIS-UCS2-HW-H", "0,65535", "2", "xxx",
		"UniJIS-UCS2-HW-V", "0,65535", "2", "xxx",
		"UniJIS-UCS2-V", "0,55295,57344,65535", "2", "xxx",
		"UniJIS-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "UNIJIS",
		"UniJIS-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "UNIJIS",
		"UniJIS-UTF32-H", "0,1114111", "4", "xxx",
		"UniJIS-UTF32-V", "0,1114111", "4", "xxx",
		"UniJIS-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx",
		"UniJIS-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx",
		"UniJIS2004-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "xxx",
		"UniJIS2004-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "xxx",
		"UniJIS2004-UTF32-H", "0,1114111", "4", "xxx",
		"UniJIS2004-UTF32-V", "0,1114111", "4", "xxx",
		"UniJIS2004-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx",
		"UniJIS2004-UTF8-V", "0,65535", "2", "xxx",
		"UniJISPro-UCS2-HW-V", "0,65535", "2", "xxx",
		"UniJISPro-UCS2-V", "0,65535", "2", "xxx",
		"UniJISPro-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx",
		"UniJISX0213-UTF32-H", "0,1114111", "4", "xxx",
		"UniJISX0213-UTF32-V", "0,1114111", "4", "xxx",
		"UniJISX02132004-UTF32-H", "0,1114111", "4", "xxx",
		"UniJISX02132004-UTF32-V", "0,1114111", "4", "xxx",
		"V", "8481,32382", "2", "ISO2022JP",
		"WP-Symbol", "0,255", "1", "xxx",
		// KOREA
		"Adobe-Korea1-0", "0,9471", "2", "xxx",
		"Adobe-Korea1-1", "0,18175", "2", "xxx",
		"Adobe-Korea1-2", "0,18431", "2", "xxx",
		"Adobe-Korea1-UCS2", "0,65535", "2", "KOREAUCS2",
		"KSC-EUC-H", "0,128,41377,65278", "1,2", "EUCKR",
		"KSC-EUC-V", "0,128,41377,65278", "1,2", "EUCKR",
		"KSC-H", "8481,32382", "2", "xxx",
		"KSC-Johab-H", "0,128,33857,54270,55345,57086,57393,63998", "1,2", "JOHAB",
		"KSC-Johab-V", "0,128,33857,54270,55345,57086,57393,63998", "1,2", "JOHAB",
		"KSC-V", "8481,32382", "2", "xxx",
		"KSCms-UHC-H", "0,128,33089,65278", "1,2", "EUCKR",
		"KSCms-UHC-HW-H", "0,128,33089,65278", "1,2", "EUCKR",
		"KSCms-UHC-HW-V", "0,128,33089,65278", "1,2", "EUCKR",
		"KSCms-UHC-V", "0,128,33089,65278", "1,2", "EUCKR",
		"KSCpc-EUC-H", "0,132,41281,65022,254,255", "1,2", "EUCKR",
		"KSCpc-EUC-V", "0,132,41281,65022,254,255", "1,2", "EUCKR",
		"UniKS-UCS2-H", "0,55295,57344,65535", "2", "xxx",
		"UniKS-UCS2-V", "0,55295,57344,65535", "2", "xxx",
		"UniKS-UTF16-H", "0,55295,3623934976,3690979327,57344,65535", "2,4", "xxx",
		"UniKS-UTF16-V", "0,55295,3623934976,3690979327,57344,65535", "2,4", "xxx",
		"UniKS-UTF32-H", "0,1114111", "4", "xxx",
		"UniKS-UTF32-V", "0,1114111", "4", "xxx",
		"UniKS-UTF8-H", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx",
		"UniKS-UTF8-V", "0,127,49280,57279,14712960,15712191,4034953344,4156538815", "1,2,3,4", "xxx"
	};
	
	/** Number of consecutive table entries describing one CMap. */
	private static final int FIELDS_PER_CMAP = 4;
	
	/** Encoding key of table rows for which no decoder is selected. */
	private static final String NO_ENCODING = "xxx";
	
	/** bps[n] is true when codes that are n bytes long are accepted. */
	private final boolean[] bps = new boolean[8];
	
	/** Inclusive low,high bounds stored pairwise; null when unpopulated. */
	private long[] range;
	
	/** Decoder selected for the CMap; null when unpopulated. */
	private CmapEncoding encoding;
	
	/**
	 * Looks the CMap up in the built-in table and, when it has a usable
	 * encoding key, loads its bounds, accepted byte lengths and decoder.
	 *
	 * @param name CMap name, for example {@code "90ms-RKSJ-H"}; an unknown
	 * or {@code null} name leaves the instance unpopulated
	 */
	public CodeSpaceRange(final String name) {
		final int row = findRow(name);
		if (row < 0) {
			return;
		}
		final String enc = ALL_CMAPS[row + 3];
		if (NO_ENCODING.equals(enc)) {
			return;
		}
		// resolve the decoder first so that a key without a decoder cannot
		// leave byte length flags set while the range is missing
		final CmapEncoding decoder = createEncoding(enc, name);
		if (decoder == null) {
			return;
		}
		
		final String[] bounds = ALL_CMAPS[row + 1].split(",");
		final long[] parsed = new long[bounds.length];
		for (int j = 0; j < bounds.length; j++) {
			parsed[j] = Long.parseLong(bounds[j]);
		}
		for (final String byteLength : ALL_CMAPS[row + 2].split(",")) {
			bps[Integer.parseInt(byteLength)] = true;
		}
		range = parsed;
		encoding = decoder;
	}
	
	/**
	 * Finds the table offset of the row describing the given CMap.
	 *
	 * @param name CMap name to search for
	 * @return index of the name entry inside ALL_CMAPS, or -1 if absent
	 */
	private static int findRow(final String name) {
		for (int i = 0; i < ALL_CMAPS.length; i += FIELDS_PER_CMAP) {
			if (ALL_CMAPS[i].equals(name)) {
				return i;
			}
		}
		return -1;
	}
	
	/**
	 * Creates the decoder matching an encoding key of the table.
	 *
	 * @param enc encoding key (fourth field of a table row)
	 * @param name CMap name, handed to the UCS2 decoder
	 * @return the decoder, or {@code null} when the key has no decoder
	 */
	private static CmapEncoding createEncoding(final String enc, final String name) {
		if ("SJIS".equals(enc)) {
			return new EncodingSJIS();
		}
		if ("GBK".equals(enc)) {
			return new EncodingGBK();
		}
		if ("B5".equals(enc)) {
			return new EncodingBIG5();
		}
		if ("EUCJP".equals(enc)) {
			return new EncodingEUCJP();
		}
		if ("ISO2022JP".equals(enc)) {
			return new EncodingISO2022JP();
		}
		if ("GB2312".equals(enc)) {
			// NOTE(review): GB2312 is decoded with the ISO2022JP decoder,
			// exactly as before - looks like a copy/paste slip, confirm
			// whether a dedicated GB2312 decoder should be used instead.
			return new EncodingISO2022JP();
		}
		if ("JOHAB".equals(enc)) {
			return new EncodingJOHAB();
		}
		if ("EUCKR".equals(enc)) {
			return new EncodingEUCKR();
		}
		if ("UNIJIS".equals(enc)) {
			return new EncodingUNIJIS();
		}
		if ("JAPANUCS2".equals(enc) || "KOREAUCS2".equals(enc)
				|| "GBUCS2".equals(enc) || "CNSUCS2".equals(enc)) {
			return new EncodingUCS2(name);
		}
		return null;
	}
	
	/**
	 * Tests whether a character code lies inside any of the stored bounds.
	 * The byte length only gates the test: the code is compared against all
	 * pairs once that length is accepted for the CMap.
	 *
	 * @param v character code; for a byte length of 4 it is read as an
	 * unsigned 32 bit value, because the table holds bounds above
	 * {@code Integer.MAX_VALUE} that a signed comparison could never reach
	 * @param byteLen number of bytes the code was read from
	 * @return {@code true} if the byte length is accepted and the code is
	 * inside one of the ranges; {@code false} otherwise, including for an
	 * unpopulated instance or a byte length outside the supported bounds
	 */
	public boolean isInCodeSpaceRange(final int v, final int byteLen) {
		if (range == null || byteLen < 0 || byteLen >= bps.length || !bps[byteLen]) {
			return false;
		}
		final long code = byteLen == 4 ? (v & 0xFFFFFFFFL) : v;
		for (int i = 0; i + 1 < range.length; i += 2) {
			if (code >= range[i] && code <= range[i + 1]) {
				return true;
			}
		}
		return false;
	}
	
	/**
	 * @return the decoder selected for this CMap, or {@code null} when the
	 * instance is unpopulated
	 */
	public CmapEncoding getEncoding() {
		return encoding;
	}
	
	/**
	 * @return the internal array of inclusive low,high bounds (not a copy),
	 * or {@code null} when the instance is unpopulated
	 */
	public long[] getRange() {
		return range;
	}
	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy