All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jpedal.fonts.CmapSpec Maven / Gradle / Ivy

The newest version!
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 @LICENSE@
 *
 * ---------------
 * CmapSpec.java
 * ---------------
 */
package org.jpedal.fonts;

import java.util.Map;
import java.util.TreeMap;

/**
 * reads adobe cmap files for encoding and mapping
 *
 */
public final class CmapSpec {

	/**
	 * Character-class table indexed by unsigned byte value:
	 * 1 = white space, 2 = delimiter, 4 = decimal digit, 0 = anything else.
	 * Only the digit class is consulted by this class.
	 */
	private static final byte[] CHAR256 = {
		//      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F,
		1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1
		1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2
		4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 2, 0, 2, 0, // 3
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // C
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // D
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // E
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // F
	};

	/** Raw bytes of the cmap being parsed. */
	private final byte[] cmapData;

	/** Code space ranges as {low, high} pairs; defaults to a single 0..65535 range. */
	private long[][] codeSpaceRange;

	/*
	 * Each map below is keyed by (rangeStart << 32) | rangeEnd and holds the
	 * value assigned to rangeStart; single-code entries use start == end.
	 */
	/** Entries read from notdefrange / notdefchar sections. */
	private final TreeMap<Long, Integer> notDefRange = new TreeMap<Long, Integer>();

	/** Entries read from cidrange / cidchar sections. */
	public final TreeMap<Long, Integer> cidRange = new TreeMap<Long, Integer>();

	/** Entries read from bfrange / bfchar sections. */
	private final TreeMap<Long, Integer> bfRange = new TreeMap<Long, Integer>();

	private CmapEncoding cmapEncoding;

	/** Read position inside {@link #cmapData}. */
	private int p;

	/** Length of {@link #cmapData}. */
	private final int len;

	/**
	 * Parses the supplied cmap bytes line by line and collects the code space,
	 * cid, notdef and bf tables.
	 *
	 * Lines starting with '%' are skipped as comments. A line containing one of
	 * the "begin..." keywords must start with the entry count; the entries
	 * themselves are then read as tokens from the bytes that follow that line.
	 *
	 * @param cmapData raw cmap bytes, must not be null
	 * @throws NumberFormatException if an entry count or a numeric/hex token
	 *                               cannot be parsed
	 */
	public CmapSpec(final byte[] cmapData) {
		this.cmapData = cmapData;
		len = cmapData.length;
		codeSpaceRange = new long[][]{{0, 65535}};

		while (p < len) {
			if ((cmapData[p] & 0xff) == '%') { // % comment marker
				skipLine();
				continue;
			}
			final String line = getNextLine().trim();
			if (line.isEmpty()) {
				continue;
			}
			if (line.contains("begincodespacerange")) { // <00> <80>
				final int count = parseCount(line);
				codeSpaceRange = new long[count][2];
				for (int i = 0; i < count; i++) {
					codeSpaceRange[i][0] = getIntOrHex();
					codeSpaceRange[i][1] = getIntOrHex();
				}
			} else if (line.contains("begincidrange")) { // <8141> <8142> 7887
				readRanges(cidRange, parseCount(line));
			} else if (line.contains("begincidchar")) { // <8143> 8286
				readChars(cidRange, parseCount(line));
			} else if (line.contains("beginnotdefrange")) { // <00> <1f> 1
				readRanges(notDefRange, parseCount(line));
			} else if (line.contains("beginnotdefchar")) { // <8143> 8286
				readChars(notDefRange, parseCount(line));
			} else if (line.contains("beginbfrange")) { // <8141> <8142> 7887
				// NOTE(review): the array form "<a> <b> [<x> <y> ...]" is not
				// handled; only one destination token is read per entry.
				readRanges(bfRange, parseCount(line));
			} else if (line.contains("beginbfchar")) { // <8143> 8286
				readChars(bfRange, parseCount(line));
			}
		}
	}

	/**
	 * Returns the entry count that prefixes a "begin..." line.
	 * Splits on any white space so tab-separated input is accepted as well.
	 */
	private static int parseCount(final String trimmedLine) {
		return Integer.parseInt(trimmedLine.split("\\s+")[0]);
	}

	/** Reads {@code count} "start end value" triples into {@code target}. */
	private void readRanges(final TreeMap<Long, Integer> target, final int count) {
		for (int i = 0; i < count; i++) {
			final long start = getIntOrHex();
			final long end = getIntOrHex();
			final int value = (int) getIntOrHex();
			target.put((start << 32) | end, value);
		}
	}

	/** Reads {@code count} "code value" pairs into {@code target} as one-element ranges. */
	private void readChars(final TreeMap<Long, Integer> target, final int count) {
		for (int i = 0; i < count; i++) {
			final long code = getIntOrHex();
			final int value = (int) getIntOrHex();
			target.put((code << 32) | code, value);
		}
	}

	/**
	 * Finds the first range (in ascending key order) containing {@code code}
	 * and returns its base value plus the offset of {@code code} in the range.
	 *
	 * @return the mapped value, or 0 when no range contains {@code code}
	 */
	private static int lookup(final TreeMap<Long, Integer> ranges, final int code) {
		for (final Map.Entry<Long, Integer> entry : ranges.entrySet()) {
			final long key = entry.getKey();
			final int start = (int) (key >>> 32);
			final int end = (int) (key & 0xffffffffL);
			if (code >= start && code <= end) {
				return entry.getValue() + (code - start);
			}
		}
		return 0;
	}

	/**
	 * Tests whether a value lies inside any of the parsed code space ranges.
	 *
	 * @param v value to test
	 * @return true if {@code low <= v <= high} for at least one range
	 */
	public boolean isInCodeSpaceRange(final int v) {
		for (final long[] range : codeSpaceRange) {
			if (v >= range[0] && v <= range[1]) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Maps a code through the cidrange / cidchar table.
	 *
	 * @param unicode code to look up
	 * @return the mapped value, or 0 if the code is in no range
	 */
	public int getCIDValue(final int unicode) {
		return lookup(cidRange, unicode);
	}

	/**
	 * Maps a code through the bfrange / bfchar table.
	 *
	 * @param cid code to look up
	 * @return the mapped value, or 0 if the code is in no range
	 */
	public int getUnicodeValue(final int cid) {
		return lookup(bfRange, cid);
	}

	/**
	 * Reads the next numeric token, skipping any bytes that start neither a
	 * hex string nor a decimal number.
	 *
	 * A hex string is {@code <...>}; only its first four hex digits are used,
	 * so longer strings are truncated to 16 bits. A decimal token is a run of
	 * ASCII digits; the byte that ends it is left unread so that an
	 * immediately following '<' is not lost.
	 *
	 * @return the parsed value, or 0 if the end of data is reached first
	 * @throws NumberFormatException if a hex string is empty or holds non-hex characters
	 */
	private long getIntOrHex() {
		while (p < len) {
			final int v = cmapData[p] & 0xff;
			if (v == '<') {
				p++; // step over '<'
				final StringBuilder hex = new StringBuilder();
				while (p < len) {
					final int c = cmapData[p++] & 0xff;
					if (c == '>') {
						break;
					}
					hex.append((char) c);
				}
				final String digits = hex.length() > 4 ? hex.substring(0, 4) : hex.toString();
				return Long.parseLong(digits, 16);
			}
			if (isDigit(v)) {
				final StringBuilder dec = new StringBuilder();
				while (p < len && isDigit(cmapData[p] & 0xff)) {
					dec.append((char) (cmapData[p++] & 0xff));
				}
				return Long.parseLong(dec.toString());
			}
			p++;
		}
		return 0;
	}

	/**
	 * Reads bytes up to the next line terminator (LF, CR or CR LF) or the end
	 * of data, consumes the terminator and returns the line without it.
	 * The last line is returned in full even when it has no terminator.
	 */
	private String getNextLine() {
		final StringBuilder line = new StringBuilder();
		while (p < len) {
			final int v = cmapData[p++] & 0xff;
			if (v == 0xa) {
				break;
			}
			if (v == 0xd) {
				if (p < len && (cmapData[p] & 0xff) == 0xa) {
					p++; // treat CR LF as a single terminator
				}
				break;
			}
			line.append((char) v);
		}
		return line.toString();
	}

	/** Advances past the current line, including its LF, CR or CR LF terminator. */
	private void skipLine() {
		while (p < len) {
			final int v = cmapData[p++] & 0xff;
			if (v == 0xa) {
				return;
			}
			if (v == 0xd) {
				if (p < len && (cmapData[p] & 0xff) == 0xa) {
					p++;
				}
				return;
			}
		}
	}

	/** Returns true when {@code ch} (0..255) is an ASCII decimal digit. */
	private static boolean isDigit(final int ch) {
		return CHAR256[ch] == 4;
	}

	public CmapEncoding getCmapEncoding() {
		return cmapEncoding;
	}

	public void setCmapEncoding(final CmapEncoding cmapEncoding) {
		this.cmapEncoding = cmapEncoding;
	}

	/** Lists the code space ranges followed by the cid ranges, one per line. */
	@Override
	public String toString() {
		final StringBuilder sb = new StringBuilder();
		sb.append("codespacerange \n");
		for (final long[] range : codeSpaceRange) {
			sb.append(range[0]).append(" ... ").append(range[1]).append('\n');
		}
		sb.append("cidrange \n");
		for (final Map.Entry<Long, Integer> entry : cidRange.entrySet()) {
			final long key = entry.getKey();
			final int start = (int) (key >>> 32);
			final int end = (int) (key & 0xffffffffL);
			final int v = entry.getValue();
			sb.append(start).append(" ... ").append(end).append(" ==> ").append(v).append('\n');
		}
		return sb.toString();
	}

	/** Prints every bf entry to standard output as "start end value" in hexadecimal. */
	public void printTableCIDtoUNI() {
		for (final Map.Entry<Long, Integer> entry : bfRange.entrySet()) {
			final long key = entry.getKey();
			final int start = (int) (key >>> 32);
			final int end = (int) (key & 0xffffffffL);
			final long uniStart = entry.getValue();

			final String ss = Long.toHexString(start);
			final String ee = Long.toHexString(end);
			final String uu = Long.toHexString(uniStart);
			System.out.println(ss + ' ' + ee + ' ' + uu);
		}
	}

}

//	public static boolean isEOL(final int ch) {
//		return ch == 0xa || ch == 0xd;
//	}
//
//	public static boolean isDelimiter(final int ch) {
//		return CHAR256[ch] == 2;
//	}
//
//	public static boolean isComment(final int ch) {
//		return ch == 0x25;
//	}
//	
//	public static boolean isWhiteSpace(final int ch) {
//		return CHAR256[ch] == 1;
//	}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy