All downloads are free. The search and download functionality uses the official Maven repository.

com.ggasoftware.parso.BinDecompressor Maven / Gradle / Ivy

Go to download

A lightweight library to parse sas7bdat files. Supports 'CHAR' compression.

The newest version!
package com.ggasoftware.parso;

import java.util.Arrays;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Implementation of the BIN compression algorithm which corresponds to the literal "SASYZCR2".
 * Refer to the documentation for further details.
 * It follows the general contract provided by the interface Decompressor.
 *
 * <p>The class has no instance fields; the single shared instance is exposed
 * through {@link #instance}.
 *
 * @author dzhelezov
 *
 */
public final class BinDecompressor implements Decompressor {
	private static final Logger log = LoggerFactory
			.getLogger(BinDecompressor.class);

	public static final BinDecompressor instance = new BinDecompressor();

	/** Number of control bits stored in the two-byte prefix of every group of chunks. */
	private static final int PREFIX_BIT_COUNT = 16;

	/** Mask selecting the first (most significant) control bit of a 16-bit prefix. */
	private static final int FIRST_PREFIX_BIT = 0x8000;

	private BinDecompressor() {
		// prevent multiple instances
	}

	/**
	 * Decompresses one row stored in {@code page}.
	 *
	 * <p>As described in the documentation, first 16 bits indicate which blocks are compressed.
	 * Next, each block is preceded by a marker which may consist of one, two or three bytes.
	 * This marker contains the information which compression is used (BIN or simple RLE) and
	 * the block length. A cleared control bit means the next source byte is copied verbatim.
	 *
	 * <p>Marker bytes that would lie past the end of the compressed row are read as zero.
	 * A back-reference pointing before the start of the output is not validated here and
	 * surfaces as an {@link IndexOutOfBoundsException} from {@link System#arraycopy}.
	 *
	 * @param pageoffset   index in {@code page} of the first compressed byte of the row
	 * @param srcLength    number of compressed bytes belonging to the row
	 * @param resultLength initial size of the output buffer; the buffer grows only if the
	 *                     decoded data needs more room
	 * @param page         the bytes containing the compressed row
	 * @return the decompressed row, or a copy of the compressed bytes when an unknown
	 *         marker is encountered (an error is logged in that case)
	 */
	@Override
	public byte[] decompressRow(int pageoffset, int srcLength,
			int resultLength, byte[] page) {

		byte[] srcRow = Arrays.copyOfRange(page, pageoffset, srcLength + pageoffset);
		byte[] outRow = new byte[resultLength];
		int srcOffset = 0;
		int outOffset = 0;

		// A group needs the two prefix bytes plus at least one byte of payload.
		while (srcOffset < srcRow.length - 2) {

			// Read the two-byte prefix as a big-endian 16-bit value; control bits are
			// consumed from the most significant bit downwards.
			int prefix = (unsigned(srcRow[srcOffset]) << 8) | unsigned(srcRow[srcOffset + 1]);
			srcOffset += 2;

			for (int bitIndex = 0; (bitIndex < PREFIX_BIT_COUNT) && (srcOffset < srcRow.length); bitIndex++) {

				boolean compressedChunk = (prefix & (FIRST_PREFIX_BIT >>> bitIndex)) != 0;
				if (!compressedChunk) {
					// Plain chunk: one byte copied as is.
					outRow = ensureCapacity(outRow, outOffset + 1);
					outRow[outOffset++] = srcRow[srcOffset++];
					continue;
				}

				byte markerByte = srcRow[srcOffset];
				// The second and third bytes may play different roles depending on the marker.
				// Missing trailing bytes are treated as zero instead of reading out of bounds.
				byte nextByte = byteAt(srcRow, srcOffset + 1);
				byte thirdByte = byteAt(srcRow, srcOffset + 2);

				if (isShortRLE(markerByte)) {
					int length = getLengthOfRLEPattern(markerByte);
					outRow = ensureCapacity(outRow, outOffset + length);
					Arrays.fill(outRow, outOffset, outOffset + length, nextByte);
					outOffset += length;
					srcOffset += 2;
					continue;
				}

				// NOTE(review): the nibble comparison on the following byte is a heuristic
				// inherited from the original implementation; confirm against the format
				// documentation.
				if (isSingleByteMarker(markerByte) && highNibble(nextByte) != lowNibble(nextByte)) {
					int length = getLengthOfOneBytePattern(markerByte);
					outRow = ensureCapacity(outRow, outOffset + length);

					int backOffset = getOffsetForOneBytePattern(markerByte);
					System.arraycopy(outRow, outOffset - backOffset, outRow,
							outOffset, length);

					srcOffset++;
					outOffset += length;
					continue;
				}

				if (isTwoBytesMarker(markerByte)) {
					int length = getLengthOfTwoBytesPattern(markerByte);
					outRow = ensureCapacity(outRow, outOffset + length);

					// NOTE(review): System.arraycopy copies overlapping ranges as if through
					// a temporary buffer, so a pattern longer than its back-offset is not
					// replicated; verify this matches the intended format semantics.
					int backOffset = getBackOffset(markerByte, nextByte);
					System.arraycopy(outRow, outOffset - backOffset, outRow,
							outOffset, length);

					srcOffset += 2;
					outOffset += length;
					continue;
				}

				if (isThreeBytesMarker(markerByte)) {
					int type = highNibble(markerByte);
					int length = getLengthOfThreeBytesPattern(type, markerByte, nextByte, thirdByte);
					outRow = ensureCapacity(outRow, outOffset + length);

					if (type == 1) { // RLE pattern: the third byte is repeated
						Arrays.fill(outRow, outOffset, outOffset + length, thirdByte);
					} else { // Base-offset pattern: copy from already decoded output
						int backOffset = getBackOffset(markerByte, nextByte);
						System.arraycopy(outRow, outOffset - backOffset, outRow,
								outOffset, length);
					}

					srcOffset += 3;
					outOffset += length;
					continue;
				}

				log.error("Unknown marker {} at offset  {}", srcRow[srcOffset], srcOffset);
				return srcRow;
			}
		}
		return outRow;
	}

	/** Returns the value of {@code b} as an unsigned int in the range 0..255. */
	private static int unsigned(byte b) {
		return b & 0xFF;
	}

	/** Returns the upper four bits of {@code b} as a value in the range 0..15. */
	private static int highNibble(byte b) {
		return (b >> 4) & 0x0F;
	}

	/** Returns the lower four bits of {@code b} as a value in the range 0..15. */
	private static int lowNibble(byte b) {
		return b & 0x0F;
	}

	/** Returns {@code src[index]}, or zero when {@code index} is past the end of {@code src}. */
	private static byte byteAt(byte[] src, int index) {
		return (index < src.length) ? src[index] : 0;
	}

	/** A marker in the range 0x00..0x05 denotes a short run of the byte that follows it. */
	private static boolean isShortRLE(byte firstByteofCB) {
		return firstByteofCB >= 0x00 && firstByteofCB <= 0x05;
	}

	/** Run length of a short RLE marker: marker value plus three; zero for other markers. */
	private static int getLengthOfRLEPattern(byte firstByteofCB) {
		return isShortRLE(firstByteofCB) ? firstByteofCB + 3 : 0;
	}

	/** Tells whether the marker value is one of those handled as a single-byte marker. */
	private static boolean isSingleByteMarker(byte firstByteofCB) {
		switch (firstByteofCB) {
		case 0x02:
		case 0x04:
		case 0x06:
		case 0x08:
		case 0x0A:
			return true;
		default:
			return false;
		}
	}

	/** Pattern length of a single-byte marker: marker value plus 14; zero for other markers. */
	private static int getLengthOfOneBytePattern(byte firstByteofCB) {
		return isSingleByteMarker(firstByteofCB) ? firstByteofCB + 14 : 0;
	}

	/** Back-offset of a single-byte marker: 24 for 0x08, 40 for 0x0A, otherwise zero. */
	private static int getOffsetForOneBytePattern(byte firstByteofCB) {
		switch (firstByteofCB) {
		case 0x08:
			return 24;
		case 0x0A:
			return 40;
		default:
			return 0;
		}
	}

	/** A marker whose upper nibble is greater than two is a two-byte marker. */
	private static boolean isTwoBytesMarker(byte firstByteofCB) {
		return highNibble(firstByteofCB) > 2;
	}

	/** The pattern length of a two-byte marker is the upper nibble of its first byte. */
	private static int getLengthOfTwoBytesPattern(byte firstByteofCB) {
		return highNibble(firstByteofCB);
	}

	/** A marker whose upper nibble is one or two is a three-byte marker. */
	private static boolean isThreeBytesMarker(byte firstByteofCB) {
		int flag = highNibble(firstByteofCB);
		return flag == 2 || flag == 1;
	}

	/**
	 * Back-offset shared by two-byte and three-byte base-offset markers:
	 * {@code 3 + lowNibble(first) + 16 * second}, with {@code second} taken as unsigned.
	 * The explicit parentheses and the unsigned conversion matter: {@code +} binds tighter
	 * than {@code &}, and a raw {@code byte} above 0x7F would otherwise contribute a
	 * negative amount.
	 */
	private static int getBackOffset(byte firstByteofCB, byte secondByteofCB) {
		return 3 + lowNibble(firstByteofCB) + (unsigned(secondByteofCB) << 4);
	}

	/**
	 * Pattern length of a three-byte marker.
	 *
	 * @param type   upper nibble of the first marker byte (1 = RLE, 2 = base-offset)
	 * @param first  first marker byte
	 * @param second second marker byte, interpreted as unsigned
	 * @param third  third marker byte, interpreted as unsigned
	 * @return the length for the given type, or zero for any other type
	 */
	private static int getLengthOfThreeBytesPattern(int type, byte first,
			byte second, byte third) {
		switch (type) {
		case 1:
			return 19 + lowNibble(first) + (unsigned(second) << 4);
		case 2:
			return unsigned(third) + 16;
		default:
			return 0;
		}
	}

	/**
	 * Returns an array that can hold at least {@code requiredLength} bytes.
	 * The given array is returned unchanged when it is already large enough; otherwise a
	 * copy is made whose length is the larger of {@code requiredLength} and twice the
	 * current length.
	 */
	private static byte[] ensureCapacity(byte[] src, int requiredLength) {
		if (requiredLength > src.length) {
			return Arrays.copyOf(src, Math.max(requiredLength, 2 * src.length));
		}
		return src;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy