All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.jarnbjo.jsnappy.MapBasedCompressor Maven / Gradle / Ivy

Go to download

Java implementation of Google's Snappy compression algorithm. This is an Avast a.s. fork of http://code.google.com/p/jsnappy/ that adds bytecompressor layer compatibility.

The newest version!
package de.jarnbjo.jsnappy;

/**
 * Snappy block compressor that locates back references through a map keyed
 * on the four-byte sequence found at each input position.
 */
class MapBasedCompressor {

	/** Largest backward distance the exhaustive (non first-hit) search accepts. */
	public static final int DEFAULT_MAX_OFFSET = 64*1024;

	/** Shortest match emitted as a copy; also the width in bytes of the map key. */
	private static final int MIN_MATCH = 4;

	/** Longest match {@link #search} reports, i.e. the longest copy ever emitted. */
	private static final int MAX_MATCH = 64;

	/**
	 * Compresses {@code length} bytes of {@code in}, starting at {@code offset}.
	 *
	 * The output starts with the uncompressed length as a little-endian
	 * base-128 varint, followed by literal and copy elements. For
	 * {@code length == 0} nothing at all is written (not even a preamble byte).
	 *
	 * @param in          array holding the data to compress
	 * @param offset      index of the first byte to compress
	 * @param length      number of bytes to compress
	 * @param out         buffer to write into, or {@code null} to allocate a new one
	 * @param useFirstHit {@code true} to take the first candidate the map offers,
	 *                    {@code false} to examine all candidates and keep the longest
	 * @return {@code out} (or the newly allocated buffer) with its length set
	 *         to the number of bytes written
	 */
	static Buffer compress(byte[] in, int offset, int length, Buffer out, boolean useFirstHit) {

		// The 6/5 estimate alone is too small for very short inputs: one input
		// byte already needs a preamble byte, a tag byte and the literal itself.
		// 32 + n + n/6 is the bound the reference Snappy implementation reserves.
		int capacity = Math.max(length + length / 5, 32 + length + length / 6);
		if(out == null) {
			out = new Buffer(capacity);
		}
		else {
			out.ensureCapacity(capacity);
		}

		byte[] target = out.getData();
		int targetIndex = 0;

		// Exclusive end of the region to compress; every bound below uses it
		// so that a non-zero offset is handled consistently.
		int end = offset + length;

		// First input position that has not been written to the output yet.
		int lasthit = offset;

		// Preamble: uncompressed length, seven bits per byte, low bits first,
		// high bit set on every byte except the last.
		int l = length;
		while(l>0) {
			if(l>=128) {
				target[targetIndex++] = (byte)(0x80 | (l&0x7f));
			}
			else {
				target[targetIndex++] = (byte)l;
			}
			l >>= 7;
		}

		IntListHashMap ilhm = new IntListHashMap(Math.max(1, length / 13));

		// The first four positions can never be the target of a search
		// (the main loop starts at offset+4), so they are only indexed.
		for(int i = offset; i+4 < end && i < offset+4; i++) {
			ilhm.put(toInt(in, i), i);
		}

		for(int i = offset+4; i < end; i++) {
			Hit h = search(in, i, end, ilhm, useFirstHit);
			if(i+4 < end) {
				ilhm.put(toInt(in, i), i);
			}
			// A match shorter than four bytes cannot be expressed in the
			// one-byte-offset form and would not save space in the others.
			if(h == null || h.length < MIN_MATCH) {
				continue;
			}

			// Flush the bytes skipped since the previous element as a literal.
			if(lasthit < i) {
				targetIndex = writeLiteral(in, lasthit, i - lasthit, target, targetIndex);
			}
			targetIndex = writeCopy(h, target, targetIndex);

			// NOTE(review): this skip/indexing step and the trailing literal
			// flush below were reconstructed from a truncated listing of this
			// method - compare against the upstream sources before relying on
			// byte-identical output.
			int copyEnd = i + h.length;
			for(int p = i+1; p < copyEnd; p++) {
				if(p+4 < end) {
					ilhm.put(toInt(in, p), p);
				}
			}
			lasthit = copyEnd;
			// The loop increment moves i on to copyEnd.
			i = copyEnd - 1;
		}

		// Whatever follows the last copy is emitted as one final literal.
		if(lasthit < end) {
			targetIndex = writeLiteral(in, lasthit, end - lasthit, target, targetIndex);
		}

		out.setLength(targetIndex);
		return out;
	}

	/**
	 * Writes a literal element: a tag carrying {@code count - 1} (inline when
	 * below 60, otherwise in one to four following little-endian bytes) and
	 * then the {@code count} raw bytes.
	 *
	 * @return the target index following the written element
	 */
	private static int writeLiteral(byte[] in, int from, int count, byte[] target, int targetIndex) {
		int len = count - 1;
		if (len < 60) {
			target[targetIndex++] = (byte)(len<<2);
		}
		else if (len < 0x100) {
			target[targetIndex++] = (byte)(60<<2);
			target[targetIndex++] = (byte)len;
		}
		else if (len < 0x10000) {
			target[targetIndex++] = (byte)(61<<2);
			target[targetIndex++] = (byte)len;
			target[targetIndex++] = (byte)(len>>8);
		}
		else if (len < 0x1000000) {
			target[targetIndex++] = (byte)(62<<2);
			target[targetIndex++] = (byte)len;
			target[targetIndex++] = (byte)(len>>8);
			target[targetIndex++] = (byte)(len>>16);
		}
		else {
			target[targetIndex++] = (byte)(63<<2);
			target[targetIndex++] = (byte)len;
			target[targetIndex++] = (byte)(len>>8);
			target[targetIndex++] = (byte)(len>>16);
			target[targetIndex++] = (byte)(len>>24);
		}
		System.arraycopy(in, from, target, targetIndex, count);
		return targetIndex + count;
	}

	/**
	 * Writes a copy element for {@code h}, choosing the shortest form that
	 * fits: tag type 1 (one offset byte, length 4..11, offset below 2048),
	 * tag type 2 (two offset bytes) or tag type 3 (four offset bytes).
	 *
	 * @return the target index following the written element
	 */
	private static int writeCopy(Hit h, byte[] target, int targetIndex) {
		if(h.length <= 11 && h.offset < 2048) {
			// Bits 0-1: type, bits 2-4: length-4, bits 5-7: offset bits 8-10.
			target[targetIndex++] = (byte)(1 | ((h.length-4)<<2) | ((h.offset>>3)&0xe0));
			target[targetIndex++] = (byte)(h.offset&0xff);
		}
		else if (h.offset < 65536) {
			target[targetIndex++] = (byte)(2 | ((h.length-1)<<2));
			target[targetIndex++] = (byte)(h.offset);
			target[targetIndex++] = (byte)(h.offset>>8);
		}
		else {
			target[targetIndex++] = (byte)(3 | ((h.length-1)<<2));
			target[targetIndex++] = (byte)(h.offset);
			target[targetIndex++] = (byte)(h.offset>>8);
			target[targetIndex++] = (byte)(h.offset>>16);
			target[targetIndex++] = (byte)(h.offset>>24);
		}
		return targetIndex;
	}

	/**
	 * Looks for earlier data that repeats at {@code index}.
	 *
	 * @param source      the input data
	 * @param index       position for which a back reference is wanted
	 * @param length      exclusive upper bound for positions that may be read
	 *                    (it is compared against absolute indices)
	 * @param map         positions indexed so far, keyed by {@link #toInt}
	 * @param useFirstHit {@code true} to return the candidate delivered by
	 *                    {@code map.getFirstHit}, {@code false} to walk all
	 *                    candidates and keep the longest match
	 * @return the match found, or {@code null} if there is none
	 */
	private static Hit search(byte[] source, int index, int length, IntListHashMap map, boolean useFirstHit) {

		if(index + 4 >= length) {
			// We won't search for backward references if there are less than
			// four bytes left to encode, since no relevant compression can be
			// achieved and the map used to store possible back references uses
			// a four byte key.
			return null;
		}

		if(index > 0 &&
				source[index] == source[index-1] &&
				source[index] == source[index+1] &&
				source[index] == source[index+2] &&
				source[index] == source[index+3]) {

			// at least five consecutive bytes, so we do
			// run-length-encoding of the last four
			// (three bytes are required for the encoding,
			// so less than four bytes cannot be compressed)

			int run = 0;
			while(run < MAX_MATCH && index + run < length && source[index] == source[index + run]) {
				run++;
			}
			// Offset 1 makes the copy repeat the preceding byte.
			return new Hit(1, run);
		}

		if(useFirstHit) {
			int fp = map.getFirstHit(toInt(source, index), index-4);
			if(fp < 0) {
				return null;
			}
			return new Hit(index - fp, matchLength(source, fp, index, length));
		}

		IntIterator ii = map.getReverse(toInt(source, index));
		if(ii == null) {
			return null;
		}

		Hit res = null;

		while(ii.next()) {
			int offset = index - ii.get();
			if(offset > DEFAULT_MAX_OFFSET) {
				break;
			}
			if(offset >= 4) {
				int l = matchLength(source, index - offset, index, length);
				if (res == null) {
					res = new Hit(offset, l);
				}
				else if(l > res.length) {
					// Strictly longer only: on a tie the earlier candidate is kept.
					res.offset = offset;
					res.length = l;
				}
			}
		}

		return res;
	}

	/**
	 * Counts how many bytes starting at {@code candidate} equal the bytes
	 * starting at {@code index}. The count stops at {@code end}, at
	 * {@link #MAX_MATCH} bytes, and before the candidate run reaches
	 * {@code index} (so the two runs never overlap).
	 */
	private static int matchLength(byte[] source, int candidate, int index, int end) {
		int l = 0;
		for(int o = candidate, io = index; io < end && o < index && l < MAX_MATCH && source[o] == source[io]; o++, io++) {
			l++;
		}
		return l;
	}

	/**
	 * Packs the four bytes at {@code data[offset..offset+3]} into an int,
	 * first byte in the most significant position.
	 */
	private static int toInt(byte[] data, int offset) {
		return
			((data[offset]&0xff)<<24) |
			((data[offset+1]&0xff)<<16) |
			((data[offset+2]&0xff)<<8) |
			(data[offset+3]&0xff);
	}

	/** A back reference: distance back from the current position and number of matching bytes. */
	private static class Hit {
		int offset, length;
		Hit() {
		}
		Hit(int offset, int length) {
			this.offset = offset;
			this.length = length;
		}
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy