All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ning.compress.lzf.impl.UnsafeChunkEncoderBE Maven / Gradle / Ivy

Go to download

Compression codec for LZF encoding for particularly encoding/decoding, with reasonable compression. Compressor is basic Lempel-Ziv codec, without Huffman (deflate/gzip) or statistical post-encoding. See "http://oldhome.schmorp.de/marc/liblzf.html" for more on original LZF package.

There is a newer version: 1.1.2
Show newest version
package com.ning.compress.lzf.impl;

import com.ning.compress.BufferRecycler;
import com.ning.compress.lzf.LZFChunk;

/**
 * Implementation to use on Big-Endian architectures.
 */
@SuppressWarnings("restriction")
public final class UnsafeChunkEncoderBE
    extends UnsafeChunkEncoder
{
    public UnsafeChunkEncoderBE(int totalLength) {
        super(totalLength);
    }

    public UnsafeChunkEncoderBE(int totalLength, boolean bogus) {
        super(totalLength, bogus);
    }
    public UnsafeChunkEncoderBE(int totalLength, BufferRecycler bufferRecycler) {
        super(totalLength, bufferRecycler);
    }

    public UnsafeChunkEncoderBE(int totalLength, BufferRecycler bufferRecycler, boolean bogus) {
        super(totalLength, bufferRecycler, bogus);
    }


    @Override
    protected int tryCompress(byte[] in, int inPos, int inEnd, byte[] out, int outPos)
    {
        final int[] hashTable = _hashTable;
        int literals = 0;
        inEnd -= TAIL_LENGTH;
        final int firstPos = inPos; // so that we won't have back references across block boundary

        int seen = _getInt(in, inPos) >> 16;
        
        while (inPos < inEnd) {
            seen = (seen << 8) + (in[inPos + 2] & 255);

            int off = hash(seen);
            int ref = hashTable[off];
            hashTable[off] = inPos;

            // First expected common case: no back-ref (for whatever reason)
            if ((ref >= inPos) // can't refer forward (i.e. leftovers)
                    || (ref < firstPos) // or to previous block
                    || (off = inPos - ref) > MAX_OFF
                    || ((seen << 8) != (_getInt(in, ref-1) << 8))) {
                ++inPos;
                ++literals;
                if (literals == LZFChunk.MAX_LITERAL) {
                    outPos = _copyFullLiterals(in, inPos, out, outPos);
                    literals = 0;
                }
                continue;
            }
            // match
            int maxLen = inEnd - inPos + 2;
            if (maxLen > MAX_REF) {
                maxLen = MAX_REF;
            }
            if (literals > 0) {
                outPos = _copyPartialLiterals(in, inPos, out, outPos, literals);
                literals = 0;
            }
            int len = _findMatchLength(in, ref+3, inPos+3, ref+maxLen);
            
            --off; // was off by one earlier
            if (len < 7) {
                out[outPos++] = (byte) ((off >> 8) + (len << 5));
            } else {
                out[outPos++] = (byte) ((off >> 8) + (7 << 5));
                out[outPos++] = (byte) (len - 7);
            }
            out[outPos++] = (byte) off;
            inPos += len;
            seen = _getInt(in, inPos);
            hashTable[hash(seen >> 8)] = inPos;
            ++inPos;
            hashTable[hash(seen)] = inPos;
            ++inPos;
        }
        // try offlining the tail
        return _handleTail(in, inPos, inEnd+4, out, outPos, literals);
    }

    private final static int _getInt(final byte[] in, final int inPos) {
        return unsafe.getInt(in, BYTE_ARRAY_OFFSET + inPos);
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Methods for finding length of a back-reference
    ///////////////////////////////////////////////////////////////////////
     */
    
    private final static int _findMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1)
    {
        // Expect at least 8 bytes to check for fast case; offline others
        if ((ptr1 + 8) >= maxPtr1) { // rare case, offline
            return _findTailMatchLength(in, ptr1, ptr2, maxPtr1);
        }
        // short matches common, so start with specialized comparison
        // NOTE: we know that we have 4 bytes of slack before end, so this is safe:
        int i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1);
        int i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2);
        if (i1 != i2) {
            return 1 + _leadingBytes(i1, i2);
        }
        ptr1 += 4;
        ptr2 += 4;

        i1 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr1);
        i2 = unsafe.getInt(in, BYTE_ARRAY_OFFSET + ptr2);
        if (i1 != i2) {
            return 5 + _leadingBytes(i1, i2);
        }
        return _findLongMatchLength(in, ptr1+4, ptr2+4, maxPtr1);
    }

    private final static int _findLongMatchLength(final byte[] in, int ptr1, int ptr2, final int maxPtr1)
    {
        final int base = ptr1 - 9;
        // and then just loop with longs if we get that far
        final int longEnd = maxPtr1-8;
        while (ptr1 <= longEnd) {
            long l1 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr1);
            long l2 = unsafe.getLong(in, BYTE_ARRAY_OFFSET + ptr2);
            if (l1 != l2) {
                return ptr1 - base + _leadingBytes(l1, l2);
            }
            ptr1 += 8;
            ptr2 += 8;
        }
        // or, if running out of runway, handle last bytes with loop-de-loop...
        while (ptr1 < maxPtr1 && in[ptr1] == in[ptr2]) {
            ++ptr1;
            ++ptr2;
        }
        return ptr1 - base; // i.e. 
    }

    /* With Big-Endian, in-memory layout is "natural", so what we consider
     * leading is also leading for in-register.
     */
    
    private final static int _leadingBytes(int i1, int i2) {
        return Integer.numberOfLeadingZeros(i1 ^ i2) >> 3;
    }
    
    private final static int _leadingBytes(long l1, long l2) {
        return Long.numberOfLeadingZeros(l1 ^ l2) >> 3;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy