All downloads are FREE. The search and download features use the official Maven repository.

commonMain.korlibs.compression.lzo.LzoRawCompressor.kt Maven / Gradle / Ivy

The newest version!
package korlibs.compression.lzo

import korlibs.compression.lzo.internal.*

@ExperimentalStdlibApi
object LzoRawCompressor {
    // Number of trailing input bytes a match is never extended into: `compress` passes
    // `inputLimit - LAST_LITERAL_SIZE` to `count` as the match limit.
    const val LAST_LITERAL_SIZE = 5
    // Length in bytes of the shortest match: `compress` accepts a candidate after comparing this many
    // bytes and `count` only measures what follows them.
    const val MIN_MATCH = 4
    // Largest input length `compress` accepts; longer inputs are rejected with "Max input length exceeded".
    private const val MAX_INPUT_SIZE = 0x7E000000 /* 2 113 929 216 bytes */
    // log2 of the largest hash table size (see MAX_TABLE_SIZE).
    private const val HASH_LOG = 12
    // Lower bound applied by `computeTableSize`.
    private const val MIN_TABLE_SIZE = 16
    // Upper bound applied by `computeTableSize`; also the size of the default `table` argument of `compress`.
    const val MAX_TABLE_SIZE = 1 shl HASH_LOG
    // Only used to derive MATCH_FIND_LIMIT.
    // NOTE(review): presumably the width of the 8-byte word copies done by `emitLiteral` - confirm.
    private const val COPY_LENGTH = 8
    // `compress` stops looking for matches once the search position is within this many bytes of the input end.
    private const val MATCH_FIND_LIMIT = COPY_LENGTH + MIN_MATCH
    // Inputs shorter than this are written by `compress` as a single literal run without any match search.
    private const val MIN_LENGTH = MATCH_FIND_LIMIT + 1
    // Only used to derive RUN_BITS.
    private const val ML_BITS = 4
    // Number of bits behind RUN_MASK.
    private const val RUN_BITS = 8 - ML_BITS
    // Largest (length - 3) value `encodeLiteralLength` stores directly in the command byte; longer literal
    // runs get extra length bytes.
    private const val RUN_MASK = (1 shl RUN_BITS) - 1
    // Largest back-reference distance: `compress` discards farther candidates and `emitCopy` rejects them.
    private const val MAX_DISTANCE = 49152 - 1
    // The match search in `compress` widens its step by one every 2^SKIP_TRIGGER failed attempts.
    private const val SKIP_TRIGGER = 6 /* Increase this value ==> compression runs slower on incompressible data */
    /**
     * Maps [value] to a hash table slot in `0..mask`.
     *
     * This is a multiplicative hash: conceptually `value * frac(a)` for a real number `a` whose binary
     * expansion is a well-mixed run of ones and zeros, carried out in fixed-point arithmetic for speed.
     *
     * @param value the raw bytes to hash, packed into a Long
     * @param mask table size minus one; the result is ANDed with it
     * @return the slot index
     */
    private fun hash(value: Long, mask: Int): Int {
        // Fixed-point multiply; overflow simply wraps.
        val scrambled = value * 889523592379L
        // Drop the 28 low bits of the product before masking down to the table size.
        val shifted = scrambled ushr 28
        return (shifted and mask.toLong()).toInt()
    }

    /**
     * Returns the output capacity that [compress] requires for an input of [sourceLength] bytes:
     * the input size itself, one extra byte per 255 input bytes, and a fixed 16 bytes of headroom.
     *
     * @param sourceLength number of uncompressed bytes
     * @return the minimum `maxOutputLength` accepted by [compress] for that input size
     */
    fun maxCompressedLength(sourceLength: Int): Int {
        val perRunOverhead = sourceLength / 255
        val fixedOverhead = 16
        return sourceLength + perRunOverhead + fixedOverhead
    }

    /**
     * Compresses [inputLength] bytes of [inputBase], starting at [inputAddress], into [outputBase]
     * starting at [outputAddress].
     *
     * An empty input produces no output at all. Inputs shorter than `MIN_LENGTH` are written as one
     * literal run followed by the stop command. Longer inputs are scanned for repeated 4-byte sequences
     * through a hash table and written as alternating literal runs and copy commands, ending with a
     * final literal run and the stop command.
     *
     * @param inputBase buffer holding the uncompressed data
     * @param inputAddress index in [inputBase] of the first byte to compress
     * @param inputLength number of bytes to compress; must be in `0..MAX_INPUT_SIZE`
     * @param outputBase buffer receiving the compressed data
     * @param outputAddress index in [outputBase] where writing starts
     * @param maxOutputLength capacity the caller reserved; must be at least `maxCompressedLength(inputLength)`
     * @param table scratch hash table of positions relative to [inputAddress]; its previous contents are
     *   ignored. It must hold at least as many entries as the table size chosen for [inputLength]
     *   (never more than [MAX_TABLE_SIZE]).
     * @return the number of bytes written to [outputBase]
     * @throws IllegalArgumentException if [inputLength] is negative or too large, if [maxOutputLength]
     *   is too small, or if [table] is too short
     */
    fun compress(
        inputBase: ByteArray,
        inputAddress: Int,
        inputLength: Int,
        outputBase: ByteArray,
        outputAddress: Int,
        maxOutputLength: Int,
        table: IntArray = IntArray(MAX_TABLE_SIZE)
    ): Int {
        // Validate everything up front, before the caller's scratch table is modified.
        require(inputLength >= 0) { "Input length must not be negative: $inputLength" }
        require(inputLength <= MAX_INPUT_SIZE) { "Max input length exceeded" }
        require(maxOutputLength >= maxCompressedLength(inputLength)) {
            "Max output length must be larger than ${maxCompressedLength(inputLength)}"
        }
        val tableSize = computeTableSize(inputLength)
        require(table.size >= tableSize) {
            "Hash table must hold at least $tableSize entries but has ${table.size}"
        }

        // Every table index below comes from hash(..., mask), so only the first tableSize slots are used
        // and only those need to be reset.
        table.fill(0, 0, tableSize)
        val mask = tableSize - 1

        // nothing compresses to nothing
        if (inputLength == 0) {
            return 0
        }
        var input = inputAddress
        var output = outputAddress
        val inputLimit = inputAddress + inputLength
        // last position at which a new match search may start
        val matchFindLimit = inputLimit - MATCH_FIND_LIMIT
        // matches are never extended past this position
        val matchLimit = inputLimit - LAST_LITERAL_SIZE
        if (inputLength < MIN_LENGTH) {
            // too short to search for matches: store everything as one literal run
            output = emitLastLiteral(true, outputBase, output, inputBase, input, inputLimit - input)
            return (output - outputAddress)
        }
        // start of the bytes that have not been written to the output yet
        var anchor = input

        // First Byte
        // put position in hash
        table[hash(inputBase.getS64LE(input), mask)] = (input - inputAddress)
        input++
        var nextHash = hash(inputBase.getS64LE(input), mask)
        var done = false
        var firstLiteral = true
        do {
            var nextInputIndex = input
            // The step starts at 1 and grows by one every 2^SKIP_TRIGGER failed attempts, so stretches
            // without matches are scanned progressively faster.
            var findMatchAttempts = 1 shl SKIP_TRIGGER
            var step = 1

            // find 4-byte match
            var matchIndex: Int
            do {
                val hash = nextHash
                input = nextInputIndex
                nextInputIndex += step
                step = findMatchAttempts++ ushr SKIP_TRIGGER
                if (nextInputIndex > matchFindLimit) {
                    // ran out of room to search: everything from the anchor on is the final literal run
                    output = emitLastLiteral(firstLiteral, outputBase, output, inputBase, anchor, inputLimit - anchor)
                    return (output - outputAddress)
                }

                // get position on hash
                matchIndex = inputAddress + table[hash]
                nextHash = hash(inputBase.getS64LE(nextInputIndex), mask)

                // put position on hash
                table[hash] = (input - inputAddress)
            } while (inputBase.getS32LE(matchIndex) != inputBase.getS32LE(input) || matchIndex + MAX_DISTANCE < input
            )

            // catch up: extend the match backwards over bytes that are still pending as literals
            while (input > anchor && matchIndex > inputAddress && inputBase.getU8(input - 1) == inputBase.getU8(
                    matchIndex - 1
                )
            ) {
                --input
                --matchIndex
            }
            val literalLength = (input - anchor)
            output = emitLiteral(firstLiteral, inputBase, anchor, outputBase, output, literalLength)
            firstLiteral = false

            // next match
            while (true) {
                val offset = (input - matchIndex)

                // find match length
                input += MIN_MATCH
                val matchLength = count(inputBase, input, matchIndex + MIN_MATCH, matchLimit)
                input += matchLength

                // write copy command
                output = emitCopy(outputBase, output, offset, matchLength + MIN_MATCH)
                anchor = input

                // are we done?
                if (input > matchFindLimit) {
                    done = true
                    break
                }
                // also index a position inside the match that was just emitted
                val position = input - 2

                table[hash(inputBase.getS64LE(position), mask)] = (position - inputAddress)

                // Test next position
                val hash = hash(inputBase.getS64LE(input), mask)
                matchIndex = inputAddress + table[hash]
                table[hash] = (input - inputAddress)
                if (matchIndex + MAX_DISTANCE < input || inputBase.getS32LE(matchIndex) != inputBase.getS32LE(input)) {
                    // no immediate follow-up match: go back to the search loop
                    input++
                    nextHash = hash(inputBase.getS64LE(input), mask)
                    break
                }

                // go for another match
            }
        } while (!done)

        // Encode Last Literals
        output = emitLastLiteral(false, outputBase, output, inputBase, anchor, inputLimit - anchor)
        return (output - outputAddress)
    }

    /**
     * Counts how many bytes starting at [start] equal the bytes starting at [matchStart], never
     * looking at or beyond [matchLimit] on the [start] side.
     *
     * @param inputBase buffer containing both byte sequences
     * @param start index of the sequence being compressed
     * @param matchStart index of the earlier candidate sequence
     * @param matchLimit exclusive upper bound for the [start] side
     * @return the number of equal bytes found
     */
    private fun count(inputBase: ByteArray, start: Int, matchStart: Int, matchLimit: Int): Int {
        var candidate = matchStart
        var cursor = start

        // Bulk phase: compare one long at a time while a whole long still fits below the limit.
        val longLimit = matchLimit - (LzoConstants.SIZE_OF_LONG - 1)
        while (cursor < longLimit) {
            val mismatch: Long = inputBase.getS64LE(candidate) xor inputBase.getS64LE(cursor)
            if (mismatch != 0L) {
                // The values are read little-endian, so the lowest set bit of the XOR sits in the first
                // differing byte; dividing its bit index by 8 gives the count of equal bytes.
                val equalBytes = mismatch.countTrailingZeroBits() shr 3
                return cursor + equalBytes - start
            }
            cursor += LzoConstants.SIZE_OF_LONG
            candidate += LzoConstants.SIZE_OF_LONG
        }

        // Tail phase: try an int, then a short, then a single byte.
        val intLimit = matchLimit - (LzoConstants.SIZE_OF_INT - 1)
        if (cursor < intLimit && inputBase.getU32LE(candidate) == inputBase.getU32LE(cursor)) {
            cursor += LzoConstants.SIZE_OF_INT
            candidate += LzoConstants.SIZE_OF_INT
        }

        val shortLimit = matchLimit - (LzoConstants.SIZE_OF_SHORT - 1)
        if (cursor < shortLimit && inputBase.getU16LE(candidate) == inputBase.getU16LE(cursor)) {
            cursor += LzoConstants.SIZE_OF_SHORT
            candidate += LzoConstants.SIZE_OF_SHORT
        }

        if (cursor < matchLimit && inputBase.getU8(candidate) == inputBase.getU8(cursor)) {
            cursor++
        }
        return cursor - start
    }

    /**
     * Writes the final literal run followed by the stop command.
     *
     * @param firstLiteral whether this is the first literal run of the stream (passed on to `encodeLiteralLength`)
     * @param outputBase destination buffer
     * @param output index in [outputBase] where writing starts
     * @param inputBase source buffer
     * @param inputAddress index in [inputBase] of the first literal byte
     * @param literalLength number of literal bytes to copy
     * @return the index in [outputBase] just past the stop command
     */
    private fun emitLastLiteral(
        firstLiteral: Boolean,
        outputBase: ByteArray,
        output: Int,
        inputBase: ByteArray,
        inputAddress: Int,
        literalLength: Int
    ): Int {
        // Length header first, then exactly literalLength raw bytes.
        val literalStart = encodeLiteralLength(firstLiteral, outputBase, output, literalLength)
        arraycopy(inputBase, inputAddress, outputBase, literalStart, literalLength)

        // Stop command: a 0b0001_HMMM command (byte value 17) with a zero match offset.
        val stopCommand = literalStart + literalLength
        outputBase.set8(stopCommand, 17)
        val stopOffset = stopCommand + 1
        outputBase.set16LE(stopOffset, 0)
        return stopOffset + LzoConstants.SIZE_OF_SHORT
    }

    /**
     * Writes a literal run that is followed by a copy command.
     *
     * The bytes are moved in whole-long steps and at least one step is always performed, so up to
     * `SIZE_OF_LONG` bytes beyond the end of the run may be written to [outputBase]. The returned index
     * ignores that surplus, so whatever is written next overwrites it.
     *
     * @param firstLiteral whether this is the first literal run of the stream (passed on to `encodeLiteralLength`)
     * @param inputBase source buffer
     * @param input index in [inputBase] of the first literal byte
     * @param outputBase destination buffer
     * @param output index in [outputBase] where writing starts
     * @param literalLength number of literal bytes in the run
     * @return the index in [outputBase] just past the last literal byte
     */
    private fun emitLiteral(
        firstLiteral: Boolean,
        inputBase: ByteArray,
        input: Int,
        outputBase: ByteArray,
        output: Int,
        literalLength: Int
    ): Int {
        var source = input
        var target = encodeLiteralLength(firstLiteral, outputBase, output, literalLength)
        val literalEnd = target + literalLength
        do {
            val word = inputBase.getS64LE(source)
            outputBase.set64LE(target, word)
            source += LzoConstants.SIZE_OF_LONG
            target += LzoConstants.SIZE_OF_LONG
        } while (target < literalEnd)
        return literalEnd
    }

    /**
     * Writes the header that announces a literal run of [length] bytes.
     *
     * Three encodings are used:
     * - first literal of the stream and `length < 0xFF - 17`: a single byte holding `length + 17`;
     * - otherwise, `length < 4`: no new byte; the length is OR-ed into the byte two positions before [output];
     * - otherwise: `length - 3` in a single byte if it does not exceed `RUN_MASK`, else a zero byte
     *   followed by the remainder, with one more zero byte for every full 255.
     *
     * @param firstLiteral whether this is the first literal run of the stream
     * @param outBase destination buffer
     * @param output index in [outBase] where the header would start
     * @param length number of literal bytes in the run
     * @return the index in [outBase] after the header ([output] itself when no byte was added)
     */
    private fun encodeLiteralLength(
        firstLiteral: Boolean,
        outBase: ByteArray,
        output: Int,
        length: Int
    ): Int {
        var cursor = output
        when {
            firstLiteral && length < 0xFF - 17 -> {
                outBase.set8(cursor++, length + 17)
            }

            length < 4 -> {
                // Small literals are encoded in the low two bits trailer of the previous command. The
                // trailer is a little endian short, so the byte to patch sits 2 back in the output.
                val trailerIndex = cursor - 2
                outBase.set8(trailerIndex, outBase.getU8(trailerIndex) or length)
            }

            else -> {
                val stored = length - 3
                if (stored <= RUN_MASK) {
                    outBase.set8(cursor++, stored)
                } else {
                    outBase.set8(cursor++, 0)
                    var remaining = stored - RUN_MASK
                    while (remaining > 255) {
                        outBase.set8(cursor++, 0)
                        remaining -= 255
                    }
                    outBase.set8(cursor++, remaining)
                }
            }
        }
        return cursor
    }

    /**
     * Writes a copy (back-reference) command for [matchLength] bytes located [matchOffset] bytes back.
     *
     * @param outputBase destination buffer
     * @param output index in [outputBase] where writing starts
     * @param matchOffset distance back to the matched bytes; must be in `1..MAX_DISTANCE`
     * @param matchLength number of bytes the command copies
     * @return the index in [outputBase] just past the command
     * @throws IllegalArgumentException if [matchOffset] is outside `1..MAX_DISTANCE`
     */
    private fun emitCopy(outputBase: ByteArray, output: Int, matchOffset: Int, matchLength: Int): Int {
        require(matchOffset in 1..MAX_DISTANCE) { "Unsupported copy offset: $matchOffset" }

        // Short two-byte form for small copies at small distances: 0bMMMP_PPLL 0bPPPP_PPPP
        if (matchLength <= 8 && matchOffset <= 2048) {
            // this form stores matchLength - 1 and matchOffset - 1
            val storedLength = matchLength - 1
            val storedOffset = matchOffset - 1
            outputBase.set8(output, (storedLength shl 5) or ((storedOffset and 0b111) shl 2))
            outputBase.set8(output + 1, storedOffset ushr 3)
            return output + 2
        }

        // the long forms store matchLength - 2
        val storedLength = matchLength - 2
        val offsetPosition: Int
        val storedOffset: Int
        when {
            matchOffset >= (1 shl 15) -> {
                // 0b0001_1MMM (0bMMMM_MMMM)* 0bPPPP_PPPP_PPPP_PPLL
                offsetPosition = encodeMatchLength(outputBase, output, storedLength, 0b0000_0111, 0b0001_1000)
                storedOffset = matchOffset
            }

            matchOffset > (1 shl 14) -> {
                // 0b0001_0MMM (0bMMMM_MMMM)* 0bPPPP_PPPP_PPPP_PPLL
                offsetPosition = encodeMatchLength(outputBase, output, storedLength, 0b0000_0111, 0b0001_0000)
                storedOffset = matchOffset
            }

            else -> {
                // 0b001M_MMMM (0bMMMM_MMMM)* 0bPPPP_PPPP_PPPP_PPLL
                offsetPosition = encodeMatchLength(outputBase, output, storedLength, 0b0001_1111, 0b0010_0000)
                // this command stores matchOffset - 1
                storedOffset = matchOffset - 1
            }
        }
        return encodeOffset(outputBase, offsetPosition, storedOffset)
    }

    /**
     * Writes the 16-bit little-endian trailer of a copy command: [offset] shifted left by two, which
     * leaves the two low bits clear.
     *
     * @param outputBase destination buffer
     * @param outputAddress index in [outputBase] where the trailer is written
     * @param offset offset value to store
     * @return the index in [outputBase] just past the trailer
     */
    private fun encodeOffset(outputBase: ByteArray, outputAddress: Int, offset: Int): Int {
        val trailer = offset shl 2
        outputBase.set16LE(outputAddress, trailer)
        return outputAddress + 2
    }

    /**
     * Writes the command byte of a copy command together with its match length.
     *
     * A length of at most [baseMatchLength] is OR-ed straight into [command]. A longer one is written as
     * the bare [command] byte followed by the excess over [baseMatchLength]: one zero byte for every
     * full 255 and then the remainder.
     *
     * @param outputBase destination buffer
     * @param output index in [outputBase] where writing starts
     * @param matchLength length value to store (already adjusted by the caller)
     * @param baseMatchLength largest length that fits into the command byte itself
     * @param command command bits to combine with the length
     * @return the index in [outputBase] just past the bytes written
     */
    private fun encodeMatchLength(
        outputBase: ByteArray,
        output: Int,
        matchLength: Int,
        baseMatchLength: Int,
        command: Int
    ): Int {
        // Short case: the length fits into the command byte.
        if (matchLength <= baseMatchLength) {
            outputBase.set8(output, command or matchLength)
            return output + 1
        }

        var cursor = output
        outputBase.set8(cursor++, command)
        var excess = (matchLength - baseMatchLength).toLong()
        // Two zero bytes at a time while more than 510 remain...
        while (excess > 510) {
            outputBase.set16LE(cursor, 0)
            cursor += LzoConstants.SIZE_OF_SHORT
            excess -= 510
        }
        // ...then at most one more zero byte...
        if (excess > 255) {
            outputBase.set8(cursor++, 0)
            excess -= 255
        }
        // ...and finally the remainder.
        outputBase.set8(cursor++, excess)
        return cursor
    }

    /**
     * Chooses the hash table size for an input of [inputSize] bytes: the smallest power of two that is
     * at least [inputSize], kept between `MIN_TABLE_SIZE` and `MAX_TABLE_SIZE`.
     *
     * @param inputSize number of bytes that will be compressed
     * @return a power of two in `MIN_TABLE_SIZE..MAX_TABLE_SIZE`
     */
    private fun computeTableSize(inputSize: Int): Int {
        // Any input larger than half the maximum table rounds up to at least the maximum. Returning here
        // also keeps the shift below from overflowing: for inputs above 2^30 + 1 it used to wrap to a
        // negative number, which the clamp then turned into MIN_TABLE_SIZE.
        if (inputSize > MAX_TABLE_SIZE / 2) {
            return MAX_TABLE_SIZE
        }

        // smallest power of 2 that is at least inputSize (0 for inputSize <= 1)
        val target = (inputSize - 1).takeHighestOneBit() shl 1

        // keep it between MIN_TABLE_SIZE and MAX_TABLE_SIZE
        return target.coerceIn(MIN_TABLE_SIZE, MAX_TABLE_SIZE)
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy