All Downloads are FREE. Search and download functionalities are using the official Maven repository.

commonMain.korlibs.compression.deflate.Deflater.kt Maven / Gradle / Ivy

The newest version!
package korlibs.compression.deflate

import kotlin.math.*

// @TODO: Use sync API, suspend but do not use asynchronous stuff. Simplify interface for input and output
// @TODO: Act like a sequence of status (waiting for filling input, waiting for filling output, etc.)
// @TODO: Use = sequence { yield(NEED_MORE_INPUT) }
internal open class DeflaterPortable(val windowBits: Int) : IDeflaterInternal {
    /**
     * Writes the bytes available in [i] to [o] as a sequence of DEFLATE "stored" (uncompressed) blocks.
     *
     * Each block carries at most 0xFFFF bytes and is laid out as: one header byte (1 when the block
     * consumes everything currently reported by `getAvailable()`, 0 otherwise), the chunk length and
     * its bitwise complement as 16-bit little-endian values, followed by the raw bytes.
     *
     * When [i] has no data at all, a single empty final stored block is written so that the output
     * still contains a block marked as final, as a DEFLATE stream requires.
     *
     * @param i source of the bytes to store.
     * @param o destination of the generated stream.
     * @param level accepted for interface compatibility; this implementation never reads it.
     */
    override suspend fun compress(
        i: DeflaterBitReader,
        o: DeflaterAsyncOutputStream,
        level: Float
    ) {
        var wroteBlock = false
        while (i.hasAvailable()) {
            val available = i.getAvailable()
            val chunkSize = min(available, 0xFFFFL).toInt()
            // Header byte: final-block flag in bit 0, block type 0 (stored) in the remaining bits.
            o.write8(if (chunkSize >= available) 1 else 0)
            o.write16LE(chunkSize)
            o.write16LE(chunkSize.inv())
            o.writeBytes(i.readBytesExact(chunkSize))
            wroteBlock = true
        }
        if (!wroteBlock) {
            // Empty input: emit a zero-length final stored block instead of an empty (invalid) stream.
            o.write8(1)
            o.write16LE(0)
            o.write16LE(0.inv())
        }
    }

    /**
     * Inflates the DEFLATE stream read from [reader] and writes the decoded bytes to [out].
     *
     * Blocks are processed until one flagged as the last block has been decoded:
     * - block type 0 is copied verbatim after checking its length against the complemented length,
     * - block type 1 is decoded with the fixed trees held in the companion object,
     * - block type 2 first reads its own literal/length and distance trees from the stream.
     *
     * Decoded data goes through a [SlidingWindowWithOutput], which keeps the history needed for
     * back-references and flushes to [out] whenever its buffer reaches the flush threshold.
     *
     * Malformed input detected here is reported through [error] (an [IllegalStateException]).
     *
     * @param reader bit-level source of the compressed stream.
     * @param out destination of the decompressed bytes.
     */
    override suspend fun uncompress(reader: DeflaterBitReader, out: DeflaterAsyncOutputStream) {
        val sout = SlidingWindowWithOutput(SlidingWindow(windowBits), out, reader.bigChunkSize, reader.readWithSize)
        var lastBlock = false
        // Scratch objects reused for every dynamic block to avoid re-allocating per block.
        val tempTree = HuffmanTree()
        val tempDist = HuffmanTree()
        val codeLenCodeLen = IntArray(32)
        val lengths = IntArray(512)
        while (!lastBlock) {
            sout.flushIfRequired()
            reader.prepareBigChunkIfRequired()

            lastBlock = reader.sreadBit()
            val blockType = reader.readBits(2)
            if (blockType !in 0..2) error("invalid bit")

            if (blockType == 0) {
                // Stored block: length, complemented length, then the raw bytes.
                val len = reader.su16LE()
                val nlen = reader.su16LE()
                val nnlen = nlen.inv() and 0xFFFF
                if (len != nnlen) error("Invalid deflate stream: len($len) != ~nlen($nnlen) :: nlen=$nlen")
                val bytes = reader.abytes(len)
                sout.putOut(bytes, 0, len)
                sout.flushIfRequired()
            } else {
                val tree: HuffmanTree
                val dist: HuffmanTree
                if (blockType == 1) {
                    tree = FIXED_TREE
                    dist = FIXED_DIST
                } else {
                    // Dynamic block: read the code-length code first, then use it to read the
                    // literal/length and distance code lengths as one combined sequence.
                    val hlit = reader.readBits(5) + 257
                    val hdist = reader.readBits(5) + 1
                    val hclen = reader.readBits(4) + 4
                    codeLenCodeLen.fill(0)
                    for (i in 0 until hclen) codeLenCodeLen[HCLENPOS[i]] = reader.readBits(3)
                    // tempTree temporarily holds the code-length tree; it is rebuilt as the
                    // literal/length tree once all the lengths have been read.
                    tempTree.setFromLengths(codeLenCodeLen)
                    val codeLen = tempTree
                    val hlithdist = hlit + hdist
                    var n = 0
                    lengths.fill(0)
                    while (n < hlithdist) {
                        val value = reader.read(codeLen)
                        if (value !in 0..18) error("Invalid")

                        // Number of entries produced by this symbol.
                        val len = when (value) {
                            16 -> reader.readBits(2) + 3
                            17 -> reader.readBits(3) + 3
                            18 -> reader.readBits(7) + 11
                            else -> 1
                        }
                        // Code length written into those entries.
                        val vv = when (value) {
                            16 -> {
                                // Symbol 16 repeats the previous length, so one must already exist.
                                if (n == 0) error("Invalid deflate stream: repeat code without a previous code length")
                                lengths[n - 1]
                            }
                            17 -> 0
                            18 -> 0
                            else -> value
                        }

                        lengths.fill(vv, n, n + len)
                        n += len
                    }
                    tempTree.setFromLengths(lengths, 0, hlit)
                    tempDist.setFromLengths(lengths, hlit, hlithdist)
                    tree = tempTree
                    dist = tempDist
                }
                while (true) {
                    // @TODO: Read in parallel while decoding
                    reader.prepareBigChunkIfRequired()
                    val value = tree.read(reader)
                    if (value == 256) break // end-of-block symbol
                    if (value < 256) {
                        sout.putOut(value.toByte())
                    } else {
                        val zlenof = value - 257
                        val lengthExtra = reader.readBits(LEN_EXTRA[zlenof])
                        val distanceData = reader.read(dist)
                        // DIST_EXTRA only describes distance symbols 0..29; anything else would
                        // index past the end of that table.
                        if (distanceData !in 0..29) error("Invalid deflate stream: distance code $distanceData")
                        val distanceExtra = reader.readBits(DIST_EXTRA[distanceData])
                        val distance = DIST_BASE[distanceData] + distanceExtra
                        val length = LEN_BASE[zlenof] + lengthExtra
                        sout.getPutCopyOut(distance, length)
                    }
                    sout.flushIfRequired()
                }
            }
        }
        // Push out whatever is still buffered.
        sout.flushIfRequired(finish = true)
    }

    /** Reader-first spelling of [HuffmanTree.read]: decodes one symbol of [tree] from this reader. */
    private inline fun DeflaterBitReader.read(tree: HuffmanTree): Int {
        return tree.read(this)
    }

    /**
     * Couples a [SlidingWindow] with a fixed-size staging buffer that is written to [out].
     *
     * Every byte produced is recorded both in [sliding] and in [bab]. The staging buffer is allocated
     * with `flushSize + extraSize` bytes and [mustFlush] becomes true once it holds at least
     * [flushSize] bytes.
     */
    internal class SlidingWindowWithOutput(
        val sliding: SlidingWindow,
        val out: DeflaterAsyncOutputStream,
        val flushSize: Int = FLUSH_SIZE,
        val extraSize: Int = EXTRA_SIZE
    ) {
        companion object {
            /** Default number of buffered bytes at which a flush becomes due. */
            const val FLUSH_SIZE = 8 * 1024 * 1024

            /** Default headroom allocated beyond the flush threshold. */
            const val EXTRA_SIZE = 128 * 1024
        }

        // @TODO: Optimize with buffering and copying
        val bab = FixedSizeByteArrayBuilder(flushSize + extraSize)

        /** Number of bytes currently staged in [bab]. */
        val output get() = bab.size

        /** True once the staged byte count has reached [flushSize]. */
        val mustFlush get() = bab.size >= flushSize

        /** Copies [length] bytes, one at a time, from [distance] bytes back in the window to the output. */
        fun getPutCopyOut(distance: Int, length: Int) {
            repeat(length) {
                val copied = sliding.getPut(distance)
                bab.appendFast(copied.toByte())
            }
        }

        /** Stages [len] bytes of [bytes] starting at [offset] and records them in the window. */
        fun putOut(bytes: ByteArray, offset: Int, len: Int) {
            bab.append(bytes, offset, len)
            sliding.putBytes(bytes, offset, len)
        }

        /** Stages a single byte and records it in the window. */
        fun putOut(byte: Byte) {
            bab.appendFast(byte)
            sliding.put(byte.unsigned)
        }

        /** Writes everything staged so far to [out] and empties the staging buffer. */
        suspend inline fun flush() {
            val pending = bab.size
            out.write(bab.data, 0, pending)
            bab.clear()
        }

        /** Flushes when [finish] is set or when the flush threshold has been reached. */
        suspend inline fun flushIfRequired(finish: Boolean = false) {
            if (!finish && !mustFlush) return
            flush()
        }
    }

    /** Shared instance (window of 15 bits) plus the constant trees and lookup tables used while inflating. */
    companion object : DeflaterPortable(15) {
        /** Literal/length tree used for block type 1 (288 symbols with lengths 8/9/7/8 by range). */
        private val FIXED_TREE: HuffmanTree = HuffmanTree().setFromLengths(
            IntArray(288) { symbol ->
                when (symbol) {
                    in 0..143 -> 8
                    in 144..255 -> 9
                    in 256..279 -> 7
                    else -> 8
                }
            }
        )

        /** Distance tree used for block type 1: 32 symbols, all 5 bits long. */
        private val FIXED_DIST: HuffmanTree = HuffmanTree().setFromLengths(IntArray(32) { 5 })

        /** Extra bits to read for each length symbol, indexed by `symbol - 257`. */
        private val LEN_EXTRA = intArrayOf(
            0, 0, 0, 0, 0, 0, 0, 0,
            1, 1, 1, 1,
            2, 2, 2, 2,
            3, 3, 3, 3,
            4, 4, 4, 4,
            5, 5, 5, 5,
            0, 0, 0
        )

        /** Base match length for each length symbol, indexed by `symbol - 257`. */
        private val LEN_BASE = intArrayOf(
            3, 4, 5, 6, 7, 8, 9, 10,
            11, 13, 15, 17,
            19, 23, 27, 31,
            35, 43, 51, 59,
            67, 83, 99, 115,
            131, 163, 195, 227,
            258, 0, 0
        )

        /** Extra bits to read for each distance symbol. */
        private val DIST_EXTRA = intArrayOf(
            0, 0, 0, 0,
            1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
            8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
        )

        /** Base distance for each distance symbol. */
        private val DIST_BASE = intArrayOf(
            1, 2, 3, 4,
            5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385,
            513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
            0, 0
        )

        /** Order in which the code-length code lengths appear in a dynamic block header. */
        private val HCLENPOS = intArrayOf(
            16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
            11, 4, 12, 3, 13, 2, 14, 1, 15
        )
    }

    /**
     * Huffman decoding tree built from a list of code lengths.
     *
     * Nodes live in three parallel arrays ([value], [left], [right]) and are addressed by index, so
     * rebuilding the tree with [setFromLengths] reuses the same storage. When the fast path is
     * enabled, [FAST_INFO] / [FAST_NODE] hold a lookup table indexed by the next `FAST_BITS` bits of
     * input so that short codes are resolved without walking the tree.
     */
    internal class HuffmanTree {
        companion object {
            /** Stored in [value] for internal (non-leaf) nodes and in unused [FAST_INFO] slots. */
            private const val INVALID_VALUE = -1

            /** Marks a [FAST_INFO] entry whose code is longer than `FAST_BITS`. */
            private const val INCOMPLETE_VALUE = -2

            /** Node index used as the "no child" marker. */
            private const val NIL = 1023
            private const val MAX_LEN = 16
            private const val MAX_CODES = 288

            private const val ENABLE_EXPERIMENTAL_FAST_READ = true
            private const val ENABLE_EXPERIMENTAL_FAST_READ_V2 = true

            /** Number of input bits used to index the fast lookup table. */
            private const val FAST_BITS = 10
        }

        // Node storage: entry i describes node i.
        private val value = IntArray(1024)
        private val left = IntArray(1024)
        private val right = IntArray(1024)

        private var nodeOffset = 0
        private var root: Int = NIL
        private var ncodes: Int = 0

        // Low half-word contains the value, High half-word contains the len
        val FAST_INFO = IntArray(1 shl FAST_BITS) { INVALID_VALUE }

        // For entries flagged INCOMPLETE_VALUE: the node at which the bit-by-bit walk resumes.
        val FAST_NODE = IntArray(1 shl FAST_BITS) { 0 }

        /**
         * Decodes and returns the next symbol from [reader].
         *
         * With the fast path enabled and at least `FAST_BITS` bits buffered, the symbol is looked up in
         * [FAST_INFO]; a hit consumes the code's bits and returns immediately. If the table reports
         * an incomplete code, the already-matched bits are consumed and decoding continues bit by
         * bit from the recorded node. Otherwise the tree is walked from the root.
         */
        fun read(reader: DeflaterBitReader): Int {
            if (ENABLE_EXPERIMENTAL_FAST_READ) reader.ensureBits(FAST_BITS)
            var node = this.root
            if (ENABLE_EXPERIMENTAL_FAST_READ && reader.bitsavailable >= FAST_BITS) {
                val bits = reader.peekBits(FAST_BITS)
                val raw = FAST_INFO[bits]
                // Sign-extend the low 16 bits so INCOMPLETE_VALUE (-2) survives the packing.
                val value = raw.toShort().toInt()
                val len = raw shr 16
                if (len > 0) {
                    reader.skipBits(len)
                    if (value == INCOMPLETE_VALUE) {
                        node = FAST_NODE[bits]
                    } else {
                        return value
                    }
                }
            }
            // Slow path: follow one bit at a time until a leaf (or NIL) is reached.
            do {
                node = if (reader.sreadBit()) node.right else node.left
            } while (node != NIL && node.value == INVALID_VALUE)
            return node.value
        }

        /** Discards every node so the storage can be refilled. */
        private fun resetAlloc() {
            nodeOffset = 0
        }

        /** Stores a node in the next free slot and returns its index. */
        private fun alloc(value: Int, left: Int, right: Int): Int {
            val index = nodeOffset++
            this.value[index] = value
            this.left[index] = left
            this.right[index] = right
            return index
        }

        // Accessors that let a node index be used like a node object.
        private inline val Int.value get() = this@HuffmanTree.value[this]
        private inline val Int.left get() = this@HuffmanTree.left[this]
        private inline val Int.right get() = this@HuffmanTree.right[this]

        private fun allocLeaf(value: Int): Int = alloc(value, NIL, NIL)
        private fun allocNode(left: Int, right: Int): Int = alloc(INVALID_VALUE, left, right)

        // Scratch tables reused by every setFromLengths call.
        private val COUNTS = IntArray(MAX_LEN + 1)
        private val OFFSETS = IntArray(MAX_LEN + 1)
        private val COFFSET = IntArray(MAX_LEN + 1)
        private val CODES = IntArray(MAX_CODES)

        /**
         * Rebuilds this tree from `codeLengths[start until end]`, where entry `n` is the code length
         * of symbol `n - start` (0 meaning the symbol is unused).
         *
         * @return this tree, to allow chaining.
         * @throws IllegalStateException (via [error]) when a length is outside `0..MAX_LEN` or when
         *   the lengths do not pair up into a full binary tree.
         */
        fun setFromLengths(codeLengths: IntArray, start: Int = 0, end: Int = codeLengths.size): HuffmanTree {
            var oldOffset = 0
            var oldCount = 0
            val ncodes = end - start

            resetAlloc()

            COUNTS.fill(0)

            // Compute the count of codes per length
            for (n in start until end) {
                val codeLen = codeLengths[n]
                if (codeLen !in 0..MAX_LEN) error("Invalid HuffmanTree.codeLengths $codeLen")
                COUNTS[codeLen]++
            }

            // Compute the disposition using the counts per length.
            // The range includes MAX_LEN so that the slot of the longest accepted length is
            // initialised on every call as well.
            var currentOffset = 0
            for (n in 0..MAX_LEN) {
                val count = COUNTS[n]
                OFFSETS[n] = currentOffset
                COFFSET[n] = currentOffset
                currentOffset += count
            }

            // Place elements in the computed disposition
            for (n in start until end) {
                val codeLen = codeLengths[n]
                CODES[COFFSET[codeLen]++] = n - start
            }

            // Build bottom-up: at each length, allocate that length's leaves followed by the
            // parents pairing up the nodes produced for the previous (longer) length.
            for (i in MAX_LEN downTo 1) {
                val newOffset = nodeOffset

                val OFFSET = OFFSETS[i]
                val SIZE = COUNTS[i]
                for (j in 0 until SIZE) allocLeaf(CODES[OFFSET + j])
                for (j in 0 until oldCount step 2) allocNode(oldOffset + j, oldOffset + j + 1)

                oldOffset = newOffset
                oldCount = SIZE + oldCount / 2
                if (oldCount >= 2 && oldCount % 2 != 0) {
                    error("This canonical code does not represent a Huffman code tree: $oldCount")
                }
            }
            if (oldCount != 2) {
                error("This canonical code does not represent a Huffman code tree")
            }

            // The last two nodes allocated are the root's children.
            this.root = allocNode(nodeOffset - 2, nodeOffset - 1)
            this.ncodes = ncodes

            if (ENABLE_EXPERIMENTAL_FAST_READ) {
                computeFastLookup()
            }

            return this
        }

        // @TODO: Optimize this
        /** Fills [FAST_INFO] (and, in the V2 variant, [FAST_NODE]) for the current tree. */
        private fun computeFastLookup() {
            if (ENABLE_EXPERIMENTAL_FAST_READ_V2) {
                FAST_INFO.fill(INVALID_VALUE)
                computeEncodedValues(root, 0, 0)
            } else {
                // Brute force: decode every possible FAST_BITS-wide bit pattern through the tree.
                for (value in FAST_INFO.indices) {
                    var node = this.root
                    var bitcount = 0
                    do {
                        node = if (value.extractBool(bitcount++)) node.right else node.left
                    } while (node != NIL && node.value == INVALID_VALUE)
                    FAST_INFO[value] = if (bitcount > FAST_BITS) -1 else node.value or (bitcount shl 16)
                }
            }
        }

        /**
         * Walks the tree depth-first, tracking the bit pattern ([encoded], [encodedBits] bits long,
         * first bit in the lowest position) that leads to [node]. Leaves are written to the lookup
         * table with their value; internal nodes reached after `FAST_BITS` bits are written as
         * incomplete entries.
         */
        private fun computeEncodedValues(node: Int, encoded: Int, encodedBits: Int) {
            if (node.value == INVALID_VALUE) {
                if (encodedBits < FAST_BITS) {
                    computeEncodedValues(node.left, encoded, encodedBits + 1)
                    computeEncodedValues(node.right, encoded or (1 shl encodedBits), encodedBits + 1)
                } else {
                    writeVariants(encoded, encodedBits, node, INCOMPLETE_VALUE)
                }
            } else {
                writeVariants(encoded, encodedBits, node, node.value)
            }
        }

        /**
         * Writes the packed `(encodedBits shl 16) | nvalue` entry, and [node], into every table slot
         * whose low [encodedBits] bits equal [encoded].
         */
        private fun writeVariants(encoded: Int, encodedBits: Int, node: Int, nvalue: Int) {
            val encodedInfo = (nvalue and 0xFFFF) or (encodedBits shl 16)
            val rangeCount = 1 shl (FAST_BITS - encodedBits)
            for (n in 0 until rangeCount) {
                val i = encoded or (n shl encodedBits)
                FAST_INFO[i] = encodedInfo
                FAST_NODE[i] = node
            }
        }
    }

    /**
     * Circular byte buffer of `1 shl nbits` entries holding the most recently written bytes.
     *
     * [pos] is the index of the next write; positions wrap around using [mask].
     */
    internal class SlidingWindow(val nbits: Int) {
        val data = ByteArray(1 shl nbits)
        val mask = data.size - 1
        var pos = 0

        /** Returns, as an unsigned value, the byte located [offset] positions before [pos]. */
        fun get(offset: Int): Int {
            val index = (pos - offset) and mask
            return data[index].toInt() and 0xFF
        }

        /** Re-appends the byte found [offset] positions back and returns it. */
        fun getPut(offset: Int): Int {
            val fetched = get(offset)
            return put(fetched)
        }

        /** Stores the low 8 bits of [value] at [pos], advances [pos] and returns [value] unchanged. */
        fun put(value: Int): Int {
            data[pos] = value.toByte()
            pos = (pos + 1) and mask
            return value
        }

        // @TODO: Optimize?
        /** Appends [len] bytes of [bytes], starting at [offset], one byte at a time. */
        fun putBytes(bytes: ByteArray, offset: Int, len: Int) {
            repeat(len) { n -> put(bytes[offset + n].unsigned) }
        }
    }

    /**
     * Append-only view over a pre-allocated [data] array. [size] counts the bytes written so far;
     * the backing array is never resized by this class.
     */
    internal class FixedSizeByteArrayBuilder(val data: ByteArray) {
        /** Allocates a fresh backing array of [size] bytes. */
        constructor(size: Int) : this(ByteArray(size))

        /** Length of the backing array. */
        val capacity: Int get() = data.size

        /** Number of bytes appended since construction or the last [clear]. */
        var size: Int = 0
            private set

        /** Forgets the appended bytes; the backing array's contents are left as they are. */
        fun clear() {
            size = 0
        }

        /** Copies [len] bytes of [array], starting at [offset], to the end of the builder. */
        fun append(array: ByteArray, offset: Int = 0, len: Int = array.size - offset) {
            array.copyInto(data, destinationOffset = size, startIndex = offset, endIndex = offset + len)
            size += len
        }

        /** Appends one byte. */
        fun appendFast(v: Byte) {
            data[size++] = v
        }

        /** Chainable variant of [appendFast]. */
        inline fun append(v: Byte): FixedSizeByteArrayBuilder {
            appendFast(v)
            return this
        }

        /** Appends all the given bytes. */
        fun append(vararg v: Byte): Unit = append(v)

        /** Appends the low 8 bits of each given value. */
        fun append(vararg v: Int) {
            v.forEachIndexed { n, item -> data[size + n] = item.toByte() }
            size += v.size
        }

        /** Returns a copy of the bytes appended so far. */
        fun toByteArray(): ByteArray = data.copyOf(size)
    }
}

/** True when the bit at position [offset] of this value is set. */
private inline fun Int.extractBool(offset: Int): Boolean {
    return extract1(offset) == 1
}

/** The bit at position [offset] of this value, as 0 or 1. */
private inline fun Int.extract1(offset: Int): Int {
    val shifted = this ushr offset
    return shifted and 1
}
/** This byte's value in the range 0..255. */
private val Byte.unsigned: Int
    get() = 0xFF and toInt()
/** Treats the low [bits] bits of this value as a signed number and widens it to a full Int. */
private fun Int.signExtend(bits: Int): Int {
    val shift = Int.SIZE_BITS - bits
    return (this shl shift) shr shift
}
/** This Int reinterpreted as an unsigned 32-bit quantity, widened to Long. */
private inline val Int.unsigned: Long
    get() = toLong() and 0xFFFFFFFFL
/** Copies [size] bytes from [src], starting at [srcPos], into [dst] starting at [dstPos]. */
private fun arraycopy(src: ByteArray, srcPos: Int, dst: ByteArray, dstPos: Int, size: Int) {
    src.copyInto(
        destination = dst,
        destinationOffset = dstPos,
        startIndex = srcPos,
        endIndex = srcPos + size
    )
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy