All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.tukaani.xz.lzma.LZMAEncoderNormal Maven / Gradle / Ivy

The newest version!
/*
 * LZMAEncoderNormal
 *
 * Authors: Lasse Collin 
 *          Igor Pavlov 
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

package org.tukaani.xz.lzma;

import org.tukaani.xz.lz.LZEncoder;
import org.tukaani.xz.lz.Matches;
import org.tukaani.xz.rangecoder.RangeEncoder;

final class LZMAEncoderNormal extends LZMAEncoder {
    private static final int OPTS = 4096;

    private static int EXTRA_SIZE_BEFORE = OPTS;
    private static int EXTRA_SIZE_AFTER = OPTS;

    private final Optimum[] opts = new Optimum[OPTS];
    private int optCur = 0;
    private int optEnd = 0;

    private Matches matches;

    // These are fields solely to avoid allocating the objects again and
    // again on each function call.
    private final int[] repLens = new int[REPS];
    private final State nextState = new State();

    static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
        return LZEncoder.getMemoryUsage(dictSize,
                   Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
                   EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf)
               + OPTS * 64 / 1024;
    }

    LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb,
                             int dictSize, int extraSizeBefore,
                             int niceLen, int mf, int depthLimit) {
        super(rc, LZEncoder.getInstance(dictSize,
                                        Math.max(extraSizeBefore,
                                                 EXTRA_SIZE_BEFORE),
                                        EXTRA_SIZE_AFTER,
                                        niceLen, MATCH_LEN_MAX,
                                        mf, depthLimit),
              lc, lp, pb, dictSize, niceLen);

        for (int i = 0; i < OPTS; ++i)
            opts[i] = new Optimum();
    }

    public void reset() {
        optCur = 0;
        optEnd = 0;
        super.reset();
    }

    /**
     * Converts the opts array from backward indexes to forward indexes.
     * Then it will be simple to get the next symbol from the array
     * in later calls to getNextSymbol().
     */
    private int convertOpts() {
        optEnd = optCur;

        int optPrev = opts[optCur].optPrev;

        do {
            Optimum opt = opts[optCur];

            if (opt.prev1IsLiteral) {
                opts[optPrev].optPrev = optCur;
                opts[optPrev].backPrev = -1;
                optCur = optPrev--;

                if (opt.hasPrev2) {
                    opts[optPrev].optPrev = optPrev + 1;
                    opts[optPrev].backPrev = opt.backPrev2;
                    optCur = optPrev;
                    optPrev = opt.optPrev2;
                }
            }

            int temp = opts[optPrev].optPrev;
            opts[optPrev].optPrev = optCur;
            optCur = optPrev;
            optPrev = temp;
        } while (optCur > 0);

        optCur = opts[0].optPrev;
        back = opts[optCur].backPrev;
        return optCur;
    }

    int getNextSymbol() {
        // If there are pending symbols from an earlier call to this
        // function, return those symbols first.
        if (optCur < optEnd) {
            int len = opts[optCur].optPrev - optCur;
            optCur = opts[optCur].optPrev;
            back = opts[optCur].backPrev;
            return len;
        }

        assert optCur == optEnd;
        optCur = 0;
        optEnd = 0;
        back = -1;

        if (readAhead == -1)
            matches = getMatches();

        // Get the number of bytes available in the dictionary, but
        // not more than the maximum match length. If there aren't
        // enough bytes remaining to encode a match at all, return
        // immediately to encode this byte as a literal.
        int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
        if (avail < MATCH_LEN_MIN)
            return 1;

        // Get the lengths of repeated matches.
        int repBest = 0;
        for (int rep = 0; rep < REPS; ++rep) {
            repLens[rep] = lz.getMatchLen(reps[rep], avail);

            if (repLens[rep] < MATCH_LEN_MIN) {
                repLens[rep] = 0;
                continue;
            }

            if (repLens[rep] > repLens[repBest])
                repBest = rep;
        }

        // Return if the best repeated match is at least niceLen bytes long.
        if (repLens[repBest] >= niceLen) {
            back = repBest;
            skip(repLens[repBest] - 1);
            return repLens[repBest];
        }

        // Initialize mainLen and mainDist to the longest match found
        // by the match finder.
        int mainLen = 0;
        int mainDist = 0;
        if (matches.count > 0) {
            mainLen = matches.len[matches.count - 1];
            mainDist = matches.dist[matches.count - 1];

            // Return if it is at least niceLen bytes long.
            if (mainLen >= niceLen) {
                back = mainDist + REPS;
                skip(mainLen - 1);
                return mainLen;
            }
        }

        int curByte = lz.getByte(0);
        int matchByte = lz.getByte(reps[0] + 1);

        // If the match finder found no matches and this byte cannot be
        // encoded as a repeated match (short or long), we must be return
        // to have the byte encoded as a literal.
        if (mainLen < MATCH_LEN_MIN && curByte != matchByte
                && repLens[repBest] < MATCH_LEN_MIN)
            return 1;


        int pos = lz.getPos();
        int posState = pos & posMask;

        // Calculate the price of encoding the current byte as a literal.
        {
            int prevByte = lz.getByte(1);
            int literalPrice = literalEncoder.getPrice(curByte, matchByte,
                                                       prevByte, pos, state);
            opts[1].set1(literalPrice, 0, -1);
        }

        int anyMatchPrice = getAnyMatchPrice(state, posState);
        int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);

        // If it is possible to encode this byte as a short rep, see if
        // it is cheaper than encoding it as a literal.
        if (matchByte == curByte) {
            int shortRepPrice = getShortRepPrice(anyRepPrice,
                                                 state, posState);
            if (shortRepPrice < opts[1].price)
                opts[1].set1(shortRepPrice, 0, 0);
        }

        // Return if there is neither normal nor long repeated match. Use
        // a short match instead of a literal if is is possible and cheaper.
        optEnd = Math.max(mainLen, repLens[repBest]);
        if (optEnd < MATCH_LEN_MIN) {
            assert optEnd == 0 : optEnd;
            back = opts[1].backPrev;
            return 1;
        }


        // Update the lookup tables for distances and lengths before using
        // those price calculation functions. (The price function above
        // don't need these tables.)
        updatePrices();

        // Initialize the state and reps of this position in opts[].
        // updateOptStateAndReps() will need these to get the new
        // state and reps for the next byte.
        opts[0].state.set(state);
        System.arraycopy(reps, 0, opts[0].reps, 0, REPS);

        // Initialize the prices for latter opts that will be used below.
        for (int i = optEnd; i >= MATCH_LEN_MIN; --i)
            opts[i].reset();

        // Calculate the prices of repeated matches of all lengths.
        for (int rep = 0; rep < REPS; ++rep) {
            int repLen = repLens[rep];
            if (repLen < MATCH_LEN_MIN)
                continue;

            int longRepPrice = getLongRepPrice(anyRepPrice, rep,
                                               state, posState);
            do {
                int price = longRepPrice + repLenEncoder.getPrice(repLen,
                                                                  posState);
                if (price < opts[repLen].price)
                    opts[repLen].set1(price, 0, rep);
            } while (--repLen >= MATCH_LEN_MIN);
        }

        // Calculate the prices of normal matches that are longer than rep0.
        {
            int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN);
            if (len <= mainLen) {
                int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
                                                           state);

                // Set i to the index of the shortest match that is
                // at least len bytes long.
                int i = 0;
                while (len > matches.len[i])
                    ++i;

                while (true) {
                    int dist = matches.dist[i];
                    int price = getMatchAndLenPrice(normalMatchPrice,
                                                    dist, len, posState);
                    if (price < opts[len].price)
                        opts[len].set1(price, 0, dist + REPS);

                    if (len == matches.len[i])
                        if (++i == matches.count)
                            break;

                    ++len;
                }
            }
        }


        avail = Math.min(lz.getAvail(), OPTS - 1);

        // Get matches for later bytes and optimize the use of LZMA symbols
        // by calculating the prices and picking the cheapest symbol
        // combinations.
        while (++optCur < optEnd) {
            matches = getMatches();
            if (matches.count > 0
                    && matches.len[matches.count - 1] >= niceLen)
                break;

            --avail;
            ++pos;
            posState = pos & posMask;

            updateOptStateAndReps();
            anyMatchPrice = opts[optCur].price
                            + getAnyMatchPrice(opts[optCur].state, posState);
            anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state);

            calc1BytePrices(pos, posState, avail, anyRepPrice);

            if (avail >= MATCH_LEN_MIN) {
                int startLen = calcLongRepPrices(pos, posState,
                                                 avail, anyRepPrice);
                if (matches.count > 0)
                    calcNormalMatchPrices(pos, posState, avail,
                                          anyMatchPrice, startLen);
            }
        }

        return convertOpts();
    }

    /**
     * Updates the state and reps for the current byte in the opts array.
     */
    private void updateOptStateAndReps() {
        int optPrev = opts[optCur].optPrev;
        assert optPrev < optCur;

        if (opts[optCur].prev1IsLiteral) {
            --optPrev;

            if (opts[optCur].hasPrev2) {
                opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
                if (opts[optCur].backPrev2 < REPS)
                    opts[optCur].state.updateLongRep();
                else
                    opts[optCur].state.updateMatch();
            } else {
                opts[optCur].state.set(opts[optPrev].state);
            }

            opts[optCur].state.updateLiteral();
        } else {
            opts[optCur].state.set(opts[optPrev].state);
        }

        if (optPrev == optCur - 1) {
            // Must be either a short rep or a literal.
            assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;

            if (opts[optCur].backPrev == 0)
                opts[optCur].state.updateShortRep();
            else
                opts[optCur].state.updateLiteral();

            System.arraycopy(opts[optPrev].reps, 0,
                             opts[optCur].reps, 0, REPS);
        } else {
            int back;
            if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
                optPrev = opts[optCur].optPrev2;
                back = opts[optCur].backPrev2;
                opts[optCur].state.updateLongRep();
            } else {
                back = opts[optCur].backPrev;
                if (back < REPS)
                    opts[optCur].state.updateLongRep();
                else
                    opts[optCur].state.updateMatch();
            }

            if (back < REPS) {
                opts[optCur].reps[0] = opts[optPrev].reps[back];

                int rep;
                for (rep = 1; rep <= back; ++rep)
                    opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];

                for (; rep < REPS; ++rep)
                    opts[optCur].reps[rep] = opts[optPrev].reps[rep];
            } else {
                opts[optCur].reps[0] = back - REPS;
                System.arraycopy(opts[optPrev].reps, 0,
                                 opts[optCur].reps, 1, REPS - 1);
            }
        }
    }

    /**
     * Calculates prices of a literal, a short rep, and literal + rep0.
     */
    private void calc1BytePrices(int pos, int posState,
                                 int avail, int anyRepPrice) {
        // This will be set to true if using a literal or a short rep.
        boolean nextIsByte = false;

        int curByte = lz.getByte(0);
        int matchByte = lz.getByte(opts[optCur].reps[0] + 1);

        // Try a literal.
        int literalPrice = opts[optCur].price
                + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
                                          pos, opts[optCur].state);
        if (literalPrice < opts[optCur + 1].price) {
            opts[optCur + 1].set1(literalPrice, optCur, -1);
            nextIsByte = true;
        }

        // Try a short rep.
        if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
                                      || opts[optCur + 1].backPrev != 0)) {
            int shortRepPrice = getShortRepPrice(anyRepPrice,
                                                 opts[optCur].state,
                                                 posState);
            if (shortRepPrice <= opts[optCur + 1].price) {
                opts[optCur + 1].set1(shortRepPrice, optCur, 0);
                nextIsByte = true;
            }
        }

        // If neither a literal nor a short rep was the cheapest choice,
        // try literal + long rep0.
        if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
            int lenLimit = Math.min(niceLen, avail - 1);
            int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);

            if (len >= MATCH_LEN_MIN) {
                nextState.set(opts[optCur].state);
                nextState.updateLiteral();
                int nextPosState = (pos + 1) & posMask;
                int price = literalPrice
                            + getLongRepAndLenPrice(0, len,
                                                    nextState, nextPosState);

                int i = optCur + 1 + len;
                while (optEnd < i)
                    opts[++optEnd].reset();

                if (price < opts[i].price)
                    opts[i].set2(price, optCur, 0);
            }
        }
    }

    /**
     * Calculates prices of long rep and long rep + literal + rep0.
     */
    private int calcLongRepPrices(int pos, int posState,
                                  int avail, int anyRepPrice) {
        int startLen = MATCH_LEN_MIN;
        int lenLimit = Math.min(avail, niceLen);

        for (int rep = 0; rep < REPS; ++rep) {
            int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
            if (len < MATCH_LEN_MIN)
                continue;

            while (optEnd < optCur + len)
                opts[++optEnd].reset();

            int longRepPrice = getLongRepPrice(anyRepPrice, rep,
                                               opts[optCur].state, posState);

            for (int i = len; i >= MATCH_LEN_MIN; --i) {
                int price = longRepPrice
                            + repLenEncoder.getPrice(i, posState);
                if (price < opts[optCur + i].price)
                    opts[optCur + i].set1(price, optCur, rep);
            }

            if (rep == 0)
                startLen = len + 1;

            int len2Limit = Math.min(niceLen, avail - len - 1);
            int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
                                      len2Limit);

            if (len2 >= MATCH_LEN_MIN) {
                // Rep
                int price = longRepPrice
                            + repLenEncoder.getPrice(len, posState);
                nextState.set(opts[optCur].state);
                nextState.updateLongRep();

                // Literal
                int curByte = lz.getByte(len, 0);
                int matchByte = lz.getByte(0); // lz.getByte(len, len)
                int prevByte = lz.getByte(len, 1);
                price += literalEncoder.getPrice(curByte, matchByte, prevByte,
                                                 pos + len, nextState);
                nextState.updateLiteral();

                // Rep0
                int nextPosState = (pos + len + 1) & posMask;
                price += getLongRepAndLenPrice(0, len2,
                                               nextState, nextPosState);

                int i = optCur + len + 1 + len2;
                while (optEnd < i)
                    opts[++optEnd].reset();

                if (price < opts[i].price)
                    opts[i].set3(price, optCur, rep, len, 0);
            }
        }

        return startLen;
    }

    /**
     * Calculates prices of a normal match and normal match + literal + rep0.
     */
    private void calcNormalMatchPrices(int pos, int posState, int avail,
                                       int anyMatchPrice, int startLen) {
        // If the longest match is so long that it would not fit into
        // the opts array, shorten the matches.
        if (matches.len[matches.count - 1] > avail) {
            matches.count = 0;
            while (matches.len[matches.count] < avail)
                ++matches.count;

            matches.len[matches.count++] = avail;
        }

        if (matches.len[matches.count - 1] < startLen)
            return;

        while (optEnd < optCur + matches.len[matches.count - 1])
            opts[++optEnd].reset();

        int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
                                                   opts[optCur].state);

        int match = 0;
        while (startLen > matches.len[match])
            ++match;

        for (int len = startLen; ; ++len) {
            int dist = matches.dist[match];

            // Calculate the price of a match of len bytes from the nearest
            // possible distance.
            int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
                                                       dist, len, posState);
            if (matchAndLenPrice < opts[optCur + len].price)
                opts[optCur + len].set1(matchAndLenPrice,
                                        optCur, dist + REPS);

            if (len != matches.len[match])
                continue;

            // Try match + literal + rep0. First get the length of the rep0.
            int len2Limit = Math.min(niceLen, avail - len - 1);
            int len2 = lz.getMatchLen(len + 1, dist, len2Limit);

            if (len2 >= MATCH_LEN_MIN) {
                nextState.set(opts[optCur].state);
                nextState.updateMatch();

                // Literal
                int curByte = lz.getByte(len, 0);
                int matchByte = lz.getByte(0); // lz.getByte(len, len)
                int prevByte = lz.getByte(len, 1);
                int price = matchAndLenPrice
                        + literalEncoder.getPrice(curByte, matchByte,
                                                  prevByte, pos + len,
                                                  nextState);
                nextState.updateLiteral();

                // Rep0
                int nextPosState = (pos + len + 1) & posMask;
                price += getLongRepAndLenPrice(0, len2,
                                               nextState, nextPosState);

                int i = optCur + len + 1 + len2;
                while (optEnd < i)
                    opts[++optEnd].reset();

                if (price < opts[i].price)
                    opts[i].set3(price, optCur, dist + REPS, len, 0);
            }

            if (++match == matches.count)
                break;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy