META-INF.modules.java.base.classes.com.sun.java.util.jar.pack.PopulationCoding Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of java.base Show documentation
Show all versions of java.base Show documentation
Bytecoder java.base Module
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.java.util.jar.pack;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import static com.sun.java.util.jar.pack.Constants.*;
/**
* Population-based coding.
* See the section "Encodings of Uncorrelated Values" in the Pack200 spec.
* @author John Rose
*/
// This tactic alone reduces the final zipped rt.jar by about a percent.
class PopulationCoding implements CodingMethod {
// Frequency source consulted by resortFavoredValues() via getFrequency().
Histogram vHist; // histogram of all values
// Favored values occupy fValues[1..fVlen]; slot 0 is left empty because
// token 0 is the marker for "not a favored value".
int[] fValues; // list of favored values
int fVlen; // inclusive max index
// Built lazily by getToken(). Each entry packs (value << 32) | token and
// the array is kept sorted so it can be binary-searched by value.
long[] symtab; // int map of favored value -> token [1..#fValues]
// Coding used to write and read the favored value list itself.
CodingMethod favoredCoding;
// Coding used for the token sequence; setL() can derive it from fVlen and L.
CodingMethod tokenCoding;
// Coding used for the values whose token is 0.
CodingMethod unfavoredCoding;
// A negative value means no L is set; setL() and setFavoredValues() only
// act on it when it is >= 0.
int L = -1; //preferred L value for tokenCoding
/**
 * Installs the favored value table. May be called only once per instance.
 *
 * @param fValues array whose slots {@code [1..fVlen]} hold the favored
 *                values; slot 0 must be zero
 * @param fVlen   inclusive index of the last favored value
 */
public void setFavoredValues(int[] fValues, int fVlen) {
    // Favored values are indexed from 1, not 0, because token zero is
    // reserved to mean "unfavored"; slot 0 therefore has to be empty.
    assert(fValues[0] == 0);
    // Installing the table a second time is a programming error.
    assert(this.fValues == null);
    this.fValues = fValues;
    this.fVlen = fVlen;
    if (L < 0) {
        return;
    }
    // An L was chosen before the table was known; re-apply it so the
    // token coding can now be fitted.
    setL(L);
}
/**
 * Installs the favored value table, treating the whole array (apart from
 * the reserved slot 0) as favored values.
 *
 * @param fValues array whose slots {@code [1..length-1]} hold the favored values
 */
public void setFavoredValues(int[] fValues) {
    // The last valid index doubles as the inclusive favored-value count.
    setFavoredValues(fValues, fValues.length - 1);
}
/**
 * Records the histogram that resortFavoredValues() queries for the
 * frequency of each favored value.
 */
public void setHistogram(Histogram vHist) {
this.vHist = vHist;
}
/**
 * Records the preferred L parameter and, when possible, derives the token
 * coding from it.
 *
 * <p>The token coding is fitted only when {@code L} is non-negative, the
 * favored values are already installed, and no token coding has been set.
 *
 * @param L preferred L value; a negative value only records the setting
 */
public void setL(int L) {
    this.L = L;
    boolean derivable = (L >= 0) && (fValues != null);
    if (!derivable || tokenCoding != null) {
        return;
    }
    tokenCoding = fitTokenCoding(fVlen, L);
    assert(tokenCoding != null);
}
/**
 * Chooses the token coding for a given number of favored values.
 *
 * <p>Fewer than 256 favored values always use {@code BandStructure.BYTE1}.
 * Otherwise the search starts from {@code BandStructure.UNSIGNED5} with the
 * requested L and lowers B for as long as {@code umax()} still covers
 * {@code fVlen}.
 *
 * @param fVlen largest token that must be representable
 * @param L     L parameter to apply to the coding
 * @return the fitted coding, or {@code null} when even the longest coding
 *         cannot represent {@code fVlen}
 */
public static Coding fitTokenCoding(int fVlen, int L) {
    // Find the smallest B s.t. (B,H,0) covers fVlen.
    if (fVlen < 256) {
        // H/L do not matter when B==1
        return BandStructure.BYTE1;
    }
    Coding longest = BandStructure.UNSIGNED5.setL(L);
    if (!longest.canRepresentUnsigned(fVlen)) {
        return null;  // failure; L is too sharp and fVlen too large
    }
    Coding best = longest;
    Coding candidate = best.setB(best.B() - 1);
    while (candidate.umax() >= fVlen) {
        // The shorter coding still covers fVlen, so adopt it and try
        // one byte fewer.
        best = candidate;
        candidate = candidate.setB(candidate.B() - 1);
    }
    return best;
}
/** Sets the coding used to write and read the list of favored values. */
public void setFavoredCoding(CodingMethod favoredCoding) {
this.favoredCoding = favoredCoding;
}
/**
 * Sets the token coding explicitly and recomputes {@code L}.
 *
 * <p>{@code L} is reset to -1. It is set to the coding's own L only when
 * the coding is a plain {@code Coding}, the favored values are installed,
 * and the coding is the very instance {@code fitTokenCoding} would return.
 *
 * @param tokenCoding coding to use for the token sequence
 */
public void setTokenCoding(CodingMethod tokenCoding) {
    this.tokenCoding = tokenCoding;
    this.L = -1;
    if (fValues == null || !(tokenCoding instanceof Coding)) {
        return;
    }
    Coding simple = (Coding) tokenCoding;
    if (simple == fitTokenCoding(fVlen, simple.L())) {
        this.L = simple.L();
    }
    // Otherwise, it's a non-default coding.
}
/** Sets the coding used for values that have no favored-value token. */
public void setUnfavoredCoding(CodingMethod unfavoredCoding) {
this.unfavoredCoding = unfavoredCoding;
}
/**
 * Returns the upper bound on the number of favored values for the current L.
 *
 * @return {@code Integer.MAX_VALUE} when L is 0, otherwise the
 *         {@code umax()} of {@code BandStructure.UNSIGNED5} with this L
 */
public int favoredValueMaxLength() {
    return (L == 0)
        ? Integer.MAX_VALUE
        : BandStructure.UNSIGNED5.setL(L).umax();
}
/**
 * Reorders the favored values in place, one group ("cadre") of tokens at a
 * time, where a cadre is the range of tokens that tokenCoding encodes in
 * the same number of bytes.
 *
 * For the one-byte cadre, every run of values with equal histogram
 * frequency is sorted numerically. For longer cadres, the range is split
 * at the frequency boundary closest to its midpoint and each half is
 * sorted numerically.
 *
 * Requires tokenCoding to be a Coding (it is cast unconditionally) and
 * vHist to be set. Discards the cached symtab, since tokens change.
 */
public void resortFavoredValues() {
Coding tc = (Coding) tokenCoding;
// Make a local copy before reordering.
fValues = BandStructure.realloc(fValues, 1+fVlen);
// Resort favoredValues within each byte-size cadre.
int fillp = 1; // skip initial zero
for (int n = 1; n <= tc.B(); n++) {
// Upper token bound for this cadre, clipped to the number of favored values.
int nmax = tc.byteMax(n);
if (nmax > fVlen)
nmax = fVlen;
// No tokens of this byte length (or longer) are in use.
if (nmax < tc.byteMin(n))
break;
// [low, high) is the half-open index range of this cadre in fValues.
int low = fillp;
int high = nmax+1;
if (high == low) continue;
assert(high > low)
: high+"!>"+low;
assert(tc.getLength(low) == n)
: n+" != len("+(low)+") == "+
tc.getLength(low);
assert(tc.getLength(high-1) == n)
: n+" != len("+(high-1)+") == "+
tc.getLength(high-1);
int midTarget = low + (high-low)/2;
int mid = low;
// Divide the values into cadres, and sort within each.
// prevCount/prevLimit track the current run of equal-frequency values.
int prevCount = -1;
int prevLimit = low;
for (int i = low; i < high; i++) {
int val = fValues[i];
int count = vHist.getFrequency(val);
if (prevCount != count) {
if (n == 1) {
// For the single-byte encoding, keep strict order
// among frequency groups.
Arrays.sort(fValues, prevLimit, i);
} else if (Math.abs(mid - midTarget) >
Math.abs(i - midTarget)) {
// Find a single inflection point
// close to the middle of the byte-size cadre.
mid = i;
}
prevCount = count;
prevLimit = i;
}
}
if (n == 1) {
// Sort the final run, which the loop above never closes.
Arrays.sort(fValues, prevLimit, high);
} else {
// Sort up to the midpoint, if any.
Arrays.sort(fValues, low, mid);
Arrays.sort(fValues, mid, high);
}
assert(tc.getLength(low) == tc.getLength(mid));
assert(tc.getLength(low) == tc.getLength(high-1));
fillp = nmax+1;
}
assert(fillp == fValues.length);
// Reset symtab.
symtab = null;
}
/**
 * Looks up the token assigned to a value.
 *
 * @param value value to look up
 * @return the token in {@code [1..fVlen]} when {@code value} is favored,
 *         or 0 when it is not
 */
public int getToken(int value) {
    if (symtab == null) {
        symtab = makeSymtab();
    }
    // Search with the token bits cleared; a negative result encodes the
    // insertion point, which is where an entry for this value would sit.
    int idx = Arrays.binarySearch(symtab, (long)value << 32);
    if (idx < 0) {
        idx = -(idx + 1);
    }
    if (idx >= symtab.length) {
        return 0;
    }
    long entry = symtab[idx];
    if ((int)(entry >>> 32) != value) {
        return 0;
    }
    // The low 32 bits of the entry carry the token.
    return (int) entry;
}
/**
 * Splits {@code values[start..end)} into a token sequence and the list of
 * values that have no token.
 *
 * @param values source array
 * @param start  first index to encode (inclusive)
 * @param end    last index to encode (exclusive)
 * @return a two-element array: {@code [0]} holds one token per input value
 *         (0 for unfavored values), {@code [1]} holds the unfavored values
 *         in their original order
 */
public int[][] encodeValues(int[] values, int start, int end) {
    final int count = end - start;
    // First pass: translate every value to its token and count the misses.
    int[] tokens = new int[count];
    int misses = 0;
    for (int k = 0; k < count; k++) {
        int tok = getToken(values[start + k]);
        tokens[k] = tok;
        if (tok == 0) {
            misses++;
        }
    }
    // Second pass: collect the values whose token is zero.
    int[] unfavoredValues = new int[misses];
    int fill = 0;
    for (int k = 0; k < count; k++) {
        if (tokens[k] == 0) {
            unfavoredValues[fill++] = values[start + k];
        }
    }
    assert(fill == unfavoredValues.length);
    return new int[][]{ tokens, unfavoredValues };
}
/**
 * Builds the value-to-token lookup table.
 *
 * @return an ascending array of {@code fVlen} entries, each packing a
 *         favored value in the high 32 bits and its token in the low 32 bits
 */
private long[] makeSymtab() {
    long[] table = new long[fVlen];
    for (int slot = 0; slot < table.length; slot++) {
        int token = slot + 1;  // tokens are 1-based
        long valueBits = (long) fValues[token] << 32;
        table[slot] = valueBits | token;
    }
    // Index by value:
    Arrays.sort(table);
    return table;
}
/**
 * Follows the {@code tailCoding} links of nested {@code AdaptiveCoding}s
 * and returns the first method that is not adaptive, cast to {@code Coding}.
 *
 * @param c coding method to unwrap
 * @return the innermost tail coding
 */
private Coding getTailCoding(CodingMethod c) {
    CodingMethod current = c;
    while (current instanceof AdaptiveCoding) {
        AdaptiveCoding adaptive = (AdaptiveCoding) current;
        current = adaptive.tailCoding;
    }
    return (Coding) current;
}
// CodingMethod methods.
/**
 * Encodes {@code a[start..end)} into tokens and unfavored values, then
 * writes the complete population-coded sequence to {@code out}.
 *
 * @param out   destination stream
 * @param a     source array
 * @param start first index to write (inclusive)
 * @param end   last index to write (exclusive)
 * @throws IOException if writing to {@code out} fails
 */
public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
    int[][] split = encodeValues(a, start, end);
    int[] tokens = split[0];
    int[] unfavored = split[1];
    writeSequencesTo(out, tokens, unfavored);
}
/**
 * Writes the parts of a population-coded band in order: the favored
 * values, the sentinel that ends them, the tokens, and finally the
 * unfavored values if there are any.
 *
 * @param out     destination stream
 * @param tokens  one token per encoded value
 * @param uValues values that have no token
 * @throws IOException if writing to {@code out} fails
 */
void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues) throws IOException {
    favoredCoding.writeArrayTo(out, fValues, 1, 1+fVlen);
    // The sentinel is written with the tail coding of the favored coding.
    Coding tail = getTailCoding(favoredCoding);
    int sentinel = computeSentinelValue();
    tail.writeTo(out, sentinel);
    tokenCoding.writeArrayTo(out, tokens, 0, tokens.length);
    if (uValues.length == 0) {
        return;
    }
    unfavoredCoding.writeArrayTo(out, uValues, 0, uValues.length);
}
/**
 * Computes the value written after the favored values to mark their end.
 *
 * <p>For a delta tail coding the sentinel is a delta of 0, which repeats
 * the last favored value. Otherwise it is either the most central favored
 * value or the last one, whichever the tail coding encodes in fewer bytes
 * (the most central value wins ties).
 *
 * @return the sentinel to write with the tail coding of {@code favoredCoding}
 */
int computeSentinelValue() {
    Coding fc = getTailCoding(favoredCoding);
    if (fc.isDelta()) {
        // repeat the last favored value, using delta=0
        return 0;
    }
    // else repeat the shorter of the min or last value
    int min = fValues[1];
    int last = min;
    // (remember that fVlen is an inclusive limit in fValues)
    for (int i = 2; i <= fVlen; i++) {
        last = fValues[i];
        min = moreCentral(min, last);
    }
    if (fc.getLength(min) <= fc.getLength(last)) {
        return min;
    }
    return last;
}
/**
 * Decodes a population-coded band from the stream into a[start..end).
 *
 * Reads, in order: the favored value list, the token sequence, and (only
 * if some token was zero) the unfavored values. Tokens are read straight
 * into the destination array and then replaced in place by their values.
 */
public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
// Parameters are fCode, L, uCode.
// Installing the favored values also fits tokenCoding when L >= 0 and
// no token coding was supplied (see setFavoredValues/setL).
setFavoredValues(readFavoredValuesFrom(in, end-start));
// Read the tokens. Read them into the final array, for the moment.
tokenCoding.readArrayFrom(in, a, start, end);
// Decode the favored tokens.
// headp/tailp are the first and most recent zero-token positions;
// tailp < 0 means none has been seen yet.
int headp = 0, tailp = -1;
int uVlen = 0;
for (int i = start; i < end; i++) {
int tok = a[i];
if (tok == 0) {
// Make a linked list, and decode in a second pass.
// Each zero slot is overwritten with the index of the next zero slot.
if (tailp < 0) {
headp = i;
} else {
a[tailp] = i;
}
tailp = i;
uVlen += 1;
} else {
a[i] = fValues[tok];
}
}
// Walk the linked list of "zero" locations, decoding unfavored vals.
int[] uValues = new int[uVlen];
if (uVlen > 0)
unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen);
for (int i = 0; i < uVlen; i++) {
// Fetch the link before overwriting the slot with its real value.
int nextp = a[headp];
a[headp] = uValues[i];
headp = nextp;
}
}
/**
 * Reads the list of favored values from the stream.
 *
 * <p>The heads of any nested {@code AdaptiveCoding}s are read as
 * fixed-length runs. The remaining values are read one at a time with the
 * tail coding until a sentinel appears: a value equal either to the
 * previous value or to the most central value seen so far.
 *
 * @param in          stream positioned at the start of the favored values
 * @param maxForDebug upper bound on the number of favored values; used
 *                    only in assertions
 * @return the favored values in slots {@code [1..n]}, as produced by
 *         {@code BandStructure.realloc(lfValues, fillp)}
 * @throws IOException if reading from {@code in} fails
 */
int[] readFavoredValuesFrom(InputStream in, int maxForDebug) throws IOException {
    int[] lfValues = new int[1000];  // realloc as needed
    // The set uniqueValuesForDebug records all favored values.
    // As each new value is added, we assert that the value
    // was not already in the set.
    // It is only allocated when assertions are enabled.
    Set<Integer> uniqueValuesForDebug = null;
    assert((uniqueValuesForDebug = new HashSet<>()) != null);
    int fillp = 1;  // slot 0 stays empty
    maxForDebug += fillp;
    int min = Integer.MIN_VALUE;  // farthest from the center
    //int min2 = Integer.MIN_VALUE;  // emulate buggy 150.7 spec.
    int last = 0;
    CodingMethod fcm = favoredCoding;
    // Adaptive heads have a known length, so they are read in bulk.
    while (fcm instanceof AdaptiveCoding) {
        AdaptiveCoding ac = (AdaptiveCoding) fcm;
        int len = ac.headLength;
        while (fillp + len > lfValues.length) {
            lfValues = BandStructure.realloc(lfValues);
        }
        int newFillp = fillp + len;
        ac.headCoding.readArrayFrom(in, lfValues, fillp, newFillp);
        while (fillp < newFillp) {
            int val = lfValues[fillp++];
            assert(uniqueValuesForDebug.add(val));
            assert(fillp <= maxForDebug);
            last = val;
            min = moreCentral(min, val);
            //min2 = moreCentral2(min2, val, min);
        }
        fcm = ac.tailCoding;
    }
    Coding fc = (Coding) fcm;
    if (fc.isDelta()) {
        // Delta coding: keep a running sum and stop at the sentinel.
        for (long state = 0;;) {
            // Read a new value:
            state += fc.readFrom(in);
            int val;
            if (fc.isSubrange())
                val = fc.reduceToUnsignedRange(state);
            else
                val = (int)state;
            state = val;
            if (fillp > 1 && (val == last || val == min)) //|| val == min2
                break;
            if (fillp == lfValues.length)
                lfValues = BandStructure.realloc(lfValues);
            lfValues[fillp++] = val;
            assert(uniqueValuesForDebug.add(val));
            assert(fillp <= maxForDebug);
            last = val;
            min = moreCentral(min, val);
            //min2 = moreCentral(min2, val);
        }
    } else {
        // Non-delta coding: each value is read directly.
        for (;;) {
            int val = fc.readFrom(in);
            if (fillp > 1 && (val == last || val == min)) //|| val == min2
                break;
            if (fillp == lfValues.length)
                lfValues = BandStructure.realloc(lfValues);
            lfValues[fillp++] = val;
            assert(uniqueValuesForDebug.add(val));
            assert(fillp <= maxForDebug);
            last = val;
            min = moreCentral(min, val);
            //min2 = moreCentral2(min2, val, min);
        }
    }
    return BandStructure.realloc(lfValues, fillp);
}
/**
 * Returns whichever argument is closer to zero, preferring the negative
 * one when the magnitudes are equal.
 *
 * <p>Each value is mapped to a key that interleaves negatives and
 * positives (0, -1, 1, -2, 2, ...), and the keys are compared as unsigned
 * numbers.
 *
 * @param x first candidate
 * @param y second candidate
 * @return {@code x} if its key is strictly smaller, otherwise {@code y}
 */
private static int moreCentral(int x, int y) {
    int keyX = (x >> 31) ^ (x << 1);
    int keyY = (y >> 31) ^ (y << 1);
    // bias the keys to get an unsigned comparison:
    int biasedX = keyX - Integer.MIN_VALUE;
    int biasedY = keyY - Integer.MIN_VALUE;
    int chosen;
    if (biasedX < biasedY) {
        chosen = x;
    } else {
        chosen = y;
    }
    // assert that this ALU-ish version is the same:
    assert(chosen == moreCentralSlow(x, y));
    return chosen;
}
// private static int moreCentral2(int x, int y, int min) {
// // Strict implementation of buggy 150.7 specification.
// // The bug is that the spec. says absolute-value ties are broken
// // in favor of positive numbers, but the suggested implementation
// // (also mentioned in the spec.) breaks ties in favor of negatives.
// if (x + y == 0) return (x > y? x : y);
// return min;
// }
/**
 * Reference implementation of "closer to zero", used to cross-check the
 * bit-twiddling version in an assertion.
 *
 * @param x first candidate
 * @param y second candidate
 * @return the argument with the smaller magnitude; on a tie, the negative
 *         one; {@code Integer.MIN_VALUE} always loses
 */
private static int moreCentralSlow(int x, int y) {
    // Math.abs leaves Integer.MIN_VALUE negative, which identifies it.
    int magX = Math.abs(x);
    if (magX < 0) {
        return y;  // x is MIN_VALUE
    }
    int magY = Math.abs(y);
    if (magY < 0) {
        return x;  // y is MIN_VALUE
    }
    if (magX != magY) {
        return (magX < magY) ? x : y;
    }
    // At this point the absolute values agree, and the negative wins.
    return Math.min(x, y);
}
// Table mapping the L code carried in the meta-coding opcode to the actual
// L value. parseMetaCoding() indexes it directly; getMetaCoding() searches
// it from index 1. Index 0 holds -1, which parseMetaCoding() stores as
// "might be -1" when the opcode carries no default token coding.
static final int[] LValuesCoded
= { -1, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 };
/**
 * Serializes this population coding as a meta-coding byte sequence.
 *
 * <p>The first byte is {@code _meta_pop + FDef + 2*UDef + 4*TDefL}, where
 * {@code FDef}/{@code UDef} flag that the favored/unfavored coding is the
 * default, and {@code TDefL} is the index into {@code LValuesCoded} from
 * which the token coding can be reconstructed (0 if it cannot). The
 * meta-codings of every non-default part follow, in the order favored,
 * token, unfavored.
 *
 * @param dflt the default coding against which the parts are compared
 * @return the encoded meta-coding bytes
 */
public byte[] getMetaCoding(Coding dflt) {
    // Work out which LValuesCoded index, if any, describes tokenCoding.
    int LCoded = 0;
    if (tokenCoding instanceof Coding) {
        Coding tc = (Coding) tokenCoding;
        if (tc.B() == 1) {
            LCoded = 1;
        } else if (L >= 0) {
            assert(L == tc.L());
            for (int i = 1; i < LValuesCoded.length; i++) {
                if (LValuesCoded[i] == L) { LCoded = i; break; }
            }
        }
    }
    CodingMethod tokenDflt = null;
    if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) {
        // A simple L value is enough to recover the tokenCoding.
        tokenDflt = tokenCoding;
    }
    int FDef = (favoredCoding == dflt)?1:0;
    int UDef = (unfavoredCoding == dflt || unfavoredCoding == null)?1:0;
    int TDef = (tokenCoding == tokenDflt)?1:0;
    int TDefL = (TDef == 1) ? LCoded : 0;
    assert(TDef == ((TDefL>0)?1:0));
    ByteArrayOutputStream bytes = new ByteArrayOutputStream(10);
    bytes.write(_meta_pop + FDef + 2*UDef + 4*TDefL);
    // write(byte[], int, int) on ByteArrayOutputStream declares no
    // IOException, so no exception wrapping is needed here.
    if (FDef == 0) {
        byte[] meta = favoredCoding.getMetaCoding(dflt);
        bytes.write(meta, 0, meta.length);
    }
    if (TDef == 0) {
        byte[] meta = tokenCoding.getMetaCoding(dflt);
        bytes.write(meta, 0, meta.length);
    }
    if (UDef == 0) {
        byte[] meta = unfavoredCoding.getMetaCoding(dflt);
        bytes.write(meta, 0, meta.length);
    }
    return bytes.toByteArray();
}
/**
 * Parses a population meta-coding starting at {@code bytes[pos]}.
 *
 * <p>If the opcode is outside {@code [_meta_pop, _meta_limit)} nothing is
 * consumed and {@code pos} is returned unchanged. Otherwise the opcode's
 * flags are decoded, every non-default sub-coding is parsed (favored,
 * token, unfavored, in that order), and a new {@code PopulationCoding} is
 * stored in {@code res[0]}.
 *
 * @param bytes meta-coding bytes
 * @param pos   index of the opcode
 * @param dflt  default coding for the favored and unfavored parts
 * @param res   one-element output array receiving the parsed coding
 * @return the index just past the consumed bytes
 */
public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) {
    int opcode = bytes[pos] & 0xFF;
    if (opcode < _meta_pop || opcode >= _meta_limit) {
        return pos;  // not a population opcode; consume nothing
    }
    pos++;
    // flags is non-negative here, so masks and shifts are exact.
    int flags = opcode - _meta_pop;
    boolean favoredIsDefault = (flags & 1) != 0;
    boolean unfavoredIsDefault = (flags & 2) != 0;
    int codedL = flags >> 2;
    boolean tokenIsDefault = codedL > 0;
    int parsedL = LValuesCoded[codedL];
    CodingMethod[] favoredOut = {dflt};
    CodingMethod[] tokenOut = {null};
    CodingMethod[] unfavoredOut = {dflt};
    if (!favoredIsDefault) {
        pos = BandStructure.parseMetaCoding(bytes, pos, dflt, favoredOut);
    }
    if (!tokenIsDefault) {
        pos = BandStructure.parseMetaCoding(bytes, pos, dflt, tokenOut);
    }
    if (!unfavoredIsDefault) {
        pos = BandStructure.parseMetaCoding(bytes, pos, dflt, unfavoredOut);
    }
    PopulationCoding pop = new PopulationCoding();
    pop.L = parsedL;  // might be -1
    pop.favoredCoding = favoredOut[0];
    pop.tokenCoding = tokenOut[0];  // might be null!
    pop.unfavoredCoding = unfavoredOut[0];
    res[0] = pop;
    return pos;
}
/**
 * Renders a coding method for {@code toString()}.
 *
 * @param m coding method, possibly {@code null}
 * @return {@code "none"} for {@code null}, the key string for a plain
 *         {@code Coding}, otherwise the method's own {@code toString()}
 */
private String keyString(CodingMethod m) {
    if (m == null) {
        return "none";
    }
    return (m instanceof Coding) ? ((Coding) m).keyString() : m.toString();
}
/**
 * Describes this coding as {@code pop(fVlen=N ... fc=.. tc=.. uc=..)}.
 * When the current property map enables the {@code verbose.pop} option
 * and favored values are installed, the values and the sentinel are
 * listed as well.
 */
public String toString() {
    PropMap props = Utils.currentPropMap();
    boolean verbose = false;
    if (props != null) {
        verbose = props.getBoolean(Utils.COM_PREFIX+"verbose.pop");
    }
    StringBuilder sb = new StringBuilder(100);
    sb.append("pop(");
    sb.append("fVlen=");
    sb.append(fVlen);
    if (verbose && fValues != null) {
        sb.append(" fV=[");
        for (int i = 1; i <= fVlen; i++) {
            // Comma-separate every value after the first.
            String separator = (i == 1) ? "" : ",";
            sb.append(separator);
            sb.append(fValues[i]);
        }
        sb.append(";");
        sb.append(computeSentinelValue());
        sb.append("]");
    }
    sb.append(" fc=");
    sb.append(keyString(favoredCoding));
    sb.append(" tc=");
    sb.append(keyString(tokenCoding));
    sb.append(" uc=");
    sb.append(keyString(unfavoredCoding));
    sb.append(")");
    return sb.toString();
}
}